21,886
社区成员
发帖
与我相关
我的任务
分享
// Demo: segment a fixed UTF-8 sentence with PSCWS4 and print each word with its attribute.

// Load the PSCWS4 class file
require 'pscws/pscws4.class.php';
header("Content-Type:text/html;charset=utf-8");

// Create the segmenter object. The constructor argument is the character set
// (default is gbk); it can also be changed afterwards with set_charset().
$pscws = new PSCWS4('utf-8');
$pscws->set_ignore('yes');
$pscws->set_dict('E:/scws/etc/dict.utf8.xdb');
$pscws->set_rule('E:/scws/etc/rules_cht.utf8.ini');

// Usage: pass the text to send_text(), then call get_result() repeatedly
// until it returns false. Each call yields a list of words; every word is an
// associative array with the keys:
//   word - the word itself
//   idf  - inverse word frequency (weight)
//   off  - offset of the word within the text
//   len  - length
//   attr - part-of-speech attribute
$text = "中国航天官员应邀到美国与太空总署官员开会";
$pscws->send_text($text);
for (;;) {
    $batch = $pscws->get_result();
    if (!$batch) {
        // A falsy result marks the end of the segmentation output
        break;
    }
    foreach ($batch as $token) {
        $line = $token['word'] . '=>' . $token['attr'] . '<br>';
        echo $line;
    }
    // Push the words of this batch to the client before fetching the next one
    flush();
}
$pscws->close();
?>
// The first command-line argument must name an existing regular file (the xdb file);
// otherwise print the usage line and stop.
if (!(isset($_SERVER['argv'][1]) && is_file($_SERVER['argv'][1]))) {
    echo "Usage: {$_SERVER['argv'][0]} <xdb file> [output file]\n";
    exit(0);
}

// The optional second argument selects the output target; without it, write to standard output.
if (isset($_SERVER['argv'][2])) {
    $output = $_SERVER['argv'][2];
} else {
    $output = 'php://stdout';
}
'argv'
Array of arguments passed to the script. When the script is run on the command line, this gives C-style access to the command line parameters. When called via the GET method, this will contain the query string.
// Constructor (old style, named after the class).
// $charset: character set handed to set_charset(); defaults to 'gbk'.
function PSCWS4($charset = 'gbk')
{
    // Reset the internal state fields before applying the charset
    $this->_xd = false;
    $this->_rd = array();
    $this->_rs = array();

    $this->set_charset($charset);
}
// PHP 5+ constructor.
// When a class defines __construct(), PHP 5+ uses it as the constructor in
// preference to the old-style class-named method, so the charset argument
// must be accepted here and forwarded; otherwise new PSCWS4('utf-8') would
// silently fall back to the 'gbk' default.
// $charset: character set passed through to PSCWS4(); defaults to 'gbk'.
function __construct($charset = 'gbk')
{
    $this->PSCWS4($charset);
}