21,886
社区成员
发帖
与我相关
我的任务
分享
<?php
// Report every error class except notices, deprecation notices and strict-standards hints.
// NOTE(review): the class below relies on notices being hidden (it reads undefined
// variables and array offsets), so this mask also hides genuine defects.
error_reporting(E_ALL & ~E_NOTICE & ~E_DEPRECATED & ~E_STRICT);
/**
 * Scores items against a reference table by counting keyword substring hits.
 *
 * Each table row is a list of keywords separated by "," or "|". For every item the
 * class counts how many keywords of each row occur in the item (plain substring
 * test) and reports the row key(s) with the highest count.
 */
class weight {
    /** @var array Items to score, keyed as supplied by the caller. */
    private $_aItems = array();
    /** @var array Raw table rows ("kw1,kw2|kw3"), keyed as supplied by the caller. */
    private $_aTable = array();
    /** @var array Table rows split into keyword lists; same keys as $_aTable. */
    private $_aDict = array();
    /** @var array Per item: 'keyring' (row key => hit count) and 'index' (best key + count). */
    private $_aMatchs = array();
    /** @var array Per item: list of row keys that share the highest hit count. */
    private $_aShow = array();

    function __construct() {
    }

    /**
     * Replace the items to be scored and drop any cached results.
     *
     * @param mixed $aItems Array of items, or a single value that is cast to a one-element array.
     */
    public function newItems($aItems) {
        $this->_aItems = (array)$aItems;
        $this->resetResults();
    }

    /**
     * Replace the reference table, rebuild the keyword dictionary and drop cached results.
     *
     * @param mixed $aTable Array of rows, or a single row that is cast to a one-element array.
     */
    public function newTable($aTable) {
        $this->_aTable = (array)$aTable;
        // Results computed against the previous table would be stale.
        $this->resetResults();
        $this->generateDict();
    }

    /** Forget every computed match so the next getShow() recomputes from scratch. */
    private function resetResults() {
        $this->_aMatchs = array();
        $this->_aShow = array();
    }

    /** Split every table row into its keyword list ("|" and "," are both separators). */
    private function generateDict() {
        $this->_aDict = array();
        foreach ($this->_aTable as $mRowKey => $sRow) {
            $this->_aDict[$mRowKey] = explode(',', str_replace('|', ',', $sRow));
        }
    }

    /**
     * @return array The per-item match details gathered by the last scoring run.
     */
    public function getMatchs() {
        return $this->_aMatchs;
    }

    /**
     * Compute (if necessary) and deliver the result set.
     *
     * @param string $sRule Output mode:
     *                      'debug'    - echo statistics and var_dump the result;
     *                      'calctime' - echo statistics only;
     *                      'json'     - return the result as a JSON string;
     *                      'txt'      - write the result to dump.txt;
     *                      anything else - return the result array.
     * @return mixed false when items or table are empty, or when dump.txt cannot be
     *               written; a string for 'json'; an array for the default mode;
     *               null for the echo/file modes.
     */
    public function getShow($sRule = 'debug') {
        $fStart = microtime(true);
        if (empty($this->_aItems) || empty($this->_aTable)) {
            // Nothing to score or nothing to score against.
            return false;
        }
        if (empty($this->_aShow)) {
            // Results have not been generated yet.
            $this->loopTable();
        }
        switch ($sRule) {
            case 'debug':
                $this->printStats($fStart);
                var_dump($this->_aShow);
                break;
            case 'json':
                return json_encode($this->_aShow);
            case 'txt':
                if (!$this->dumpToFile('dump.txt')) {
                    return false;
                }
                break;
            case 'calctime':
                $this->printStats($fStart);
                break;
            default:
                return $this->_aShow;
        }
        return null;
    }

    /**
     * Echo item/table/match counts and the elapsed time.
     *
     * @param float $fStart microtime(true) value taken when getShow() started.
     */
    private function printStats($fStart) {
        // Kept in seconds so the value agrees with the "s" unit printed below.
        $elasped = microtime(true) - $fStart;
        echo count($this->_aItems). '个条目<br/>';
        echo count($this->_aTable). '个参照表条目<br/>';
        echo count($this->_aMatchs). '个对应关系<br/>';
        echo "执行时间: {$elasped}s";
    }

    /**
     * Write one line per item to a text file.
     *
     * @param string $sPath Target file; it is overwritten.
     * @return bool true on success, false when the file could not be written.
     */
    private function dumpToFile($sPath) {
        // Numeric year/month/day and 24-hour clock.
        $sStamp = date('Y/m/d H:i:s');
        $sBody = '';
        foreach ($this->_aShow as $mItemKey => $aRowKeys) {
            if (count($aRowKeys) > 1) {
                // Every key is followed by a comma, including the last one.
                $sBody .= $this->_aItems[$mItemKey] . "\t匹配多个记录号\t". implode(',', $aRowKeys) . ',' ."\r\n";
            } else {
                $sBody .= $this->_aItems[$mItemKey] . "\t匹配惟一记录号\t". $aRowKeys[0] ."\r\n";
            }
        }
        $sContent = "========DUMP-FILE-{$sStamp}=========================\n{$sBody}";
        return file_put_contents($sPath, $sContent) !== false;
    }

    /** Score every item and store the winning row key(s) per item. */
    private function loopTable() {
        foreach ($this->_aItems as $mItemKey => $sItemLine) {
            $this->_aShow[$mItemKey] = $this->matchElement($mItemKey);
        }
    }

    /**
     * Count keyword hits of every table row for one item.
     *
     * Side effect: stores the per-row counts and the best row in $_aMatchs[$iKey].
     *
     * @param mixed $iKey Key of the item inside $_aItems.
     * @return array Row keys whose hit count equals the maximum. When no keyword
     *               matches at all, the maximum is 0 and every row is returned.
     */
    private function matchElement($iKey) {
        $sItem = (string)$this->_aItems[$iKey];
        $aCounts = array();
        $aBest = array();
        $iMax = 0;
        foreach ($this->_aDict as $mRowKey => $aKeywords) {
            $iHits = 0;
            foreach ($aKeywords as $sKeyword) {
                // strpos() !== false instead of strstr() truthiness: a match whose tail
                // is "0" is falsy, and an empty keyword must never count as a hit.
                if ($sKeyword !== '' && strpos($sItem, $sKeyword) !== false) {
                    $iHits++;
                }
            }
            $aCounts[$mRowKey] = $iHits;
            if ($iHits > $iMax) {
                // New maximum: earlier candidates are no longer tied for first place.
                $iMax = $iHits;
                $aBest = array($mRowKey);
            } elseif ($iHits === $iMax) {
                $aBest[] = $mRowKey;
            }
        }
        $this->_aMatchs[$iKey] = array(
            'keyring' => $aCounts,
            'index' => array(
                'key' => empty($aBest) ? null : $aBest[0],
                'count' => $iMax
            ),
        );
        return $aBest;
    }
}
// Sample input: the strings to be scored.
// NOTE(review): the "......" / "几万条" lines are placeholders from the original post
// (tens of thousands of entries elided); this snippet is not valid PHP as written.
$aItems = array(
'chinaisbig',
'whichisnot',
......
几万条
......
'totalyrightforme',
);
// Sample reference table: each row lists keywords separated by "," or "|".
// NOTE(review): "上万条" likewise marks over ten thousand elided rows.
$aTable = array(
'china,is|small',
'china,big|me',
'china,is|big,which|not,me',
......
上万条
......
'china,is|small',
);
// Load items and table, then print statistics and the result in debug mode.
$weight = new weight();
$weight->newItems($aItems);
$weight->newTable($aTable);
$weight->getShow('debug');
?>
Array
(
[0] => Array
(
[0] => 2
)
[1] => Array
(
[0] => 2
)
[2] => Array
(
[0] => 3
)
)
没有什么问题,只是把简单的事情弄复杂了。
<?php
// Demo driver: builds the keyword trie, scores three sample items and prints
// timing plus the result. Relies on the weight class declared later in this file.
error_reporting(E_ALL & ~E_NOTICE & ~E_DEPRECATED & ~E_STRICT);
header('Cache-Control: no-cache, must-revalidate');
header("Expires: Sat, 26 Jul 1997 05:00:00 GMT");

// Strings to be scored.
$aItems = array(
    'prefixchinaisbig',
    'whichisnotpostfix',
    'totalyconfusionrightforme',
);
// Reference rows; "," and "|" both separate keywords.
$aTable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);

$oWeight = new weight();
$oWeight->newTable($aTable);
$oWeight->newItems($aItems);

if ($oWeight->bDebug) {
    printf("%s\n=========================================\n", 'Debug Mode');
}

// Time only the scoring call itself.
$tb = microtime(true);
$aRes = $oWeight->getShow();
$ta = microtime(true);
$elasped = ($ta - $tb)*1000;

if ($oWeight->bDebug) {
    printf("%s\n=========================================\n", 'Tire Dict');
    print_r($oWeight->getDict());
}

echo count($oWeight->getItems()). ' 个条目, ';
echo "执行时间: {$elasped} ms<br/>\n";
var_dump($aRes);
/**
 * Scores items against a reference table using a trie of the table's keywords.
 *
 * Each table row is a list of keywords separated by "," or "|". An item is scanned
 * left to right; every keyword found contributes the keys of the rows that contain
 * it, and the row key(s) collected most often win.
 *
 * The trie is stored flat: $aDict[node][char] holds the child node index and
 * $aDict[node]['acc'] holds the row keys of a keyword that ends at that node.
 */
class weight{
    /** @var bool When true, the matcher prints a verbose trace. */
    public $bDebug = true;
    /** @var array Cached result: item key => list of winning row keys. */
    public $aShow = array();
    /** @var array Flat trie; node 0 is the root. */
    protected $aDict = array( array() );
    /** @var array Items to score. */
    protected $aItems = array();

    /**
     * Import the items to be scored and drop the cached result.
     *
     * @param mixed $mItems Array of strings, or a single string.
     */
    public function newItems($mItems) {
        $this->aItems = is_array($mItems) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Import a new reference table and build the trie from its keywords.
     *
     * @param array $aTable Row key => "kw1,kw2|kw3".
     */
    public function newTable(array $aTable) {
        // Start from an empty trie so rows of a previous table do not leak into this one.
        $this->aDict = array(array());
        foreach ($aTable as $iTableKey => $sTableLine) {
            $aKeywords = explode(',', str_replace('|', ',', $sTableLine));
            foreach ($aKeywords as $sKeyword) {
                $this->genDict($sKeyword, $iTableKey);
            }
        }
        $this->init();
    }

    /** Drop the cached result. The property is reset, not unset, so it stays readable. */
    private function init() {
        $this->aShow = array();
    }

    /**
     * @return array Item key => list of row keys with the highest hit count
     *               (empty list when the item contains no keyword).
     */
    public function getShow() {
        if (empty($this->aShow)) {
            return $this->genShow();
        }
        return $this->aShow;
    }

    /**
     * @return array The items currently loaded.
     */
    public function getItems() {
        return $this->aItems;
    }

    /**
     * @return array The flat trie.
     */
    public function getDict() {
        return $this->aDict;
    }

    /** Score every item, cache the result and return it. */
    private function genShow() {
        $debug = $this->bDebug;
        $aShow = array();
        foreach ($this->aItems as $k => $v) {
            $debug && print 'item: '. $v. '(length:'.strlen($v).")\n".'-----------------------'."\n";
            // Row key => number of keywords of that row found in the item.
            $aCounts = array_count_values($this->matchElement($v));
            $debug && print 'matchRules Generated'."\n";
            $debug && print_r($aCounts);
            // max() must not be called on an empty array (no keyword found at all).
            $aShow[$k] = empty($aCounts) ? array() : array_keys($aCounts, max($aCounts));
        }
        $this->aShow = $aShow;
        return $this->aShow;
    }

    /**
     * Insert one keyword (or an array of keyword => action pairs) into the trie.
     *
     * @param mixed $mWord   Keyword, or array whose values are keywords and whose keys are actions.
     * @param mixed $sAction Value recorded at the keyword's final node (the table row key).
     */
    private function genDict($mWord, $sAction='') {
        if (is_array($mWord)) {
            foreach ($mWord as $k => $v) {
                $this->genDict($v, $k);
            }
            return;
        }
        $mWord = (string)$mWord;
        if ($mWord === '') {
            // Produced by input such as "a,,b"; an empty keyword can never be matched.
            return;
        }
        $iNext = count($this->aDict);
        $iCur = 0;
        foreach (str_split($mWord) as $sChar) {
            if (!isset($this->aDict[$iCur][$sChar])) {
                // Allocate a fresh node and link it from the current one.
                $this->aDict[$iNext] = array();
                $this->aDict[$iCur][$sChar] = $iNext;
                $iNext++;
            }
            $iCur = $this->aDict[$iCur][$sChar];
        }
        $this->aDict[$iCur]['acc'][] = $sAction;
    }

    /**
     * Scan a string for dictionary keywords.
     *
     * From each start position the trie is walked until the first node that ends a
     * keyword; that keyword's row keys are collected and scanning resumes right
     * after it. When the walk fails, scanning resumes one byte after the start
     * position, so a keyword hidden behind a failed partial match is still found.
     *
     * @param string $sLine Text to scan (compared byte-wise).
     * @return array Row keys of every keyword found, in order of appearance, with repeats.
     */
    function matchElement($sLine) {
        $sLine = (string)$sLine;
        $iLen = strlen($sLine);
        $aRefer = array();
        $debug = $this->bDebug;
        $iStart = 0;
        while ($iStart < $iLen) {
            $iCur = 0;
            $iPos = $iStart;
            $bAccepted = false;
            while ($iPos < $iLen) {
                $sChar = $sLine[$iPos];
                $bHasEdge = isset($this->aDict[$iCur][$sChar]);
                if ($debug) {
                    print 'trying char:'.$sChar ."\n";
                    print 'isset?$this->aDict['.$iCur.']['.$sChar.'],'.($bHasEdge ? 'yes' : 'no')."\n";
                    print 'wordLength: '.($iPos - $iStart)."\n";
                }
                if (!$bHasEdge) {
                    break;
                }
                $iCur = $this->aDict[$iCur][$sChar];
                $iPos++;
                $debug && print "iCur: $iCur\n";
                if (isset($this->aDict[$iCur]['acc'])) {
                    $debug && print 'word('.substr($sLine, $iStart, $iPos - $iStart) . ') got new acc'."\n";
                    $debug && print_r($this->aDict[$iCur]['acc']);
                    $aRefer = array_merge($aRefer, $this->aDict[$iCur]['acc']);
                    $bAccepted = true;
                    break;
                }
            }
            // Continue after the accepted keyword, otherwise retry one byte further on.
            $iStart = $bAccepted ? $iPos : $iStart + 1;
        }
        $debug && print_r($aRefer);
        return $aRefer;
    }
}
?>
// Demo driver for the trie class declared later in this file: registers every
// keyword of every table row under the row's key, then prints the matches per item.
$aItems = array(
    'prefixchinaisbig',
    'whichisnotpostfix',
    'totalyconfusionrightforme',
);
$aTable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);

$p = new trie;
foreach ($aTable as $k => $r) {
    // "|" and "," are both keyword separators.
    $aKeywords = explode(',', strtr($r, '|', ','));
    foreach ($aKeywords as $v) {
        $p->set($v, $k);
    }
}

foreach ($aItems as $s) {
    $aFound = $p->match($s);
    print_r($aFound);
}
/**
 * Nested-array trie that maps words to lists of caller-supplied values.
 *
 * Each node is an array keyed by the next byte of the word; the reserved key 'acc'
 * holds the values registered for the word that ends at that node. 'acc' cannot
 * collide with a child key because child keys are single bytes.
 */
class trie {
    /** @var array Root node of the trie. */
    public $dict = array();

    /**
     * Register a word.
     *
     * @param string $word Word to insert (split byte-wise); empty words are ignored.
     * @param mixed  $acc  Value appended to the word's 'acc' list.
     */
    function set($word, $acc) {
        $word = (string)$word;
        if ($word === '') {
            // An empty word can never be matched.
            return;
        }
        $p =& $this->dict;
        foreach (str_split($word) as $c) {
            if (!isset($p[$c])) {
                $p[$c] = array();
            }
            $p =& $p[$c];
        }
        $p['acc'][] = $acc;
    }

    /**
     * Scan a string for registered words.
     *
     * From each start position the longest registered word is taken; its values are
     * collected and scanning resumes right after it. When no word starts at the
     * position, scanning advances by one byte.
     *
     * @param string $s Text to scan (compared byte-wise).
     * @return array Values of every word found, in order of appearance, with repeats.
     */
    function match($s) {
        $s = (string)$s;
        $len = strlen($s);
        $res = array();
        $start = 0;
        while ($start < $len) {
            $node = $this->dict;
            $hit = null;      // 'acc' list of the longest word seen from $start
            $hitEnd = $start; // offset just past that word
            for ($i = $start; $i < $len && isset($node[$s[$i]]); $i++) {
                $node = $node[$s[$i]];
                if (isset($node['acc'])) {
                    $hit = $node['acc'];
                    $hitEnd = $i + 1;
                }
            }
            if ($hit !== null) {
                $res = array_merge($res, $hit);
                // The byte that ended the walk is examined again as a new start.
                $start = $hitEnd;
            } else {
                $start++;
            }
        }
        return $res;
    }
}
这个字典是 9496b,你用的那个字典是12760b