33,007
社区成员
发帖
与我相关
我的任务
分享
#include <time.h>
#include <iostream>
#include <fstream>
#include <set>
#include <string>
#include <algorithm>
using namespace std;
#define ASSERT(T) if (!(T)) {exit(-1);}
set<string> matchstrs;
size_t maxmatchstrlen = 0;
// Load the pattern list.
// Every line of "strings.txt" is lower-cased and stored in the global set
// `matchstrs`; `maxmatchstrlen` is raised to the length of the longest line
// read. ASSERT terminates the program if the file cannot be opened.
void init_matchstrs()
{
    ifstream patterns("strings.txt");
    ASSERT(patterns);
    for (string entry; getline(patterns, entry); )
    {
        // Patterns are kept in lower case; match() lower-cases its input too.
        transform(entry.begin(), entry.end(), entry.begin(), (int(*)(int))tolower);
        maxmatchstrlen = max(maxmatchstrlen, entry.length());
        matchstrs.insert(entry);
    }
    patterns.close();
}
// Report whether `str` contains one of the stored patterns.
// The argument is lower-cased first, then every substring of length
// 1..maxmatchstrlen is looked up in the global set `matchstrs`.
// Returns true on the first hit, false when no substring is a pattern.
bool match(string str)
{
    transform(str.begin(), str.end(), str.begin(), (int(*)(int))tolower);
    const size_t total = str.length();
    for (size_t start = 0; start != total; ++start)
    {
        // Candidates never need to be longer than the longest pattern, nor
        // run past the end of the string.
        const size_t longest = min(maxmatchstrlen, total - start);
        for (size_t width = 1; width <= longest; ++width)
        {
            if (matchstrs.count(str.substr(start, width)) != 0)
            {
                return true;
            }
        }
    }
    return false;
}
// Filter "in.txt" into "out.txt": lines for which match() returns true are
// dropped, every other line is copied. Prints the clock()-based elapsed time
// to standard output at the end.
int main()
{
    // Load the patterns before any input is processed.
    init_matchstrs();

    ifstream source("in.txt");
    ofstream sink("out.txt");
    ASSERT(source);
    ASSERT(sink);

    for (string current; getline(source, current); )
    {
        if (match(current))
        {
            continue;   // line contains a pattern: skip it
        }
        sink << current << endl;
    }

    source.close();
    sink.close();
    cout << "耗时:" << clock() * 1.0 / CLOCKS_PER_SEC << "秒" << endl;
    return 0;
}
--[[
Multi-pattern matching with the Aho-Corasick algorithm; the goal is to keep
the run time within one second.
]]
-- Root of the trie. Child nodes are stored under numeric byte keys; the
-- string keys value / parent / failure / matched / output hold bookkeeping.
root = {};
root.value = 0;
root.failure = root;

-- True while the trie contains nodes added since the failure links were last
-- built. Set by insert(), cleared by rebuild_links().
local links_stale = false;

-- Compute the failure link and the output flag of one node.
-- The failure link is the node spelling the longest proper suffix of this
-- node's string that is also a path in the trie. It is only correct when the
-- parent and every shallower node already carry their final links, so it is
-- invoked from rebuild_links() in breadth-first order (computing it while the
-- node is being inserted always resolved to the node itself).
-- node.output becomes true when this node, or any node reachable through
-- failure links, ends a pattern.
function makefailure(node)
  local parent = node.parent;
  if parent == root then
    -- Single-byte prefixes have no non-empty proper suffix.
    node.failure = root;
  else
    local p = parent.failure;
    while p ~= root and not p[node.value] do
      p = p.failure;
    end;
    node.failure = p[node.value] or root;
  end;
  node.output = (node.matched or node.failure.output) and true or false;
end;

-- Recompute all failure links breadth-first, starting from the root.
local function rebuild_links()
  root.failure = root;
  local queue = { root };
  local head = 1;
  while queue[head] do
    local node = queue[head];
    head = head + 1;
    if node ~= root then
      makefailure(node);
    end;
    for key, child in pairs(node) do
      -- Only numeric keys are children; string keys are bookkeeping fields.
      if type(key) == "number" then
        queue[#queue + 1] = child;
      end;
    end;
  end;
  links_stale = false;
end;

-- Insert one pattern string into the trie.
-- Empty patterns are ignored: marking the root as a match would make every
-- non-empty input match.
function insert(pattern)
  if pattern == "" then
    return;
  end;
  local p = root;
  for pos = 1, #pattern do
    local byte = pattern:byte(pos);
    if not p[byte] then
      -- Create the node and remember its parent; links are built later.
      p[byte] = { value = byte, parent = p };
    end;
    p = p[byte];
  end;
  p.matched = true;
  links_stale = true;
end;

-- Build the trie from a pattern file (one pattern per line, lower-cased).
function init(patternfile)
  for pattern in io.lines(patternfile) do
    insert(pattern:lower());
  end;
  rebuild_links();
end;

-- Return true when str contains at least one inserted pattern, else false.
function match(str)
  if links_stale then
    -- insert() was called after the last build; bring the links up to date.
    rebuild_links();
  end;
  local p = root;
  for pos = 1, #str do
    local byte = str:byte(pos);
    -- Fall back along failure links until a transition exists or we are at root.
    while p ~= root and not p[byte] do
      p = p.failure;
    end;
    p = p[byte] or root;
    if p.output then
      return true;
    end;
  end;
  return false;
end;
-- Convert one file: copy every line of `inname` to `outname`, except lines
-- whose lower-cased text is reported as matching by match().
-- Raises an error (via assert / io.lines) when either file cannot be opened.
function convert(inname, outname)
  io.write("正在转换:\"", inname, "\" -> \"", outname, "\"\n");
  -- The output file is opened first, then the input is streamed line by line.
  local sink = assert(io.open(outname, "w"));
  for text in io.lines(inname) do
    local rejected = match(text:lower());
    if not rejected then
      -- Keep the original (not lower-cased) line.
      sink:write(text, "\n");
    end;
  end;
  -- io.lines closes the input itself at end of file; only the output remains.
  sink:close();
end;
-- Driver: load the patterns, filter the three setup files, report wall time.
starttime = os.time();
print("初始化...");
init("matchstr.txt");
-- Each entry is { input path, output path }, processed in this order.
local jobs = {
  { "in\\DOSNET.INF", "out\\DOSNET.INF" },
  { "in\\DRVINDEX.INF", "out\\DRVINDEX.INF" },
  { "in\\TXTSETUP.SIF", "out\\TXTSETUP.SIF" },
};
for index = 1, #jobs do
  convert(jobs[index][1], jobs[index][2]);
end;
endtime = os.time();
io.write("用时:", os.difftime(endtime, starttime), "秒\n");
#include<iostream>
#include<string>
#include<deque>
#include<time.h>
using namespace std;
int step = 250 ;
// One node of the pattern trie.
//   next - 400 child slots; insert() and run() index them with the value
//          returned by turnLetter()
//   n    - incremented by insert() on the node where a key ends
//   fail - failure link, filled in by Fail()
// A freshly constructed node has no children, n == 0 and a null fail link.
struct trieTree
{
    trieTree * next[400] ;
    int n ;
    trieTree * fail ;
    trieTree() : n( 0 ) , fail( 0 )
    {
        const int slots = sizeof( next ) / sizeof( next[0] ) ;
        for( int k = 0 ; k < slots ; ++k )
            next[k] = 0 ;
    }
} ;
trieTree * u , v ;
deque<trieTree*> dq ;
// True only for the lower-case ASCII letters 'a'..'z'; upper-case letters
// and every other character yield false.
bool isLetter(char c )
{
    return 'a' <= c && c <= 'z' ;
}
// Map a character to its child-slot index: the character code shifted by the
// global `step`. Lower-case letters are additionally shifted down by 32, so
// 'a'..'z' share the slots of 'A'..'Z'.
int turnLetter(char c)
{
    int slot = c + step ;
    if( isLetter(c) )
        slot -= 32 ;
    return slot ;
}
// Build the failure links of the trie rooted at `root` (breadth-first).
//
// For every node the failure link points at the node spelling the longest
// proper suffix of the node's string that is also a path in the trie.
//
// In addition the terminal marker `n` is propagated along the failure link:
// run() only looks at `n` of the current state, so without this a pattern
// that ends inside a longer trie path (e.g. "bc" while walking "abc" of the
// pattern "abcd") would never be reported. Breadth-first order guarantees the
// failure target (a shallower node) already carries its final marker.
//
// Uses the global queue `dq` and the global scratch pointer `u`.
void Fail ( trieTree * root ){
    const int slots = sizeof( root->next ) / sizeof( root->next[0] ) ;
    root->fail = root ;
    dq.clear() ;
    dq.push_back( root ) ;
    while( !dq.empty() ){
        u = dq.front() ; dq.pop_front() ;
        for( int i = 0 ; i < slots ; i ++ ){
            trieTree * child = u->next[i] ;
            if( !child ) continue ;
            if( u == root ){
                // Depth-1 nodes have no non-empty proper suffix.
                child->fail = root ;
            }
            else{
                // Follow the parent's failure chain until a node with an
                // i-transition is found, or the root is reached.
                trieTree * tmp = u->fail ;
                while( tmp != root && !tmp->next[i] )
                    tmp = tmp->fail ;
                child->fail = tmp->next[i] ? tmp->next[i] : root ;
            }
            // Inherit "a pattern ends here" from the suffix state. The root is
            // excluded so it can never turn every state into a match, and the
            // n == 0 guard keeps repeated calls from inflating the value.
            if( child->n == 0 && child->fail != root && child->fail->n != 0 )
                child->n = child->fail->n ;
            dq.push_back ( child ) ;
        }
    }
}
// Scan the NUL-terminated `text` through the automaton rooted at `root`.
// Returns non-zero (true) as soon as the current state is a non-root node
// whose `n` is non-zero, and 0 (false) when the end of the text is reached.
int run ( trieTree * root , char * text ){
    trieTree * state = root ;
    for( char * cur = text ; *cur ; ++cur ){
        const int slot = turnLetter( *cur ) ;
        // Drop back along failure links while the state has no such child.
        while( state != root && !state->next[slot] )
            state = state->fail ;
        state = state->next[slot] ;
        if( !state )
            state = root ;      // no transition even from the root
        if( state != root && state->n != 0 )
            return true ;
    }
    return false ;
}
void insert ( char key[] , trieTree *root ){
/// printf("%s\n",key);
int i = 0 , j = 0;
for( i = 0; key[i] ; i ++ ){
//j = key[i]+step ;
j = turnLetter(key[i]);
if( !root->next[j])
root->next[j] = new trieTree() ;
root = root->next[j];
}
root->n ++ ;
}
char key[550] ;
char text[10000] ;
char textCopy[10000];
// Read the patterns from "strings.txt", build the automaton, then copy every
// line of "in.txt" that contains no pattern to "myOut.txt".
//
// Lines are read with fgets() bounded by the buffer size instead of gets(),
// which has no length limit and no longer exists in current C/C++ standards.
// A line longer than the buffer is therefore processed in buffer-sized pieces
// rather than overflowing it.
int main()
{
    trieTree * root = new trieTree() ;
    if( !freopen("strings.txt","r",stdin) ){
        fprintf( stderr , "cannot open strings.txt\n" ) ;
        return 1 ;
    }
    while( fgets( key , sizeof(key) , stdin ) != 0 ){
        // fgets keeps the line terminator; gets used to drop it.
        size_t len = strlen( key ) ;
        if( len > 0 && key[len - 1] == '\n' ) key[--len] = '\0' ;
        if( len < 1 ) continue ;        // skip empty pattern lines
        insert ( key , root ) ;
    }
    Fail ( root ) ;
    if( !freopen("in.txt","r",stdin) ){
        fprintf( stderr , "cannot open in.txt\n" ) ;
        return 1 ;
    }
    if( !freopen("myOut.txt","w",stdout) ){
        fprintf( stderr , "cannot open myOut.txt\n" ) ;
        return 1 ;
    }
    while( fgets( text , sizeof(text) , stdin ) != 0 ){
        size_t len = strlen( text ) ;
        if( len > 0 && text[len - 1] == '\n' ) text[len - 1] = '\0' ;
        if( !run( root , text ) )
            puts( text ) ;
    }
    // NOTE(review): stdout was redirected above, so this timing line is
    // written into myOut.txt as well - confirm that is intended.
    printf("耗时: %lf \n" ,clock() * 1.0 / CLOCKS_PER_SEC );
    return 0 ;
}
#include<iostream>
#include<string>
#include<deque>
#include<time.h>
using namespace std;
// Aho-Corasick automaton over raw bytes.
//
// One trie node:
//   next - one child slot per possible byte value (0..255). Slots are always
//          indexed through `unsigned char`, so bytes >= 0x80 (negative when
//          plain char is signed) can no longer index outside the array.
//   n    - non-zero when a pattern ends at this node, or (after Fail()) at a
//          suffix of this node's string
//   fail - failure link, filled in by Fail()
struct trieTree
{
    enum { SLOTS = 256 } ;
    trieTree * next[SLOTS] ;
    int n ;
    trieTree * fail ;
    trieTree(){
        for( int k = 0 ; k < SLOTS ; k ++ ) next[k] = 0 ;
        n = 0 ;
        fail = 0 ;
    }
} ;
trieTree * u , v ;
std::deque<trieTree*> dq ;

// Build the failure links of the trie rooted at `root` (breadth-first) and
// propagate the terminal marker `n` along them, so that run() - which only
// inspects the current state - also reports patterns that end inside a
// longer trie path (e.g. "bc" while walking "abc" of the pattern "abcd").
// Uses the global queue `dq` and the global scratch pointer `u`.
void Fail ( trieTree * root ){
    root->fail = root ;
    dq.clear() ;
    dq.push_back( root ) ;
    while( !dq.empty() ){
        u = dq.front() ; dq.pop_front() ;
        for( int i = 0 ; i < trieTree::SLOTS ; i ++ ){
            trieTree * child = u->next[i] ;
            if( !child ) continue ;
            if( u == root ){
                // Depth-1 nodes have no non-empty proper suffix.
                child->fail = root ;
            }
            else{
                // Follow the parent's failure chain until a node with an
                // i-transition is found, or the root is reached.
                trieTree * tmp = u->fail ;
                while( tmp != root && !tmp->next[i] )
                    tmp = tmp->fail ;
                child->fail = tmp->next[i] ? tmp->next[i] : root ;
            }
            // The failure target is shallower, hence already final. The root
            // is excluded so it can never turn every state into a match; the
            // n == 0 guard keeps repeated calls from inflating the value.
            if( child->n == 0 && child->fail != root && child->fail->n != 0 )
                child->n = child->fail->n ;
            dq.push_back ( child ) ;
        }
    }
}

// Scan the NUL-terminated `text`; returns non-zero (true) as soon as some
// inserted pattern ends at the current position, 0 (false) otherwise.
int run ( trieTree * root , char * text ){
    trieTree * p = root ;
    for( int i = 0 ; text[i] ; i ++ ){
        const int j = (unsigned char) text[i] ;
        while ( !p->next[j] && p != root ) p = p->fail ;
        p = p->next[j] ;
        if ( !p ) p = root ;
        if( p != root && p->n != 0 ) return true ;
    }
    return false ;
}

// Add the NUL-terminated `key` to the trie rooted at `root` and mark the node
// where it ends by incrementing `n`.
void insert ( char key[] , trieTree *root ){
    for( int i = 0 ; key[i] ; i ++ ){
        const int j = (unsigned char) key[i] ;
        if( !root->next[j] )
            root->next[j] = new trieTree() ;
        root = root->next[j] ;
    }
    root->n ++ ;
}
char key[550] ;
char text[10000] ;
// Read the patterns from "strings.txt", build the automaton, then copy every
// line of "in.txt" that contains no pattern to "myOut.txt".
//
// Lines are read with fgets() bounded by the buffer size instead of gets(),
// which has no length limit and no longer exists in current C/C++ standards.
// A line longer than the buffer is therefore processed in buffer-sized pieces
// rather than overflowing it.
int main()
{
    trieTree * root = new trieTree() ;
    if( !freopen("strings.txt","r",stdin) ){
        fprintf( stderr , "cannot open strings.txt\n" ) ;
        return 1 ;
    }
    while( fgets( key , sizeof(key) , stdin ) != 0 ){
        // fgets keeps the line terminator; gets used to drop it.
        size_t len = strlen( key ) ;
        if( len > 0 && key[len - 1] == '\n' ) key[--len] = '\0' ;
        if( len < 1 ) continue ;        // skip empty pattern lines
        insert ( key , root ) ;
    }
    Fail ( root ) ;
    if( !freopen("in.txt","r",stdin) ){
        fprintf( stderr , "cannot open in.txt\n" ) ;
        return 1 ;
    }
    if( !freopen("myOut.txt","w",stdout) ){
        fprintf( stderr , "cannot open myOut.txt\n" ) ;
        return 1 ;
    }
    while( fgets( text , sizeof(text) , stdin ) != 0 ){
        size_t len = strlen( text ) ;
        if( len > 0 && text[len - 1] == '\n' ) text[len - 1] = '\0' ;
        if( !run( root , text ) )
            puts( text ) ;
    }
    // NOTE(review): stdout was redirected above, so this timing line is
    // written into myOut.txt as well - confirm that is intended.
    printf("耗时: %lf \n" ,clock() * 1.0 / CLOCKS_PER_SEC );
    return 0 ;
}
#include <time.h>
...
int main()
{
...
...
// 在程序最后这样输出时间
cout << "耗时:" << clock() * 1.0 / CLOCKS_PER_SEC << "秒" << endl;
return 0;
}