64,637
社区成员
发帖
与我相关
我的任务
分享
#### Makefile
# Build layout: every generated artifact lives under $(TMPDIR); only the
# final scanner binary is placed next to the sources.
TMPDIR    := tmp
TARGET    := scanner.exe
DFAMST    := $(TMPDIR)/scanner.inl
RULEGEN   := $(TMPDIR)/gen.exe
INPUTLEX  := $(TMPDIR)/input.lex
GENDFATBL := dfa_table_flex.sh

# These targets name actions, not files.
.PHONY: all clean

all : $(TMPDIR) $(TARGET)

clean: ; rm -rf $(TMPDIR) $(TARGET)

$(TMPDIR): ; mkdir -p $@

# scanner.c #includes the generated table, so it is listed as a prerequisite
# even though only the first prerequisite ($<) is handed to the compiler.
$(TARGET): scanner.c $(DFAMST)
	gcc -o $@ $<

# Turn the flex rule file into the DFA table include file.
$(DFAMST): $(INPUTLEX)
	sh $(GENDFATBL) flex $< $(TMPDIR) $@ yy

# The rule file is the standard output of the rule generator.
$(INPUTLEX): $(RULEGEN)
	$< > $@

# Order-only prerequisite: the scratch directory must exist before the first
# file is written into it, but its timestamp must not trigger rebuilds.
$(RULEGEN): gen.c | $(TMPDIR)
	gcc -o $@ $<
#### dfa_table_flex.sh
#
# Usage: $APP $FLEXPATH $INPUT $TEMPDIR $OUTPUT $BASENAME
#
# Positional arguments, in order: the flex command to run, the input rule
# file, the scratch directory, the file to generate, and the base name used
# for the scratch files.
#
FLEXPATH="$1"
INPUTFILE="$2"
TEMPDIR="$3"
OUTPUTFILE="$4"
BASENAME="$5"

# Scratch files: each helper program has a generated source and a binary.
OUTSRC="${TEMPDIR}/${BASENAME}.cpp"
OUTBIN="${TEMPDIR}/${BASENAME}.exe"
GENSCONLIST_SRC="${TEMPDIR}/${BASENAME}-gen-scon.c"
GENSCONLIST_BIN="${TEMPDIR}/${BASENAME}-gen-scon.exe"
GENSACTLIST_SRC="${TEMPDIR}/${BASENAME}-gen-actlist.c"
GENSACTLIST_BIN="${TEMPDIR}/${BASENAME}-gen-actlist.exe"
# Abort the script: remove the output file and every scratch file, print a
# "Fatal Error" line naming this script, and exit with status 1.
#
# Defined with the portable `name()` form: the bash-only `function` keyword
# is a syntax error in strict POSIX shells such as dash. The file operands
# are quoted so a path containing spaces is removed as one file.
error_exit()
{
    rm -f "$OUTPUTFILE" "$OUTSRC" "$OUTBIN" \
          "$GENSCONLIST_SRC" "$GENSCONLIST_BIN" \
          "$GENSACTLIST_SRC" "$GENSACTLIST_BIN"
    echo Fatal Error : $0 $*
    exit 1
}
# Feed the flex specification below (on stdin) to flex, producing the C source
# of a small filter program in $GENSCONLIST_SRC; abort via error_exit on failure.
#
# The generated filter reads text on stdin. After it sees a
# "#define INITIAL 0" line it prints every directly following
# "#define NAME NUMBER" line as an enumerator of an enum called
# <BASENAME>_SCONLIST_enum, where <BASENAME> is argv[1] upper-cased with
# non-alphanumerics replaced by '_' (default "X4C_DFA_TABLE"). The first line
# that is not such a #define ends the block. If at least one enumerator was
# printed, a closing <BASENAME>_SCONLIST_END enumerator and "};" are appended.
# Presumably meant to be run over a flex-generated scanner to collect its
# start-condition ids -- confirm against the callers.
#
# NOTE(review): the here-document delimiter is unquoted, so the shell would
# expand any '$' or backquote added to the text below.
$FLEXPATH -o $GENSCONLIST_SRC << XXEOF || error_exit
%{
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
void on_define_scon(char* line);
%}
%option noyywrap
WS [[:blank:]]+
OPTWS [[:blank:]]*
NL \r\n|\r|\n
IDEN [[:alpha:]_][[:alnum:]_]*
NUMBER [[:digit:]]+
%x SCON_DEFINE_BLOCK
%%
"#"{OPTWS}"define"{WS}INITIAL{WS}0{OPTWS}{NL} BEGIN(SCON_DEFINE_BLOCK);
.|\n
<SCON_DEFINE_BLOCK>{
"#"{OPTWS}"define"{WS}{IDEN}{WS}{NUMBER}{OPTWS}{NL} on_define_scon(yytext);
.|\n BEGIN(INITIAL);
}
%%
static int nSconListCount = 0;
static char basename[120]="X4C_DFA_TABLE";
void on_define_scon(char* line)
{
char* p = strstr(line, "define") , *pName;
int value;
p += 6;
while(isspace(*p)) ++p;
pName = p;
while(!isspace(*p)) ++p;
*p++ = 0;
while(isspace(*p)) ++p;
value = atoi(p);
if( 0 == nSconListCount++ )
printf("enum %s_SCONLIST_enum\n{\n" , basename );
printf("\t%-30s = %4d,\n" , pName , value);
}
int main(int argc , char* argv[])
{
char *p;
if(argc == 2)
strncpy( basename , argv[1] , sizeof(basename) - 20 );
for(p = basename; *p ; ++p )
{
if(!isalnum(*p)) *p = '_';
*p = toupper(*p);
}
yylex();
if( 0 != nSconListCount )
printf( "%s_SCONLIST_END\n};\n" , basename );
return 0;
}
XXEOF
# Feed the flex specification below (on stdin) to flex, producing the C source
# of a small filter program in $GENSACTLIST_SRC; abort via error_exit on failure.
#
# The generated filter reads text on stdin and looks for the sequence
#     case <NUMBER>:  [optional /* ... */ line]  YY_RULE_SETUP  #line ...  ///@ <IDENT>
# For each occurrence it prints "<IDENT> = <NUMBER>," as an enumerator of an
# enum called <BASENAME>_ACTIONLIST_enum, where <BASENAME> is argv[1]
# upper-cased with non-alphanumerics replaced by '_' (default "X4C_DFA_TABLE").
# If at least one enumerator was printed, a closing <BASENAME>_ACTIONLIST_END
# enumerator and "};" are appended.
#
# The delimiter is quoted so the shell passes the text through verbatim.
$FLEXPATH -o $GENSACTLIST_SRC << 'XXEOF' || error_exit
%{
/* Declare every library function used below instead of relying on
 * implicit declarations, which current C compilers reject. */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
void on_define_action(const char* name , int id);
%}
%option noyywrap
NL \r\n|\r|\n
WS [[:blank:]]+
OPTWS [[:blank:]]*
IDEN [[:alpha:]_][[:alnum:]_]*
NUMBER [[:digit:]]+
%%
case{WS}{NUMBER}{OPTWS}":"{OPTWS}{NL}({OPTWS}"/*".*"*/"{OPTWS}{NL})?{OPTWS}YY_RULE_SETUP{OPTWS}{NL}"#"{OPTWS}"line".*{NL}{OPTWS}"///@"{OPTWS}{IDEN}{OPTWS} {
    int value; char* name , *p;
    /* Skip the leading "case" keyword, then read the rule number. */
    p = yytext + 4;
    while(isspace((unsigned char)*p)) ++p;
    value = atoi(p);
    /* The identifier follows the "///@" marker. */
    p = strstr( p , "///@" );
    p = p + 4;
    while(isspace((unsigned char)*p)) ++p;
    name = p;
    /* Stop at the end of yytext as well as at whitespace: the trailing
     * blanks in the pattern are optional, so the identifier may be the
     * last thing matched and the scan must not run into the input buffer. */
    while(*p && !isspace((unsigned char)*p)) ++p;
    *p = 0;
    on_define_action(name, value);
}
.|\n
%%
static int nActionCount = 0;
char basename[120]="X4C_DFA_TABLE";
/* Print one enumerator; the enum header is printed before the first one. */
void on_define_action(const char* name , int id)
{
    if(0 == nActionCount++)
        printf("enum %s_ACTIONLIST_enum\n{\n" , basename );
    printf("\t%-30s = %4d,\n" , name , id);
}
int main(int argc , char* argv[])
{
    char *p;
    if(argc == 2)
    {
        /* Leave room for the suffixes appended to the name and make the
         * termination explicit rather than relying on the zero-filled tail. */
        strncpy( basename , argv[1] , sizeof(basename) - 20 );
        basename[sizeof(basename) - 20] = 0;
    }
    /* Turn the base name into a valid upper-case C identifier fragment. */
    for(p = basename; *p ; ++p )
    {
        if(!isalnum((unsigned char)*p)) *p = '_';
        *p = (char)toupper((unsigned char)*p);
    }
    yylex();
    if(0 != nActionCount)
        printf( "%s_ACTIONLIST_END\n};\n" , basename );
    return 0;
}
XXEOF
##
##
##
# Run flex on the input rule file to produce the scanner source $OUTSRC, then
# append a table-dumping main() to that source. Because the appended code is
# in the same translation unit it can read flex's static tables directly.
# The appended program prints an enum of scanner constants followed by the
# yy_accept / yy_ec / yy_meta / yy_base / yy_def / yy_nxt / yy_chk tables as
# C array definitions.
#
# The delimiter is quoted so the shell passes the text through verbatim.
$FLEXPATH -o $OUTSRC $INPUTFILE || error_exit
cat << 'XXEOF' >> $OUTSRC
/*
 * Print the N-element table arr as "static const <type> <name>[N] = { ... };".
 * <type> is the narrowest of unsigned char / unsigned short / unsigned int
 * that holds the largest element. The first element is not printed from the
 * table: the text s0 is emitted in its place.
 */
template<typename T>
void gentbl( T* arr , int N , const char* name , const char* s0 )
{
    int i , f16 = 0 , f32 = 0;
    for( i = 0; i < N; ++i )
    {
        if( arr[i] > 0xFFFF )
            f32 = 1;
        else if( arr[i] > 0xFF )
            f16 = 1;
    }
    printf( "static const %s %s[%d] = { %s ," , f32?"unsigned int":f16?"unsigned short":"unsigned char" , name , N , s0 );
    for( i = 1; i < N ; ++i )
    {
        /* Ten values per line, with a blank line after every hundred. */
        if( i > 100 && i%100 == 1 )
            printf( "\n\n\t" );
        else if( i%10 == 1 )
            printf( "\n\t" );
        printf( "%4d%c" , (int)arr[i] , i==N-1?' ':',' );
    }
    printf( "\n};\n\n" );
}
int main()
{
    int i ;
    int lastdfa , jambase , defrule , ectblsize;
    lastdfa = sizeof( yy_accept ) / sizeof( yy_accept[0] ) - 2;
    jambase = yy_base[ lastdfa + 1 ];
    for( i = 0; yy_accept[i] == 0; ++i )
        ;
    defrule = yy_accept[i] - 1;
    /* Take the equivalence-class table size from the table itself rather
     * than assuming 256 entries: flex sizes yy_ec by the scanner's
     * character-set size, which is smaller for 7-bit scanners. */
    ectblsize = (int)( sizeof( yy_ec ) / sizeof( yy_ec[0] ) );
    printf(
        "enum {\n"
        "\tYY_LASTDFA =\t%4d,\n"
        "\tYY_JAMBASE =\t%4d,\n"
        "\tYY_DEFAULT_RULE =\t%4d,\n"
        "\tYY_ECTBL_SIZE =\t%4d\n"
        "};\n\n",
        lastdfa , jambase , defrule , ectblsize );
#define GENTBL( x , s0 ) gentbl( x , sizeof(x)/sizeof(x[0]) , #x , s0 )
#define GENTBL0( x ) GENTBL( x , "0" )
    GENTBL0( yy_accept );
    /* Element 0 of yy_ec is emitted as the _XDFA_YYEC0 macro. */
    GENTBL ( yy_ec , "_XDFA_YYEC0" );
    GENTBL0( yy_meta );
    GENTBL0( yy_base );
    GENTBL0( yy_def );
    GENTBL0( yy_nxt );
    GENTBL0( yy_chk );
    return 0;
}
int yywrap() { return 1; }
XXEOF
# Compile the three generated helper programs; any compiler failure aborts
# the script through error_exit, which also removes the scratch files.
gcc -o $GENSCONLIST_BIN $GENSCONLIST_SRC || error_exit
gcc -o $GENSACTLIST_BIN $GENSACTLIST_SRC || error_exit
# $OUTSRC carries a .cpp suffix (it contains a C++ template).
gcc -o $OUTBIN $OUTSRC || error_exit
# Write the "X1 SCON LIST" section to stdout: a banner comment, then the
# output of the start-condition helper (run on the generated scanner source)
# wrapped in #ifdef _XDFA_DECLARE_SCONLIST_ ... #endif, then the banner again
# and two blank lines.
#
# Defined with the portable `name()` form: the bash-only `function` keyword
# is a syntax error in strict POSIX shells such as dash.
gen_scon_list()
{
    echo "/*"
    echo " * --------- X1 SCON LIST ------------"
    echo " */"
    echo "#ifdef _XDFA_DECLARE_SCONLIST_"
    "$GENSCONLIST_BIN" "$BASENAME" < "$OUTSRC"
    echo "#undef _XDFA_DECLARE_SCONLIST_"
    echo "#endif /* _XDFA_DECLARE_SCONLIST_ */"
    echo "/*"
    echo " * --------- X1 SCON LIST ------------"
    echo " */"
    echo
    echo
}
# Write the "X2 ACTION LIST" section to stdout: a banner comment, then the
# output of the action-list helper (run on the generated scanner source)
# wrapped in #ifdef _XDFA_DECLARE_ACTIONLIST_ ... #endif, then the banner
# again and two blank lines.
#
# Defined with the portable `name()` form: the bash-only `function` keyword
# is a syntax error in strict POSIX shells such as dash.
gen_action_list()
{
    echo "/*"
    echo " * --------- X2 ACTION LIST ------------"
    echo " */"
    echo "#ifdef _XDFA_DECLARE_ACTIONLIST_"
    "$GENSACTLIST_BIN" "$BASENAME" < "$OUTSRC"
    echo "#undef _XDFA_DECLARE_ACTIONLIST_"
    echo "#endif /* _XDFA_DECLARE_ACTIONLIST_ */"
    echo "/*"
    echo " * --------- X2 ACTION LIST ------------"
    echo " */"
    echo
    echo
}
# Write the "X3 STATE MACHINE" section to stdout: a banner comment, then the
# output of the table-dumping program wrapped in
# #ifdef _XDFA_DECLARE_STATE_MACHINE_ ... #endif, then the banner again and
# two blank lines.
#
# Defined with the portable `name()` form: the bash-only `function` keyword
# is a syntax error in strict POSIX shells such as dash.
gen_dfa_table()
{
    echo "/*"
    echo " * --------- X3 STATE MACHINE ------------"
    echo " */"
    echo "#ifdef _XDFA_DECLARE_STATE_MACHINE_"
    "$OUTBIN" "$BASENAME"
    echo "#undef _XDFA_DECLARE_STATE_MACHINE_"
    echo "#endif /* _XDFA_DECLARE_STATE_MACHINE_ */"
    echo "/*"
    echo " * --------- X3 STATE MACHINE ------------"
    echo " */"
    echo
    echo
}
# Assemble the output file in one pass: the file header (which defines
# _XDFA_YYEC0 unless the includer already did), the three generated sections,
# and the closing #undef. Then report success and remove the scratch files.
{
cat << XXEOF
/*
* Auto Generate by : $INPUTFILE
* *** DO NOT *** modify this file
*/
#ifndef _XDFA_YYEC0
#define _XDFA_YYEC0 (1)
#endif /* _XDFA_YYEC0 */
XXEOF
gen_scon_list
gen_action_list
gen_dfa_table
echo "#undef _XDFA_YYEC0"
} > $OUTPUTFILE

echo Success : $OUTPUTFILE

rm -f $OUTSRC $OUTBIN \
      $GENSCONLIST_SRC $GENSCONLIST_BIN \
      $GENSACTLIST_SRC $GENSACTLIST_BIN
/* gen.c */
#include <stdio.h>
/* Print rule 1: an alternation of each digit repeated four times
 * (0000|1111|...|9999), followed by a newline. */
void rule1()
{
    int digit = 0;

    while (digit < 10) {
        if (digit > 0)
            putchar('|');
        printf("%d%d%d%d", digit, digit, digit, digit);
        ++digit;
    }
    putchar('\n');
}
/* Print rule 2: an alternation of every run of five consecutive digits,
 * first the ascending runs (01234 .. 56789), then the descending ones
 * (43210 .. 98765), followed by a newline. */
void rule2()
{
    int low;
    const char *sep = "";

    /* Ascending runs starting at 0..5. */
    for (low = 0; low <= 5; ++low) {
        printf("%s%d%d%d%d%d", sep, low, low + 1, low + 2, low + 3, low + 4);
        sep = "|";
    }
    /* Descending runs ending at 0..5. */
    for (low = 0; low <= 5; ++low) {
        printf("|%d%d%d%d%d", low + 4, low + 3, low + 2, low + 1, low);
    }
    putchar('\n');
}
/* Print rule 3: for every ordered pair of different digits A, B the
 * alternatives AAABB and AABBB, joined with '|', followed by a newline. */
void rule3()
{
    int a, b;
    int first = 1;   /* no separator before the very first alternative */

    for (a = 0; a < 10; ++a) {
        for (b = 0; b < 10; ++b) {
            if (a != b) {
                printf("%s%d%d%d%d%d|%d%d%d%d%d", first ? "" : "|", a, a, a, b, b, a, a, b, b, b);
                first = 0;
            }
        }
    }
    putchar('\n');
}
/* Print rule 4: "^(" + an alternation of [ijk]+ for every digit triple
 * i < j < k + ")\n" (a literal backslash-n in the pattern), followed by a
 * newline. */
void rule4()
{
    int lo, mid, hi;
    int first = 1;   /* no separator before the very first alternative */

    printf("^(");
    for (lo = 0; lo < 10; ++lo) {
        for (mid = lo + 1; mid < 10; ++mid) {
            for (hi = mid + 1; hi < 10; ++hi) {
                printf("%s[%d%d%d]+", first ? "" : "|", lo, mid, hi);
                first = 0;
            }
        }
    }
    printf(")\\n\n");
}
/* Print rule 5: for every ordered pair of different digits A, B the
 * alternative AAA[0-9]*BBB, joined with '|', followed by a newline. */
void rule5()
{
    int a, b;
    int first = 1;   /* no separator before the very first alternative */

    for (a = 0; a < 10; ++a) {
        for (b = 0; b < 10; ++b) {
            if (a != b) {
                printf("%s%d%d%d[0-9]*%d%d%d", first ? "" : "|", a, a, a, b, b, b);
                first = 0;
            }
        }
    }
    putchar('\n');
}
/* Print rule 6: for every ordered triple of pairwise different digits
 * A, B, C the alternative AABBCC, joined with '|', followed by a newline. */
void rule6()
{
    int a, b, c;
    int first = 1;   /* no separator before the very first alternative */

    for (a = 0; a < 10; ++a) {
        for (b = 0; b < 10; ++b) {
            for (c = 0; c < 10; ++c) {
                int all_distinct = (a != b) && (b != c) && (a != c);
                if (all_distinct) {
                    printf("%s%d%d%d%d%d%d", first ? "" : "|", a, a, b, b, c, c);
                    first = 0;
                }
            }
        }
    }
    putchar('\n');
}
/* Print rule 7: a fixed pattern for an 8-digit date whose year starts with
 * 19, 20 or 21, followed by a month 01-12 and a day 01-31. */
void rule7()
{
    fputs("(19|20|21)[0-9][0-9](0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])\n", stdout);
}
/* Print rule 8: the two fixed digit strings, as an alternation. */
void rule8()
{
    fputs("8881956|1314156\n", stdout);
}
/* Print the first half of rule 9: "^(" + an alternation of each digit
 * repeated three times + ")", followed by a newline. */
void rule9_a()
{
    int digit = 0;

    printf("^(");
    while (digit < 10) {
        const char *sep = (digit == 0) ? "" : "|";
        printf("%s%d%d%d", sep, digit, digit, digit);
        ++digit;
    }
    printf(")\n");
}
/* Print the second half of rule 9: "(" + an alternation of each digit
 * repeated three times + ")\n" (a literal backslash-n in the pattern),
 * followed by a newline. */
void rule9_b()
{
    int digit = 0;

    printf("(");
    while (digit < 10) {
        const char *sep = (digit == 0) ? "" : "|";
        printf("%s%d%d%d", sep, digit, digit, digit);
        ++digit;
    }
    printf(")\\n\n");
}
int main()
{
printf("%%option 7bit\n%%%%\n");
rule1();
rule2();
rule3();
rule4();
rule5();
rule6();
rule7();
rule8();
rule9_a();
rule9_b();
return 0;
}
/* scanner.c */
#include <stdio.h>
#include <string.h>
#define _XDFA_DECLARE_STATE_MACHINE_
#include "tmp/scanner.inl"
/*
 * Run the table-driven DFA (yy_accept / yy_ec / yy_meta / yy_base / yy_def /
 * yy_nxt / yy_chk from the included table file) over the NUL-terminated
 * string s_ and report the first match of a rule other than the default rule.
 *
 * On a match, *o_start receives the start of the matched text, *o_len its
 * length, and the return value is the accepting action number, with every
 * value of 10 or more reported as 9. Returns -1 when the end of the string is
 * reached without such a match; the out parameters are then left untouched.
 */
int slove(const char* s_, char**o_start, int* o_len)
{
unsigned char* p = (unsigned char*)s_;
/* The first scan starts in state 2, every later one in state 1 (see the
 * for-loop step). Presumably the flex start states for "at beginning of
 * line" and "not at beginning of line" -- TODO confirm against the tables. */
int curr = 2, c, action = 0, stbak = 0;
unsigned char* text, *txbak = NULL;
for(;*p;curr = 1)
{
/* Start of the current match attempt. */
text = p;
do {
c = *p;
if(0 == c)
break;
/* Map the input byte to its equivalence class; bytes outside the table
 * are treated as class 1. */
if(c >= YY_ECTBL_SIZE)
c = 1;
else
c = yy_ec[c];
if(0 != (action = yy_accept[curr]))
{
/* An accepting state for a real rule ends the search immediately, so the
 * first (not the longest) match is reported. */
if(action != YY_DEFAULT_RULE)
return *o_start = (char*)text, *o_len = (int)(p - text), action < 10 ? action : 9;
/* Otherwise remember this state and position as the back-up point. */
stbak = curr, txbak = p;
}
/* Compressed-table lookup: follow the yy_def chain until yy_chk confirms
 * that the entry belongs to the state; once the chain reaches a state
 * number of YY_LASTDFA+2 or more, the class is remapped through yy_meta. */
while(yy_chk[yy_base[curr]+c]!=curr)
{
if((curr=yy_def[curr])>=YY_LASTDFA+2)
c = yy_meta[c];
}
curr = yy_nxt[yy_base[curr]+c];
++p;
} while(yy_base[curr] != YY_JAMBASE);
action = yy_accept[curr];
if(0 == action)
{
/* The final state does not accept: fall back to the remembered point.
 * NOTE(review): txbak is still NULL if no accepting state was recorded in
 * this call; the loop condition would then dereference NULL -- confirm the
 * tables make that impossible. */
p = txbak;
curr = stbak;
action = yy_accept[curr];
}
if(action != YY_DEFAULT_RULE)
return *o_start = (char*)text, *o_len = (int)(p - text), action < 10 ? action : 9;
}
return -1;
}
/*
 * Read whitespace-separated tokens from stdin and classify each one.
 *
 * A token is valid when it has at least three characters, does not start
 * with '0' and consists of decimal digits only; anything else prints
 * "invalid input!". A valid token gets a trailing newline appended and is
 * handed to slove(); the result is printed as
 * "match rule R, [offset , length]" or "not match! <code>".
 */
int main()
{
    char buff[32*1024 + 16];
    char* yytext;
    int len, rule, i;

    /* The field width bounds the token so it cannot overflow buff and still
     * leaves room for the newline appended below. */
    while(1 == scanf("%32767s", buff))
    {
        len = (int)strlen(buff);
        if(len < 3 || buff[0] == '0')
        {
            printf("invalid input!\n");
            continue;
        }

        /* Find the first non-digit, if any. */
        for(i = 0; i < len; ++i)
        {
            if(buff[i] < '0' || buff[i] > '9')
                break;
        }
        if(i != len)
        {
            printf("invalid input!\n");
            continue;
        }

        /* Some rules end with a newline, so the token needs one at its end. */
        strcpy(buff + len, "\n");
        rule = slove(buff, &yytext, &len);
        if(rule > 0)
            /* A pointer difference is a ptrdiff_t; convert it for %d. */
            printf("match rule %d, [%d , %d]\n", rule, (int)(yytext - buff), len);
        else
            printf("not match! %d\n", rule);
    }
    return 0;
}
//1、连续4个及以上的同样的数字,比如2222546等;
//2、包含5个连续递增的数字或者递减的数字,比如1234512和3498765等
//3、包含AAABB或者AABBB的字符串,比如22233567和22333567等
//4、仅有2个或者3个数字字符组成的字符串,比如1616161和186168186等
//5、包含AAA+BBB格式的字符串,比如2228333和34447666等
//6、包含AABBCC格式的字符串,比如33225512等
//7、包含生日号,比如19740820和219930621等
//8、包含特殊含义的字符串,比如8881956和1314156等
//9、以AAA开头或者结尾的字符串,比如2221956和1956222等
//说明:字符串全部由数字构成,第一个字符的范围为[1-9],其他位上可以包含0,字符串无位数要求
#include <iostream>
#include <string>
#include <regex>
using namespace std;
int main(int argc,char **argv) {
if (argc<2) {
cout<<"Usage: "<<argv[0]<<" string"<<endl;
return 2;
}
string s(argv[1]);
char *regs_str[9]={
"(\\d)\\1\\1\\1" ,//1、连续4个及以上的同样的数字,比如2222546等;
"01234|12345|23456|34567|45678|56789|98765|87654|76543|65432|54321|43210" ,//2、包含5个连续递增的数字或者递减的数字,比如1234512和3498765等
"(\\d)\\1\\1(\\d)\\2|(\\d)\\3(\\d)\\4\\4" ,//3、包含AAABB或者AABBB的字符串,比如22233567和22333567等
"x"/*这个用正则好象很难实现*/ ,//4、仅有2个或者3个数字字符组成的字符串,比如1616161和186168186等
"(\\d)\\1\\1\\d*?(\\d)\\2\\2" ,//5、包含AAA+BBB格式的字符串,比如2228333和34447666等
"(\\d)\\1(\\d)\\2(\\d)\\3" ,//6、包含AABBCC格式的字符串,比如33225512等
"(19[5-9]\\d|20[01]\\d)[01]\\d[0-3]\\d"/*这个用正则精确实现也很难*/ ,//7、包含生日号,比如19740820和219930621等
"8881956|1314156" ,//8、包含特殊含义的字符串,比如8881956和1314156等
"^(\\d)\\1\\1|(\\d)\\2\\2$" ,//9、以AAA开头或者结尾的字符串,比如2221956和1956222等
};
regex regs[9];
for (int i=0;i<9;i++) {
regs[i]=regex(regs_str[i]);
if (regex_search(s,regs[i])) {
cout<<s<<" match rule "<<(i+1)<<":"<<regs_str[i]<<endl;
return 1;
}
}
return 0;
}
//C:\test\Debug>test
//Usage: test string
//
//C:\test\Debug>test 2222546
//2222546 match rule 1:(\d)\1\1\1
//
//C:\test\Debug>test 1234512
//1234512 match rule 2:01234|12345|23456|34567|45678|56789|98765|87654|76543|65432|54321|43210
//
//C:\test\Debug>test 3498765
//3498765 match rule 2:01234|12345|23456|34567|45678|56789|98765|87654|76543|65432|54321|43210
//
//C:\test\Debug>test 22233567
//22233567 match rule 3:(\d)\1\1(\d)\2|(\d)\3(\d)\4\4
//
//C:\test\Debug>test 22333567
//22333567 match rule 3:(\d)\1\1(\d)\2|(\d)\3(\d)\4\4
//
//C:\test\Debug>test 2228333
//2228333 match rule 5:(\d)\1\1\d*?(\d)\2\2
//
//C:\test\Debug>test 34447666
//34447666 match rule 5:(\d)\1\1\d*?(\d)\2\2
//
//C:\test\Debug>test 33225512
//33225512 match rule 6:(\d)\1(\d)\2(\d)\3
//
//C:\test\Debug>test 19740820
//19740820 match rule 7:(19[5-9]\d|20[01]\d)[01]\d[0-3]\d
//
//C:\test\Debug>test 219930621
//219930621 match rule 7:(19[5-9]\d|20[01]\d)[01]\d[0-3]\d
//
//C:\test\Debug>test 8881956
//8881956 match rule 8:8881956|1314156
//
//C:\test\Debug>test 1314156
//1314156 match rule 8:8881956|1314156
//
//C:\test\Debug>test 2221956
//2221956 match rule 9:^(\d)\1\1|(\d)\2\2$
//
//C:\test\Debug>test 1956222
//1956222 match rule 9:^(\d)\1\1|(\d)\2\2$
//
//有字符串
//1_22_333,,4444__55555,_666666
//需要解析为
//1
//22
//333
//_
//4444
//55555
//666666
#include <stdio.h>
/*
 * Splits the fixed string s into tokens with a three-state machine and
 * prints one token per line.
 *
 * '_' and ',' both end a token. In addition, a ',' that follows another ','
 * (with only '_' characters in between, if any) or that ends the string
 * produces a line containing just "_".
 *
 * States:
 *   0 - at the start, or after a '_' that was not preceded by a pending ','
 *   1 - after a ','
 *   2 - inside a token that starts at p1
 */
char s[]="1_22_333,,4444__55555,_666666";
char c,*p,*p1;
int st;
/* main must return int; 0 reports success to the host environment. */
int main(void) {
    st=0;
    p=s;
    while (1) {
        c=*p;
        if (0==c) {
            /* End of input: flush whatever the current state still owes. */
            switch (st) {
            case 1:printf("_\n"); break;
            case 2:printf("%s\n",p1);break;
            default:break;
            }
            break;
        }
        switch (st) {
        case 0:
            if ('_'==c) { st=0;}
            else if (','==c) { st=1;}
            else {p1=p; st=2;}
            break;
        case 1:
            if ('_'==c) { st=1;}
            else if (','==c) {printf("_\n"); st=1;}
            else {p1=p; st=2;}
            break;
        case 2:
            /* Terminate the token in place just long enough to print it,
             * then restore the separator. */
            if ('_'==c) {*p=0;printf("%s\n",p1);*p=c;st=0;}
            else if (','==c) {*p=0;printf("%s\n",p1);*p=c;st=1;}
            else { st=2;}
            break;
        default:
            break;
        }
        p++;
    }
    return 0;
}
//1
//22
//333
//_
//4444
//55555
//666666