64,654
社区成员
发帖
与我相关
我的任务
分享
/*
1.txt存放文章
table.txt是停用词表,每行一个停用词
2.txt是修改后的文章
*/
#include <stdio.h>
#include <wchar.h>
/* Streams opened by main: fi1 reads "1.txt", fi2 reads "table.txt",
   fo writes "2.txt". */
FILE *fi1, *fi2, *fo;
/* Line buffer passed to fgetws; holds at most 255 wide characters plus the
   terminator. */
wchar_t buf[256];
/* Not referenced by the code visible in this listing. */
wchar_t word[30];
/* Stop-word table: up to 256 entries, each at most 29 wide characters plus
   the terminator. */
wchar_t table[256][30];
/* Scratch pointer for searching inside a line. */
wchar_t *p;
/* i: loop index over table; n: scratch string length; N: number of stop
   words currently stored in table. */
int i, n, N;
/* Strips any trailing '\n' / '\r' characters from line in place and returns
   the resulting length, so CRLF files, LF files and a final line without a
   newline are all handled. */
static size_t strip_line_ending(wchar_t *line)
{
    size_t len = wcslen(line);

    while (len > 0 && (L'\n' == line[len - 1] || L'\r' == line[len - 1]))
    {
        line[--len] = L'\0';
    }
    return len;
}

/* Deletes every occurrence of stop (stop_len > 0 wide characters long) from
   line in place. */
static void remove_word(wchar_t *line, const wchar_t *stop, size_t stop_len)
{
    wchar_t *hit = line;

    while (NULL != (hit = wcsstr(hit, stop)))
    {
        /* Source and destination overlap, so wmemmove is required; the +1
           moves the terminator together with the tail. */
        wmemmove(hit, hit + stop_len, wcslen(hit + stop_len) + 1);
    }
}

/*
 * Reads stop words from "table.txt" (one per line), removes every occurrence
 * of each stop word from every line of "1.txt" and writes the result to
 * "2.txt" with "\r\n" line endings. Blank input lines are dropped.
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot be
 * written.
 *
 * NOTE(review): how fgetws/fputws convert between bytes and wide characters
 * depends on the current locale; confirm whether the target platform needs a
 * setlocale() call for non-ASCII text.
 */
int main()
{
    const size_t buf_cap = sizeof buf / sizeof buf[0];
    const size_t table_cap = sizeof table / sizeof table[0];
    const size_t word_cap = sizeof table[0] / sizeof table[0][0];
    size_t len;
    int has_eol;
    int at_line_start = 1;
    int status = 0;

    if (NULL == (fi1 = fopen("1.txt", "rb")))
    {
        fprintf(stderr, "Can not open file : 1.txt\n");
        return 1;
    }
    if (NULL == (fi2 = fopen("table.txt", "rb")))
    {
        fprintf(stderr, "Can not open file : table.txt\n");
        fclose(fi1);
        return 1;
    }
    if (NULL == (fo = fopen("2.txt", "wb")))
    {
        fprintf(stderr, "Can not open file : 2.txt\n");
        fclose(fi1);
        fclose(fi2);
        return 1;
    }

    /* Load the stop-word table. */
    N = 0;
    while (NULL != fgetws(buf, (int)buf_cap, fi2))
    {
        len = strip_line_ending(buf);
        if (0 == len)
        {
            /* Blank line. An empty stop word must never be stored: wcsstr
               matches it everywhere and the removal loop would not end. */
            continue;
        }
        if (len >= word_cap)
        {
            fprintf(stderr, "Stop word too long, skipped\n");
            continue;
        }
        if ((size_t)N >= table_cap)
        {
            fprintf(stderr, "Too many stop words, ignoring the rest\n");
            break;
        }
        wmemcpy(table[N++], buf, len + 1);
    }

    /* Delete the stop words from the article, line by line. A line longer
       than the buffer arrives in several chunks; a stop word split across
       two chunks is not detected. */
    while (NULL != fgetws(buf, (int)buf_cap, fi1))
    {
        len = wcslen(buf);
        has_eol = (len > 0 && L'\n' == buf[len - 1]);
        len = strip_line_ending(buf);
        if (0 == len && at_line_start)
        {
            continue; /* blank line */
        }
        for (i = 0; i < N; i++)
        {
            remove_word(buf, table[i], wcslen(table[i]));
        }
        /* fputws avoids the %s / %ls ambiguity of fwprintf for wide text.
           The line break is only written when this chunk really ended a
           line, so over-long lines are not split in the output. */
        if (fputws(buf, fo) < 0 || (has_eol && fputws(L"\r\n", fo) < 0))
        {
            fprintf(stderr, "Can not write file : 2.txt\n");
            status = 1;
            break;
        }
        at_line_start = has_eol;
    }

    fclose(fi1);
    fclose(fi2);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (0 != fclose(fo) && 0 == status)
    {
        fprintf(stderr, "Can not write file : 2.txt\n");
        status = 1;
    }
    return status;
}
aaaaaaxxaaaaxaaaaaa
bbbb啦啦bbbbbb啦bbbbbb
2.txt
aaaaaabeautyaaaaxaaa
bbbb我爱你bbbbbb啦bbbb
/*
将"xx"替换为"beauty"
将"啦啦"替换为"我爱你"
*/
#include <stdio.h>
#include <wchar.h>
/* Streams opened by main: fi reads "1.txt", fo writes "2.txt". */
FILE *fi, *fo;
/* Line buffer passed to fgetws; holds at most 255 wide characters plus the
   terminator. */
wchar_t buf[256];
/* Scratch pointer for searching inside a line. */
wchar_t *p;
/* Scratch string length. */
int n;
/* Strips any trailing '\n' / '\r' characters from line in place and returns
   the resulting length. */
static size_t strip_line_ending(wchar_t *line)
{
    size_t len = wcslen(line);

    while (len > 0 && (L'\n' == line[len - 1] || L'\r' == line[len - 1]))
    {
        line[--len] = L'\0';
    }
    return len;
}

/*
 * Replaces every occurrence of from with to inside line, in place.
 * cap is the capacity of line in wide characters, terminator included.
 * Returns 0 on success, or -1 when the next replacement would not fit; in
 * that case the replacements made so far are kept and the rest of the line
 * is left unchanged.
 */
static int replace_all(wchar_t *line, size_t cap,
                       const wchar_t *from, const wchar_t *to)
{
    const size_t from_len = wcslen(from);
    const size_t to_len = wcslen(to);
    size_t len = wcslen(line);
    wchar_t *hit = line;

    if (0 == from_len)
    {
        return 0; /* an empty pattern would match forever */
    }
    while (NULL != (hit = wcsstr(hit, from)))
    {
        if (to_len > from_len && len + (to_len - from_len) >= cap)
        {
            return -1;
        }
        /* Counts are in wide characters, not bytes; the +1 moves the
           terminator together with the tail. */
        wmemmove(hit + to_len, hit + from_len, wcslen(hit + from_len) + 1);
        wmemcpy(hit, to, to_len);
        len = len - from_len + to_len;
        hit += to_len; /* do not rescan the text just inserted */
    }
    return 0;
}

/*
 * Copies "1.txt" to "2.txt" line by line, replacing "xx" with "beauty" and
 * "啦啦" with "我爱你". Output lines end with "\r\n".
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot be
 * written.
 *
 * NOTE(review): how fgetws/fputws convert between bytes and wide characters
 * depends on the current locale; confirm whether the target platform needs a
 * setlocale() call for non-ASCII text.
 */
int main()
{
    /* "xx" -> "beauty" can at most triple the length of a line, so a work
       buffer three times the read buffer always has room. */
    wchar_t work[3 * (sizeof buf / sizeof buf[0])];
    const size_t work_cap = sizeof work / sizeof work[0];
    size_t len;
    int has_eol;
    int status = 0;

    if (NULL == (fi = fopen("1.txt", "rb")))
    {
        fprintf(stderr, "Can not open file : 1.txt\n");
        return 1;
    }
    if (NULL == (fo = fopen("2.txt", "wb")))
    {
        fprintf(stderr, "Can not open file : 2.txt\n");
        fclose(fi);
        return 1;
    }

    /* A line longer than the read buffer arrives in several chunks; a
       pattern split across two chunks is not detected. */
    while (NULL != fgetws(buf, (int)(sizeof buf / sizeof buf[0]), fi))
    {
        len = wcslen(buf);
        has_eol = (len > 0 && L'\n' == buf[len - 1]);
        len = strip_line_ending(buf);
        wmemcpy(work, buf, len + 1);

        if (0 != replace_all(work, work_cap, L"xx", L"beauty") ||
            0 != replace_all(work, work_cap, L"啦啦", L"我爱你"))
        {
            fprintf(stderr, "Line too long after replacement\n");
        }

        /* fputws avoids the %s / %ls ambiguity of fwprintf for wide text.
           The line break is only written when this chunk really ended a
           line. */
        if (fputws(work, fo) < 0 || (has_eol && fputws(L"\r\n", fo) < 0))
        {
            fprintf(stderr, "Can not write file : 2.txt\n");
            status = 1;
            break;
        }
    }

    fclose(fi);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (0 != fclose(fo) && 0 == status)
    {
        fprintf(stderr, "Can not write file : 2.txt\n");
        status = 1;
    }
    return status;
}
在我眼里,一篇文章和一个字符串没有区别。
在我眼里,一篇文章和一个字符串没有区别。
分词前就把停用词剔除,会导致分词不准吧。如果实在要删除,就根据停用词表直接把字符串中的停用词删掉。
/* For reference only. */
#include <stdio.h>
#include <string.h>

char s[256];

/*
 * Replaces every occurrence of from with to inside text, in place.
 * cap is the capacity of text in bytes, terminator included. Lengths are
 * measured with strlen, so the result does not depend on how many bytes the
 * source encoding uses for a multi-byte pattern. Replacement stops early if
 * the next substitution would not fit in cap bytes.
 */
static void replace_all(char *text, size_t cap, const char *from, const char *to)
{
    const size_t from_len = strlen(from);
    const size_t to_len = strlen(to);
    size_t len = strlen(text);
    char *hit = text;

    if (0 == from_len)
    {
        return; /* an empty pattern would match forever */
    }
    while (NULL != (hit = strstr(hit, from)))
    {
        if (to_len > from_len && len + (to_len - from_len) >= cap)
        {
            break;
        }
        /* Regions overlap, hence memmove; the +1 moves the terminator. */
        memmove(hit + to_len, hit + from_len, strlen(hit + from_len) + 1);
        memcpy(hit, to, to_len);
        len = len - from_len + to_len;
        hit += to_len; /* do not rescan the text just inserted */
    }
}

/*
 * Repeatedly prompts for a line on stdin, replaces "%20" with a space and
 * "你懂得" with "XXXXXX", and prints the result. Stops on an empty line, on
 * end of input or on a read error.
 */
int main()
{
    while (1)
    {
        printf("请输入一行文字(空行结束),\"%%20\"将替换为\" \",\"你懂得\"将替换为\"XXXXXX\":\n");
        if (NULL == fgets(s, sizeof s, stdin))
        {
            break; /* EOF or read error: s holds nothing new to process */
        }
        if ('\n' == s[0])
        {
            break;
        }
        replace_all(s, sizeof s, "%20", " ");
        replace_all(s, sizeof s, "你懂得", "XXXXXX");
        printf("%s", s);
    }
    return 0;
}
/* Sample session:
//请输入一行文字(空行结束),"%20"将替换为" ","你懂得"将替换为"XXXXXX":
//abcdefg%20helloworld%20something.pdf
//abcdefg helloworld something.pdf
//请输入一行文字(空行结束),"%20"将替换为" ","你懂得"将替换为"XXXXXX":
//这是测试文字你懂得,在这个你懂的地方,就得做你懂得的事
//这是测试文字XXXXXX,在这个你懂的地方,就得做XXXXXX的事
//请输入一行文字(空行结束),"%20"将替换为" ","你懂得"将替换为"XXXXXX":
//
*/