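/*
 * Forum page residue captured together with this listing; not part of the program:
 * 69,368 community members | Post | Related to me | My tasks | Share
 */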
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <windows.h>
#define WFNSIZE 35
#define CLASIZE 4
#define CLASSNUM 300
#define TPSIZE 75
#define ENTRYSIZE 192429
//定义结构体用于存储文件名,以及文件类别
typedef struct WebFileNameNode
{
char wfname[WFNSIZE];
struct WebFileNameNode *next;
}WNode,*WebfNameList;
struct ClassificationLink
{
char webclass[CLASIZE];
WebfNameList wfn;
}classlink[CLASSNUM];
//处理字符数组函数,并将文件名和文件类别存入结构体
void deal( char *p)
{
char classtp[CLASIZE],wfntp[WFNSIZE];
WNode *node=NULL,*t=NULL;
int i,j,num=0;
//取出主题类号,并放入临时字符数组classtp
for (i=0,j=0;p[i]!=' '&&i<75;i++)
{
classtp[j]=p[i];
j++;
}
classtp[j]='\0';
i=i+3;
//取出文件名,并放入临时字符数组wfntp
for (j=0;p[i]!=' '&&i<75;i++)
{
wfntp[j]=p[i];
j++;
}
wfntp[j]='\0';
i=1;
//计算当前主题类号的整形表示
num=((int)classtp[i]-48)*100+((int)classtp[i+1]-48)*10+((int)classtp[i+2]-48);
node=(WebfNameList)malloc(sizeof(WNode));
node->next = NULL;
memset(node->wfname, 0, WFNSIZE);
strcpy(node->wfname,wfntp);
//将对应文件节点连入对应类链表
if (classlink[num].wfn==NULL)
{
strcpy(classlink[num].webclass,classtp);
classlink[num].wfn=node;
t=node;
}
else
{
t->next=node;
t=node;
}
t->next=NULL;
}
//构建文件类别和文件链表函数,将文件名结构体按序接在文件类别结构体后
void CreatWebNameList()
{
void deal(char *p);
FILE *fp;
int i,k;
char temp[TPSIZE];
//对由主题类号充当的链表头进行初始化
for (k=0;k<CLASSNUM;k++)
{
classlink[k].wfn=NULL;
}
if ((fp=fopen("BASELINE-D-C-1.txt","r"))==NULL)
{
printf("can not open file\n");
exit(0);
}
fgets(temp,TPSIZE,fp);
for (i=0; i<ENTRYSIZE; ++i)
{
memset(temp,0,TPSIZE);
fgets(temp,TPSIZE,fp);
deal(temp);
}
fclose(fp);
}
//匹配函数
void match()
{
WIN32_FIND_DATA FileData;
HANDLE hSearch;
char szDirPath[] = "E:\\material\\BASELINE-D-C-1-top1000contents\\";
char szNewPath[MAX_PATH];
char szFPath[MAX_PATH];
int i;
int flagnf[CLASSNUM];
WNode *p;
BOOL fFinished = FALSE;
BOOL cFinished = FALSE;
//初始化各类文件夹是否创建标记
for (i=0;i<CLASSNUM;i++)
{
flagnf[i]=1;
}
//在当前目录下搜索.htm文件
hSearch = FindFirstFile("*.htm", &FileData);
if (hSearch == INVALID_HANDLE_VALUE)
{
printf("No .htm files found.");
}
//创建文件夹
if (!CreateDirectory(szDirPath, NULL))
{
printf("Couldn't create new directory.");
}
//对每个文件进行匹配并分类
while (!fFinished)
{
//逐个扫描每个类别链表
for (i=0;i<CLASSNUM;i++)
{
p=classlink[i].wfn;
while (p&&cFinished)
{
if (strcmp(p->wfname,FileData.cFileName)==0)
{
if (flagnf[i]!=0)
{
lstrcpy(szNewPath, szDirPath);
lstrcat(szNewPath, classlink[i].webclass);
if (!CreateDirectory(szNewPath, NULL))
{
printf("Couldn't create new directory.");
flagnf[i]=0;
}
}
lstrcpy(szFPath, szNewPath);
lstrcat(szFPath, FileData.cFileName);
if (CopyFile(FileData.cFileName, szFPath, FALSE))
{
cFinished = TRUE;
}
else
{
printf("Couldn't copy file.");
}
//优化:当匹配到对应文件后,将链表中的对应文件节点删除,将源文件删除;
}
else
{
p=p->next;
}
}
}
//提取下一个要匹配的文件信息
if (i==CLASSNUM&&cFinished==FALSE)
printf("Couldn't find find %s's class.",FileData.cFileName);
else if (!FindNextFile(hSearch, &FileData))
{
if (GetLastError() == ERROR_NO_MORE_FILES)
{
MessageBox(NULL, "No more .TXT files.",
"Search completed.", MB_OK);
fFinished = TRUE;
}
else
{
printf("Couldn't find next file.");
}
}
}
//关闭搜索控制
if (!FindClose(hSearch))
{
printf("Couldn't close search handle.");
}
}
//主函数
int main()
{
void CreatWebNameList();
void match();
CreatWebNameList();
match();
return 0;
}