文本文件内容定位问题？

sun420 2008-04-07 02:56:29

文本文件中保存多行数据，数据的格式如下：
2008_51,25,文件记录文件记录,文件记录文件记录.aaa
2008_52,266,文件记录文件记录,文件记录文件记录.aaa
2008_53,255,文件记录文件记录,文件记录文件记录.aaa
2008_54,25,文件记录文件记录,文件记录文件记录.aaa
......
***************************************************
每一行的记录长度不确定，那么如何取得任意一条记录的内容呢？

用C语言实现的，谢谢各位了！

...全文

231 9 打赏收藏转发到动态举报

写回复

用AI写文章

9 条回复

切换为时间正序

请发表友善的回复…

发表回复

rushman 2008-04-08

打赏
举报

> 我的文件很大，有几十万行数据吧。我打算的是分块读入内存，然后进行排序和查找等操作......
建个索引吧。不一定要分块，记录跨块边界时比较麻烦。一行行读，一条记录一条记录的处理并不会降低效率。

索引的组织和排序的算法很多，需要斟酌。
最简单的索引是一维数组，配合快速排序的话，可以这样处理：

//

// 随手写的，没有检查、测试，程序中也没有检查参数及返回值，仅供参考

//



// One index entry describing a single record of the data file.

struct IndexItem{

  long offset;         // position of the record in the file

  unsigned long hashkey; // hash of the record's key, kept so sorting/searching can avoid frequent file access

};



// Because the file is large, a batch of records can be read and sorted at a time
// to form an index block; the blocks are merged afterwards.

// Index blocks can be organized as a linked list.

struct IndexBlock{

  IndexItem * index;      // array of index entries owned by this block

  int itemcount;          // number of entries stored in index

  IndexBlock * nextblock; // next block in the list, NULL at the end

};



// Compute a hash value for one record.
//
// item: NUL-terminated record text.
// Returns the XOR of the first two unsigned-long-sized words of the record.
// Records shorter than two words are treated as zero-padded, so the result
// depends only on the record's own bytes.
//
// The bytes are copied into a local word buffer instead of casting the char
// pointer to unsigned long*: the cast violates alignment and aliasing rules
// and would read past the terminator of short records.
// Replace with a stronger hash if the application needs one.
unsigned long hashKeyValue(const char * item){
  unsigned long words[2] = {0UL, 0UL};
  unsigned char * dst = (unsigned char *)words;
  unsigned int i;

  // Stop at the terminator so nothing beyond the record is ever read.
  for(i = 0; i < sizeof words && item[i] != '\0'; i++){
    dst[i] = (unsigned char)item[i];
  }

  return (words[0] ^ words[1]);
}



// Compare two records; the signature matches the comparator that qsort expects.
// NOTE(review): the body is only a placeholder - nothing is returned yet, so a
// real comparison has to be filled in before this function is used.

int compRecord(const void * item1,const void * item2){

	// Intended approach: compare hashkey first; if equal, read the file for an exact comparison

	//..........

}



// 一次读取idxCount条记录

int readDataBlock(FILE* inputfile, IndexItem * idx,int idxcount){

  int i;

  char buffer[BUFFERSIZE];

  for(i = 0; i < idxcount && !feof(inputfile);i++){

    fgets(buffer,BUFFERSIZE,inputfile);

    idx[i].offset = ftell(inputfile);

    idx[i].hashkey = hashKeyValue(buffer);

  }

  return (i);

}



// 读取文件记录

int readData(FILE * inputfile, IndexBlock * head){

  int count = 0;

  IndexBlock * newblock = head;

  do{

    newblock->nextblock = new IndexBlock;

    newblock = newblock->nextblock;

    newblock->index = new IndexItem[BLOCKSIZE];

    newblock->itemcount = readDataBlock(inputfile,newblock->index,BLOCKSIZE);

    count += newblock->itemcount;

  }while(!feof(inputfile));

  newblock->nextblock = NULL;

  return (count);

}



// Merge two sorted index blocks into one sorted block.
//
// index1, index2: blocks whose entries are already sorted according to compRecord.
// resuindex:      destination block; resuindex->index must have room for
//                 index1->itemcount + index2->itemcount entries.
// Returns the number of entries written, which is also stored in
// resuindex->itemcount. resuindex->nextblock is not touched.
//
// On ties the entry from index1 is emitted first.
int unionIndexBlock(const IndexBlock * index1,const IndexBlock * index2,IndexBlock * resuindex){
  int i1 = 0;
  int i2 = 0;
  int ir = 0;

  // Compare only while both inputs still have entries; reading past the end
  // of an exhausted block would be out of bounds.
  while(i1 < index1->itemcount && i2 < index2->itemcount){
    // compRecord takes pointers to the entries, as qsort comparators do.
    if(compRecord(&index1->index[i1],&index2->index[i2]) > 0){
      resuindex->index[ir++] = index2->index[i2++];
    }else{
      resuindex->index[ir++] = index1->index[i1++];
    }
  }

  // Copy whatever is left of the block that was not exhausted.
  while(i1 < index1->itemcount){
    resuindex->index[ir++] = index1->index[i1++];
  }
  while(i2 < index2->itemcount){
    resuindex->index[ir++] = index2->index[i2++];
  }

  resuindex->itemcount = ir;
  return (resuindex->itemcount);
}



// Read inputfile and build a single sorted index for it.
//
// inputfile: stream to index.
// Returns the one index block that remains after all blocks have been merged.
// The block and its index array are allocated with new / new[]; the caller is
// responsible for releasing them with delete[] / delete.
//
// Steps: read the file into a list of blocks, sort every block with qsort,
// then repeatedly merge neighbouring pairs until only one block is left.
IndexBlock * createIndex(FILE * inputfile){
  IndexBlock head;              // dummy list head, only nextblock is used
  head.index = NULL;
  head.itemcount = 0;
  head.nextblock = NULL;

  readData(inputfile, &head);

  // Sort each block on its own; the array to sort is the block's index member.
  for(IndexBlock * blk = head.nextblock; blk != NULL; blk = blk->nextblock){
    qsort(blk->index,blk->itemcount,sizeof(IndexItem),compRecord);
  }

  // Merge passes: each pass walks the list and replaces every pair of
  // neighbouring blocks by their merged block.
  while(head.nextblock != NULL && head.nextblock->nextblock != NULL){
    IndexBlock * prev = &head;

    while(prev->nextblock != NULL && prev->nextblock->nextblock != NULL){
      IndexBlock * left = prev->nextblock;
      IndexBlock * right = left->nextblock;

      IndexBlock * merged = new IndexBlock;
      merged->index = new IndexItem[left->itemcount + right->itemcount];
      merged->nextblock = right->nextblock;   // remember the rest of the list before freeing

      unionIndexBlock(left,right,merged);

      delete [] right->index;
      delete [] left->index;
      delete right;
      delete left;

      prev->nextblock = merged;
      prev = merged;                          // continue with the pair after the merged block
    }
  }

  return (head.nextblock);
}

sun420 2008-04-08

打赏
举报

[Quote=引用 7 楼 rushman 的回复:]
如果文件不大，可以考虑一次性全部读入内存。
如果文件比较大，那就建个索引。

C/C++ code
#define MAXSIZE 1000
#define BUFFSIZE 1000

/* index[k] holds the stream position of the start of line k. */
fpos_t index[MAXSIZE];
char buffer[BUFFSIZE];
int lineCount = 0; /* number of valid entries in index[] */

for(int i = 0;i < MAXSIZE;i++){
/* fgetpos takes the stream first, then the position to fill in. */
if(fgetpos(inputFile,index + i) != 0)break;
/* Stop on the fgets result; feof() is only set after a failed read, */
/* which would leave a bogus entry for the end-of-file position. */
if(fgets(buffer,BUFFSIZE,inputFile) == NULL)break;
lineCount = i + 1;
}
[/Quote]
我的文件很大，有几十万行数据吧。我打算的是分块读入内存，然后进行排序和查找等操作。
不知道有什么好的建议吗？谢谢了

rushman 2008-04-07

打赏
举报

如果文件不大，可以考虑一次性全部读入内存。
如果文件比较大，那就建个索引。

#define MAXSIZE 1000
#define BUFFSIZE 1000

/* index[k] holds the stream position of the start of line k, so a line can
   later be reached with fsetpos(inputFile, &index[k]).
   Lines longer than BUFFSIZE-1 characters occupy more than one entry. */
fpos_t index[MAXSIZE];
char buffer[BUFFSIZE];
int lineCount = 0;   /* number of valid entries in index[] */

for(int i = 0;i < MAXSIZE;i++){
  /* fgetpos takes the stream first, then the position to fill in. */
  if(fgetpos(inputFile,index + i) != 0)break;
  /* Stop on the fgets result; feof() is only set after a failed read,
     which would leave a bogus entry for the end-of-file position. */
  if(fgets(buffer,BUFFSIZE,inputFile) == NULL)break;
  lineCount = i + 1;
}

冰矿 2008-04-07

打赏
举报



#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#define M 256

/*
 * Ask the user for a record number and print every line of file.txt that
 * contains it.
 *
 * Exits with status 1 when file.txt cannot be opened or no search string
 * could be read; returns 0 otherwise.
 */
int main()
{
    /* "r" is the standard text-mode string; "rt" is a non-standard extension. */
    FILE *F = fopen("file.txt","r");
    char buff[M];
    char searchstr[M];
    char fmt[32];

    if (F == NULL) exit(1);

    printf("输入你要找的纪录号：");

    /* Limit the input to the buffer size; a bare %s can overflow searchstr. */
    snprintf(fmt, sizeof fmt, "%%%ds", M - 1);
    if (scanf(fmt, searchstr) != 1)
    {
        fclose(F);
        exit(1);
    }

    /* Loop on the fgets result: with while(!feof(F)) the last line would be
       processed a second time after the final, failed read. */
    while (fgets(buff, sizeof buff, F) != NULL)
    {
        if (buff[0] == '\n') continue; /* skip empty lines */

        if (strstr(buff, searchstr) != NULL)
        {
            puts(buff);
        }
    }

    fclose(F);
    system("PAUSE"); /* Windows-only: keeps the console window open */
    return 0;
}

测试

输入你要找的纪录号：2008_54

2008_54,25,文件记录文件记录,文件记录文件记录.aaa

请按任意键继续. . .

Inhibitory 2008-04-07

打赏
举报

自己再写一个程序, 对这个文件的每一行开始的位置建立索引, 然后先读入索引文件, 读取行的位置, 打开这个文本文件, 把文件指针跳到目标位置

lala_benben 2008-04-07

打赏
举报

一次读一行。。。

sun420 2008-04-07

打赏
举报

[Quote=引用 2 楼 Kenmark 的回复:]
读入内存建立表格然后索引
因为记录长度不一样
[/Quote]
能不能具体讲一下呢？
你的意思是再建立一个文件，用来记录每条记录的长度吗？
那如果现在就想按顺序遍历这个文件，同时返回此时位置指针指向记录的内容呢？
这样应该比随机取的实现方法简单一些吧？