因为手头没有数据文件,下面的代码未经调试,请自行调试。注意:如果 ReadInt 读取的结果不对,把 4 个字节的顺序颠倒(即调换字节序)即可:
------
BlockFileReader.cs:
--------------
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
namespace WindowsApplication1
{
/// <summary>
/// One record produced by BlockFileReader: a word followed by two integers.
/// </summary>
public class Block
{
/// <summary>Text of the record; ReadBlock fills it by UTF-8 decoding the bytes that precede a zero byte.</summary>
public string Word;
/// <summary>First integer; ReadBlock reads it with ReadInt starting at the byte right after the zero byte.</summary>
public int Int1;
/// <summary>Second integer; ReadBlock reads it with ReadInt starting four bytes after the start of Int1.</summary>
public int Int2;
}
public class BlockFileReader
{
/// <summary>
/// Reads the entire contents of a file into a byte array.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <returns>
/// The bytes of the file. If the file shrinks while it is being read, only the
/// bytes that were actually read are returned.
/// </returns>
/// <exception cref="IOException">The file is larger than <see cref="int.MaxValue"/> bytes.</exception>
private static byte[] ReadFile(string fileName)
{
    // FileShare.ReadWrite allows opening the file even while another handle has it open for writing.
    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        long length = fs.Length;
        if (length > int.MaxValue)
        {
            // A plain (int) cast would silently wrap around and allocate a wrongly sized buffer.
            throw new IOException("File is too large to be loaded into a single byte array: " + fileName);
        }

        byte[] content = new byte[(int)length];
        int total = 0;

        // Stream.Read may return fewer bytes than requested, so keep reading until the buffer is full.
        while (total < content.Length)
        {
            int read = fs.Read(content, total, content.Length - total);
            if (read == 0)
            {
                // End of stream reached early (the file was truncated after Length was sampled).
                break;
            }
            total += read;
        }

        if (total < content.Length)
        {
            // Drop the unread tail so callers never see padding bytes that were not in the file.
            Array.Resize(ref content, total);
        }
        return content;
    }
}
/// <summary>
/// Loads the given file and parses its bytes into a list of blocks.
/// </summary>
/// <param name="fileName">Path of the file to parse.</param>
/// <returns>The list that ReadBlock appended blocks to.</returns>
public List<Block> Read(string fileName)
{
    byte[] bytes = ReadFile(fileName);
    List<Block> blocks = new List<Block>();
    int position = 0;

    // ReadBlock always runs at least once; each call resumes at the index returned by
    // the previous call, and the loop ends once that index reaches the end of the data.
    do
    {
        position = ReadBlock(bytes, position, blocks);
    }
    while (position < bytes.Length);

    return blocks;
}
private static int ReadBlock(byte[] content, int blockStartIndex, List<Block> result)
{
for (int i = blockStartIndex; i < content.Length; i++)
{
if (content[i] == 0)
{
Block blk = new Block();
blk.Word = Encoding.UTF8.GetString(content, blockStartIndex, i - blockStartIndex);
blk.Int1 = ReadInt(content, i + 1);
blk.Int2 = ReadInt(content, i + 5);
result.Add(blk);
return i + 9;//把指针移动到第二个整数之后
}
}
System.IO.StreamReader sr = new System.IO.StreamReader("FILE.txt", Encoding.UTF8);
string text = sr.ReadToEnd();
sr.Close();
System.Text.RegularExpressions.Regex regex = new System.Text.RegularExpressions.Regex("(?<word>[^\0]*)\0(?<i1>[0-9]*).*?(?<i2>[0-9]*)");
System.Text.RegularExpressions.Match match = regex.Match(text);
while (match.Success)
{
string word = match.Groups["word"].Value;
int i1 = int.Parse(match.Groups["i1"].Value);
int i2 = int.Parse(match.Groups["i2"].Value);
match = match.NextMatch();
}