使用Open XXX For Input As #1打开一文本文件时,为什么中文字符处理不对头?

嗷嗷叫的老马 2006-01-20 02:51:52
因为我使用文本文件来存储信息,但信息中包含中文。

写入时没问题,但读出来时就是乱码了。(Line Input读)

而且对于一个文件的头几个字符的读取也有问题(比如注册表文件,我写了一个过程来解析系统注册表文件,如果全是英文就没有一点问题,但是中文始终是乱码)

原因不明,希望有朋友可以给出原因及解决方案。

先谢谢了。
...全文
549 点赞 收藏 26
写回复
26 条回复
切换为时间正序
请发表友善的回复…
发表回复
zyl910 2006-01-25
没问题
回复
非常感谢这个模块!

读取中文字符现在没有问题了!

马上结帖!!!!
回复
TO zyl910(910:分儿,我又来了!)

可否让我把这个模块放在我论坛上供网友们使用?
回复
fuxc 2006-01-24
910几年以来一直都是个强人
回复
xuxun 2006-01-24
只能无条件佩服楼主了,呵呵!
回复
zyl910 2006-01-23
//晕,至于这么复杂吗?


现在文本编码技术发展飞快

GB2312-1980标准在1993年被GB13000-1993代替
GB13000-1993也在2000年被GB18030-2000代替


ASCII?!已经在坟墓里不知道烂了多久了


中国80年代的老教材早该改版了



从VB4开始
VB的String就是Unicode格式了
准确点来说是UTF-16LE编码
但是还有很多人受老教材的影响,还是以为英文1字节、中文2字节,这是根本错误的

回复
xuxun 2006-01-23
晕,至于这么复杂吗?
回复
zyl910 2006-01-23

'Save text to a file in the requested text encoding.
'Return value: True only when every byte (BOM and text) was actually written.
'FileName: [in] path of the file to write
'sText:    [in] text to write
'IsAppend: [in] True to append to an existing file, False to overwrite it
'fmt:      [in,out] text encoding used to store the text. UEF_Auto is accepted
'          only when IsAppend = True; the encoding is then taken from the BOM of
'          the existing file and handed back through fmt.
'DefFmt:   [in] encoding to use when appending with fmt = UEF_Auto and the file
'          is missing or empty
'Note: UTF-32 output is not supported; such a request with non-empty text is
'      rejected before the file is opened, so an existing file is left untouched.
Public Function UEFSaveTextFile(ByVal FileName As String, _
        ByRef sText As String, Optional ByVal IsAppend As Boolean = False, _
        Optional ByRef fmt As UnicodeEncodeFormat = UEF_Auto, Optional ByVal DefFmt As UnicodeEncodeFormat = UEF_ANSI) As Boolean
    Dim hFile As Long
    Dim nFileSize As Long
    Dim nNumRead As Long    'bytes read / written by the last ReadFile / WriteFile
    Dim dwFirst As Long     'first four bytes of the file, or the BOM to emit
    Dim cbBOM As Long
    Dim CurCP As Long
    Dim byBuf() As Byte
    Dim cbBuf As Long
    Dim I As Long
    Dim byTemp As Byte

    'Validate fmt. UEF_Auto is acceptable only together with IsAppend.
    If IsAppend And (fmt = UEF_Auto) Then
    Else
        If fmt < [_UEF_Min] Or fmt > [_UEF_Max] Then
            GoTo FunEnd
        End If
    End If

    'UTF-32 cannot be written. Fail before CreateFile so that CREATE_ALWAYS
    'does not truncate an existing file for a request that is bound to fail.
    If Len(sText) > 0 Then
        If fmt = UEF_UTF32LE Or fmt = UEF_UTF32BE Then
            GoTo FunEnd
        End If
    End If

    'Open the file
    hFile = CreateFile(FileName, GENERIC_READ Or GENERIC_WRITE, FILE_SHARE_READ Or FILE_SHARE_WRITE, ByVal 0&, IIf(IsAppend, OPEN_ALWAYS, CREATE_ALWAYS), FILE_ATTRIBUTE_NORMAL, ByVal 0&)
    If INVALID_HANDLE_VALUE = hFile Then 'file cannot be opened
        GoTo FunEnd
    End If

    'An empty file is treated like a fresh one: a BOM has to be written
    nFileSize = GetFileSize(hFile, nNumRead)
    If nFileSize = 0 And nNumRead = 0 Then
        IsAppend = False
        If fmt = UEF_Auto Then fmt = DefFmt
    End If

    'Work out the BOM: detect it when appending in auto mode, build it otherwise
    If IsAppend And (fmt = UEF_Auto) Then
        dwFirst = 0
        nNumRead = 0
        Call ReadFile(hFile, dwFirst, 4, nNumRead, ByVal 0&)
        cbBOM = UEFCheckBOM(dwFirst, fmt)
        If cbBOM > nNumRead Then
            'Fewer bytes were read than the reported BOM length. dwFirst was
            'zero-padded, so a file holding just FF FE (or FF FE 00) looks like
            'the 4-byte UTF-32LE mark although it is really UTF-16LE.
            fmt = UEF_UTF16LE
            cbBOM = 2
        End If
    ElseIf IsAppend = False Then
        cbBOM = UEFMakeBOM(fmt, dwFirst)
    End If

    'Position the file pointer: end of file when appending, start otherwise
    Call SetFilePointer(hFile, 0, ByVal 0&, IIf(IsAppend, FILE_END, FILE_BEGIN))

    'Write the BOM (never when appending)
    If IsAppend = False Then
        If cbBOM > 0 Then
            nNumRead = 0
            If WriteFile(hFile, dwFirst, cbBOM, nNumRead, ByVal 0&) = 0 Then GoTo FreeHandle
            If nNumRead <> cbBOM Then GoTo FreeHandle
        End If
    End If

    'Write the text
    If Len(sText) > 0 Then
        Select Case fmt
        Case UEF_ANSI, UEF_UTF8
            'Pick the code page for the conversion
            CurCP = IIf(fmt = UEF_UTF8, CP_UTF8, UEFCodePage)

            'First call only measures the required buffer size
            cbBuf = WideCharToMultiByte(CurCP, 0, ByVal StrPtr(sText), Len(sText), ByVal 0&, 0, ByVal 0&, ByVal 0&)
            If cbBuf > 0 Then
                'Allocate the buffer (one spare byte); on failure just close the file
                On Error GoTo FreeHandle
                ReDim byBuf(0 To cbBuf)
                On Error GoTo 0

                'Convert the text
                cbBuf = WideCharToMultiByte(CurCP, 0, ByVal StrPtr(sText), Len(sText), byBuf(0), cbBuf + 1, ByVal 0&, ByVal 0&)

                'Write it; success requires the conversion and the full write to succeed
                If cbBuf > 0 Then
                    nNumRead = 0
                    If WriteFile(hFile, byBuf(0), cbBuf, nNumRead, ByVal 0&) <> 0 Then
                        UEFSaveTextFile = (nNumRead = cbBuf)
                    End If
                End If
            End If

        Case UEF_UTF16LE
            'A VB String already holds UTF-16LE data: write it as is
            nNumRead = 0
            If WriteFile(hFile, ByVal StrPtr(sText), LenB(sText), nNumRead, ByVal 0&) <> 0 Then
                UEFSaveTextFile = (nNumRead = LenB(sText))
            End If

        Case UEF_UTF16BE
            'Copy the string data into byBuf
            On Error GoTo FreeHandle
            byBuf = sText
            On Error GoTo 0
            cbBuf = UBound(byBuf) - LBound(byBuf) + 1

            'Swap each pair of adjacent bytes (the extra -1 skips a trailing odd byte)
            For I = 0 To cbBuf - 1 - 1 Step 2
                byTemp = byBuf(I)
                byBuf(I) = byBuf(I + 1)
                byBuf(I + 1) = byTemp
            Next I

            'Write it
            nNumRead = 0
            If WriteFile(hFile, byBuf(0), cbBuf, nNumRead, ByVal 0&) <> 0 Then
                UEFSaveTextFile = (nNumRead = cbBuf)
            End If

        Case UEF_UTF32LE
            UEFSaveTextFile = False 'not supported yet
        Case UEF_UTF32BE
            UEFSaveTextFile = False 'not supported yet
        Case Else
            Debug.Assert False
        End Select
    Else
        'Nothing to write beyond the BOM
        UEFSaveTextFile = True
    End If

FreeHandle:
    'Close the file
    Call CloseHandle(hFile)

FunEnd:
End Function
回复
zyl910 2006-01-23

'Load a text file and return its contents as a String.
'Return value: the text read. vbNullString means the file could not be opened
'              (it is also what the unsupported UTF-32 formats return).
'FileName: [in] path of the file to read
'fmt:      [in,out] text encoding used to read the file. UEF_Auto means
'          "detect from the BOM"; the detected encoding is then handed back
'          through fmt.
Public Function UEFLoadTextFile(ByVal FileName As String, Optional ByRef fmt As UnicodeEncodeFormat = UEF_Auto) As String
    Dim hFile As Long
    Dim nFileSize As Long
    Dim nNumRead As Long            'bytes delivered by the last ReadFile
    Dim dwFirst As Long             'first four bytes of the file (zero-padded)
    Dim CurFmt As UnicodeEncodeFormat
    Dim cbBOM As Long
    Dim cbTextData As Long          'bytes of text following the BOM
    Dim CurCP As Long
    Dim byBuf() As Byte
    Dim cchStr As Long
    Dim I As Long
    Dim byTemp As Byte

    'Validate fmt
    If fmt <> UEF_Auto Then
        If fmt < [_UEF_Min] Or fmt > [_UEF_Max] Then
            GoTo FunEnd
        End If
    End If

    'Open the file
    hFile = CreateFile(FileName, GENERIC_READ, FILE_SHARE_READ Or FILE_SHARE_WRITE, ByVal 0&, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, ByVal 0&)
    If INVALID_HANDLE_VALUE = hFile Then 'file cannot be opened
        GoTo FunEnd
    End If

    'Check the file size
    nFileSize = GetFileSize(hFile, nNumRead)
    If nNumRead <> 0 Then 'larger than 4 GB
        GoTo FreeHandle
    End If
    If nFileSize < 0 Then 'larger than 2 GB
        GoTo FreeHandle
    End If

    'Detect the BOM
    dwFirst = 0
    nNumRead = 0
    Call ReadFile(hFile, dwFirst, 4, nNumRead, ByVal 0&)
    cbBOM = UEFCheckBOM(dwFirst, CurFmt)
    If cbBOM > nNumRead Then
        'Fewer bytes were read than the reported BOM length. dwFirst was
        'zero-padded, so a file holding just FF FE (or FF FE 00) looks like
        'the 4-byte UTF-32LE mark although it is really UTF-16LE.
        CurFmt = UEF_UTF16LE
        cbBOM = 2
    End If

    'Decide how many leading bytes to skip
    If fmt = UEF_Auto Then
        'Automatic detection: use what the BOM says
        fmt = CurFmt
    ElseIf fmt <> CurFmt Then
        'Caller forced a different encoding: every byte counts as data
        cbBOM = 0
    End If
    Call SetFilePointer(hFile, cbBOM, ByVal 0&, FILE_BEGIN)
    cbTextData = nFileSize - cbBOM

    'Read the data
    UEFLoadTextFile = ""
    Select Case fmt
    Case UEF_ANSI, UEF_UTF8
        If cbTextData > 0 Then
            'Pick the code page for the conversion
            CurCP = IIf(fmt = UEF_UTF8, CP_UTF8, UEFCodePage)

            'Allocate the buffer; on failure just close the file
            On Error GoTo FreeHandle
            ReDim byBuf(0 To cbTextData - 1)
            On Error GoTo 0

            'Read the raw bytes
            nNumRead = 0
            Call ReadFile(hFile, byBuf(0), cbTextData, nNumRead, ByVal 0&)

            'First call only measures the length of the Unicode text
            cchStr = MultiByteToWideChar(CurCP, 0, byBuf(0), nNumRead, ByVal 0&, ByVal 0&)
            If cchStr > 0 Then
                'Allocate the result string
                On Error GoTo FreeHandle
                UEFLoadTextFile = String$(cchStr, 0)
                On Error GoTo 0

                'Convert straight into the string's buffer
                cchStr = MultiByteToWideChar(CurCP, 0, byBuf(0), nNumRead, ByVal StrPtr(UEFLoadTextFile), cchStr + 1)
            End If
        End If

    Case UEF_UTF16LE
        If cbTextData > 0 Then
            'Round up so that a trailing odd byte still fits
            cchStr = (cbTextData + 1) \ 2

            'Allocate the result string
            On Error GoTo FreeHandle
            UEFLoadTextFile = String$(cchStr, 0)
            On Error GoTo 0

            'A VB String holds UTF-16LE data, so read directly into it
            nNumRead = 0
            Call ReadFile(hFile, ByVal StrPtr(UEFLoadTextFile), cbTextData, nNumRead, ByVal 0&)

            'Trim the string to what was actually read
            cchStr = (nNumRead + 1) \ 2
            If cchStr > 0 Then
                If Len(UEFLoadTextFile) > cchStr Then
                    UEFLoadTextFile = Left$(UEFLoadTextFile, cchStr)
                End If
            Else
                UEFLoadTextFile = ""
            End If
        End If

    Case UEF_UTF16BE
        If cbTextData > 0 Then
            'Allocate the buffer; on failure just close the file
            On Error GoTo FreeHandle
            ReDim byBuf(0 To cbTextData - 1)
            On Error GoTo 0

            'Read the raw bytes
            nNumRead = 0
            Call ReadFile(hFile, byBuf(0), cbTextData, nNumRead, ByVal 0&)

            If nNumRead > 0 Then
                'Swap each pair of adjacent bytes (the extra -1 skips a trailing odd byte)
                For I = 0 To nNumRead - 1 - 1 Step 2
                    byTemp = byBuf(I)
                    byBuf(I) = byBuf(I + 1)
                    byBuf(I + 1) = byTemp
                Next I

                'VB converts a Byte array to String data by direct assignment
                UEFLoadTextFile = byBuf
            End If
        End If

    Case UEF_UTF32LE
        UEFLoadTextFile = vbNullString 'not supported yet
    Case UEF_UTF32BE
        UEFLoadTextFile = vbNullString 'not supported yet
    Case Else
        Debug.Assert False
    End Select

FreeHandle:
    'Close the file
    Call CloseHandle(hFile)

FunEnd:
End Function
回复
zyl910 2006-01-23
支持UTF文本文件访问的模块
支持常见的ANSI、UTF-8、UTF-16LE、UTF-16BE这几种编码文本



Option Explicit

'mTextUTF.bas
'模块:UTF文本文件访问
'作者:zyl910
'版本:1.0
'日期:2006-1-23


'== 说明 ===================================================
'支持Unicode编码的文本文件读写。暂时支持ANSI、UTF-8、UTF-16LE、UTF-16BE这几种编码文本


'== 更新记录 ===============================================
'[V1.0] 2006-1-23
'1.支持最常见的ANSI、UTF-8、UTF-16LE、UTF-16BE这几种编码文本



'## 编译预处理常数 #########################################
'== 全局常数 ===============================================
'IncludeAPILib:引用了API库,此时不需要手动写API声明



'## API ####################################################
'Win32 declarations used by this module. They are compiled only when the
'conditional-compilation constant IncludeAPILib is 0, i.e. when no API
'library providing them is referenced.
#If IncludeAPILib = 0 Then
'== File ===================================================
'kernel32 file functions. CreateFile is bound to the ANSI entry point
'(CreateFileA), so FileName is passed as an ANSI string.
Private Declare Function CreateFile Lib "kernel32" Alias "CreateFileA" (ByVal lpFileName As String, ByVal dwDesiredAccess As Long, ByVal dwShareMode As Long, ByVal lpSecurityAttributes As Long, ByVal dwCreationDisposition As Long, ByVal dwFlagsAndAttributes As Long, ByVal hTemplateFile As Long) As Long
Private Declare Function CloseHandle Lib "kernel32" (ByVal hObject As Long) As Long
Private Declare Function ReadFile Lib "kernel32" (ByVal hFile As Long, lpBuffer As Any, ByVal nNumberOfBytesToRead As Long, lpNumberOfBytesRead As Long, ByVal lpOverlapped As Long) As Long
Private Declare Function WriteFile Lib "kernel32" (ByVal hFile As Long, lpBuffer As Any, ByVal nNumberOfBytesToWrite As Long, lpNumberOfBytesWritten As Long, ByVal lpOverlapped As Long) As Long
Private Declare Function GetFileSize Lib "kernel32" (ByVal hFile As Long, lpFileSizeHigh As Long) As Long
Private Declare Function SetFilePointer Lib "kernel32" (ByVal hFile As Long, ByVal lDistanceToMove As Long, lpDistanceToMoveHigh As Long, ByVal dwMoveMethod As Long) As Long

'Value CreateFile returns when the file cannot be opened
Private Const INVALID_HANDLE_VALUE = -1

'dwDesiredAccess flags for CreateFile
Private Const GENERIC_READ = &H80000000
Private Const GENERIC_WRITE = &H40000000

'dwShareMode flags for CreateFile
Private Const FILE_SHARE_READ = &H1
Private Const FILE_SHARE_WRITE = &H2

'dwCreationDisposition values for CreateFile
Private Const CREATE_NEW = 1
Private Const CREATE_ALWAYS = 2
Private Const OPEN_EXISTING = 3
Private Const OPEN_ALWAYS = 4
Private Const TRUNCATE_EXISTING = 5

'dwFlagsAndAttributes value for CreateFile
Private Const FILE_ATTRIBUTE_NORMAL = &H80

'dwMoveMethod values for SetFilePointer
Private Const FILE_BEGIN = 0
Private Const FILE_CURRENT = 1
Private Const FILE_END = 2


'== Unicode ================================================

'kernel32 code-page conversion functions. Both string arguments are declared
'As Any, so callers pass either a buffer element by reference or a raw
'pointer with ByVal (for example ByVal StrPtr(s), or ByVal 0& for NULL).
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByRef lpWideCharStr As Any, ByVal cchWideChar As Long) As Long
Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpWideCharStr As Any, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByRef lpDefaultChar As Any, ByVal lpUsedDefaultChar As Long) As Long

'Code page identifier passed to the conversion functions for UTF-8
Private Const CP_UTF8 As Long = 65001

#End If



'###########################################################

'Text encoding formats handled by this module.
'Members without an explicit value continue counting from UEF_ANSI = 0.
Public Enum UnicodeEncodeFormat
UEF_ANSI = 0 'ANSI+DBCS
UEF_UTF8 'UTF-8
UEF_UTF16LE 'UTF-16LE
UEF_UTF16BE 'UTF-16BE
UEF_UTF32LE 'UTF-32LE
UEF_UTF32BE 'UTF-32BE

UEF_Auto = -1 'detect the encoding automatically

'Hidden members: lower and upper bound used to range-check a format value
[_UEF_Min] = UEF_ANSI
[_UEF_Max] = UEF_UTF32BE

End Enum

'Code page used for text in ANSI+DBCS format. Defaults to 0, which means the
'system's current code page. Set it to read text written under another code
'page, e.g. 950 to read, on a Simplified Chinese system, a txt file produced
'on a Traditional Chinese system.
Public UEFCodePage As Long

'Identify the byte order mark at the start of a file.
'Return value: number of bytes the BOM occupies (0 when none is recognised)
'dwFirst: [in] the first 4 bytes of the file, loaded as a Long
'fmt:     [out] receives the encoding the BOM stands for (UEF_ANSI when none)
Public Function UEFCheckBOM(ByVal dwFirst As Long, ByRef fmt As UnicodeEncodeFormat) As Long
    Dim dwLow16 As Long     'first two bytes of the file
    Dim dwLow24 As Long     'first three bytes of the file

    dwLow16 = dwFirst And &HFFFF&
    dwLow24 = dwFirst And &HFFFFFF

    'The 4-byte marks are tested first: the UTF-32LE mark starts with the
    'same two bytes as the UTF-16LE mark.
    Select Case True
    Case dwFirst = &HFEFF&
        fmt = UEF_UTF32LE
        UEFCheckBOM = 4
    Case dwFirst = &HFFFE0000
        fmt = UEF_UTF32BE
        UEFCheckBOM = 4
    Case dwLow16 = &HFEFF&
        fmt = UEF_UTF16LE
        UEFCheckBOM = 2
    Case dwLow16 = &HFFFE&
        fmt = UEF_UTF16BE
        UEFCheckBOM = 2
    Case dwLow24 = &HBFBBEF
        fmt = UEF_UTF8
        UEFCheckBOM = 3
    Case Else
        'No known mark: treat the file as ANSI text without a BOM
        fmt = UEF_ANSI
        UEFCheckBOM = 0
    End Select
End Function

'Build the byte order mark for an encoding.
'Return value: number of bytes the BOM occupies (0 when the format has none)
'fmt:     [in] encoding
'dwFirst: [out] receives the BOM bytes, packed into a Long
Public Function UEFMakeBOM(ByVal fmt As UnicodeEncodeFormat, ByRef dwFirst As Long) As Long
    Dim dwMark As Long  'stays 0 for formats without a BOM
    Dim cbMark As Long  'stays 0 for formats without a BOM

    If fmt = UEF_UTF8 Then
        dwMark = &HBFBBEF
        cbMark = 3
    ElseIf fmt = UEF_UTF16LE Then
        dwMark = &HFEFF&
        cbMark = 2
    ElseIf fmt = UEF_UTF16BE Then
        dwMark = &HFFFE&
        cbMark = 2
    ElseIf fmt = UEF_UTF32LE Then
        dwMark = &HFEFF&
        cbMark = 4
    ElseIf fmt = UEF_UTF32BE Then
        dwMark = &HFFFE0000
        cbMark = 4
    End If

    dwFirst = dwMark
    UEFMakeBOM = cbMark
End Function

'Determine the encoding of a text file from its byte order mark.
'Return value: the encoding; UEF_Auto when the file cannot be opened
'FileName: path of the file to inspect
Public Function UEFCheckTextFileFormat(ByVal FileName As String) As UnicodeEncodeFormat
    Dim hFile As Long
    Dim dwFirst As Long     'first four bytes of the file (zero-padded)
    Dim nNumRead As Long    'how many of those bytes really came from the file
    Dim cbBOM As Long
    Dim fmt As UnicodeEncodeFormat

    'Open the file
    hFile = CreateFile(FileName, GENERIC_READ, FILE_SHARE_READ Or FILE_SHARE_WRITE, ByVal 0&, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, ByVal 0&)
    If INVALID_HANDLE_VALUE = hFile Then 'file cannot be opened
        UEFCheckTextFileFormat = UEF_Auto
        Exit Function
    End If

    'Read the leading bytes, then release the handle straight away
    dwFirst = 0
    nNumRead = 0
    Call ReadFile(hFile, dwFirst, 4, nNumRead, ByVal 0&)
    Call CloseHandle(hFile)

    'Detect the BOM
    cbBOM = UEFCheckBOM(dwFirst, fmt)
    If cbBOM > nNumRead Then
        'Fewer bytes were read than the reported BOM length. dwFirst was
        'zero-padded, so a file holding just FF FE (or FF FE 00) looks like
        'the 4-byte UTF-32LE mark although it is really UTF-16LE.
        fmt = UEF_UTF16LE
    End If

    UEFCheckTextFileFormat = fmt
End Function
回复
xuxun 2006-01-22
程序的基本思路没错,读出文件的部分也没错,问题出在每行读出后再做字符串操作这一块:如果读出的字符串全都是英文,你的这段代码也没什么问题;如果是中英文混合问题就来了,这里有几个表达式:
a=len("你好")
b=len("abcd")
c=lenb("你好")
d=lenb("abcd")
如果你认为a=b,那问题就出在这里了,实际结果是a=2,b=4,也就是说一个汉字占二个字符位(双字节表示),但c=d且都等于4,这说明它们占的字节数相同。这样问题也同样存在于mid这种字符串截取函数中,这里还有几个表达式:
str1=mid("你好吗",2)
str2=midb("你好吗",2)
str3=mid("abcdef",2)
str4=midb("abcdef",2)
str5=midb("abcdef",3)
对比一下它们的值,再注意一下它们的实际截取位置就可以了,祝你好运了!

回复
zyl910 2006-01-22
以二进制方式打开,判断BOM标记,自己写格式转换程序

对于UTF-8
可以用MultiByteToWideChar将其转为Unicode格式,使用Windows2000新增代码页65001

对于UTF-16LE
VB的String用的就是UTF-16LE格式,先用字节数组读取文件内容,再直接给字符串变量赋值(sText = byBuf)

对于UTF-16BE
这是大端方式的UTF-16,先还是用字节数组读取文件内容,然后在字节数组中两个两个地交换相邻字节,再直接给字符串变量赋值

对于UTF-32
UTF-32采用的是4字节编码,只能手动转换,幸亏其不多见。
回复
WuYunpeng 2006-01-22
注册表用的Unicode字符集吧!我以前把它导出为文本文件时为Unicode,这时我在读该文件时就会出乱码,通常我会把它转成ANSI字符集的文件
回复
WuYunpeng 2006-01-22
Unicode
ANSI
UTF-8
回复
rainstormmaster 2006-01-22
//那么是否有更好的办法来读入中文字符串呢

用2进制方式(binary)访问文件
回复
只能人为地更改文件来源才能做到吗?

由于这里的目标是由系统注册表导出的REG文件,无法人为地更改其编码.

楼上有朋友说"vb的line input只能读本地编码的文件",那么是否有更好的办法来读入中文字符串呢?

麻烦各位了
回复
IamDeane 2006-01-21
同意楼上的想法啊,看看是不是编码问题啊
回复
pigsanddogs 2006-01-21
vb的line input只能读本地编码的文件.
你的文本文件肯定不是本地编码(比如gb2312), 说不定是unicode编码的。
你在2000下用notepad打开, 另存为一下就行了。
回复
谢.....谢谢暴风雨.....

可能我还是没有说得很明白,抱歉....

读文件并处理的过程是这样的:

打开文本文件--->以行读取方式读入动态数组---->分析字符串并还原操作

现在我的问题不是第三步,而是第一二步里,"读入的内容"本身就不对头.

写入注册表的函数我有,而且可以正常写入中文.

我想得到一个此种问题的原因,因为只知道个解决办法是治标不治本啊~~~到头来我还是什么也没学到,最多就是多收藏一些代码而已.

再次麻烦各位~~~~~抱歉抱歉啦~~~呵呵
回复
rainstormmaster 2006-01-20
注册表的:
'This program needs 3 buttons
'NOTE(review): the line above appears to be left over from the original
'sample; none of the code shown here references a button.

'Registry value types tested by RegQueryStringValue
Const REG_SZ = 1 ' Unicode nul terminated string
Const REG_BINARY = 3 ' Free form binary
'Predefined root key handle
Const HKEY_CURRENT_USER = &H80000001
'advapi32 registry functions. Every function that takes a string is bound to
'its ANSI entry point (the ...A alias).
Private Declare Function RegCloseKey Lib "advapi32.dll" (ByVal hKey As Long) As Long
Private Declare Function RegCreateKey Lib "advapi32.dll" Alias "RegCreateKeyA" (ByVal hKey As Long, ByVal lpSubKey As String, phkResult As Long) As Long
Private Declare Function RegDeleteValue Lib "advapi32.dll" Alias "RegDeleteValueA" (ByVal hKey As Long, ByVal lpValueName As String) As Long
Private Declare Function RegOpenKey Lib "advapi32.dll" Alias "RegOpenKeyA" (ByVal hKey As Long, ByVal lpSubKey As String, phkResult As Long) As Long
Private Declare Function RegQueryValueEx Lib "advapi32.dll" Alias "RegQueryValueExA" (ByVal hKey As Long, ByVal lpValueName As String, ByVal lpReserved As Long, lpType As Long, lpData As Any, lpcbData As Long) As Long
Private Declare Function RegSetValueEx Lib "advapi32.dll" Alias "RegSetValueExA" (ByVal hKey As Long, ByVal lpValueName As String, ByVal Reserved As Long, ByVal dwType As Long, lpData As Any, ByVal cbData As Long) As Long
'Read a value from an already opened registry key and return it as a String.
'hKey:         handle of the open key
'strValueName: name of the value to read
'Return value: for REG_SZ the text up to (not including) the first NUL;
'              for REG_BINARY the first two data bytes interpreted as a signed
'              16-bit integer and converted to text; an empty string for any
'              other value type or when a registry call fails.
Function RegQueryStringValue(ByVal hKey As Long, ByVal strValueName As String) As String
    Dim lResult As Long
    Dim lValueType As Long
    Dim lDataBufSize As Long
    Dim strBuf As String
    Dim lNulPos As Long
    Dim abData() As Byte
    Dim lValue As Long

    'Retrieve information about the value: with a NULL data pointer the call
    'only reports the value type and the size of its data
    lResult = RegQueryValueEx(hKey, strValueName, 0, lValueType, ByVal 0&, lDataBufSize)
    If lResult <> 0 Then Exit Function

    Select Case lValueType
    Case REG_SZ
        'Nothing stored: the result stays an empty string
        If lDataBufSize <= 0 Then Exit Function

        'Create a buffer
        strBuf = String$(lDataBufSize, vbNullChar)
        'Retrieve the value's content
        lResult = RegQueryValueEx(hKey, strValueName, 0, 0&, ByVal strBuf, lDataBufSize)
        If lResult = 0 Then
            'Remove the unnecessary Chr$(0)'s. The stored data may lack a
            'terminating NUL, in which case the whole buffer is the text.
            lNulPos = InStr(1, strBuf, vbNullChar)
            If lNulPos > 0 Then
                RegQueryStringValue = Left$(strBuf, lNulPos - 1)
            Else
                RegQueryStringValue = strBuf
            End If
        End If

    Case REG_BINARY
        'Size the buffer from the reported data length so the API never
        'writes past it; keep at least two bytes so both indexes below exist
        If lDataBufSize < 2 Then
            ReDim abData(0 To 1)
        Else
            ReDim abData(0 To lDataBufSize - 1)
        End If

        'Retrieve the value's data
        lResult = RegQueryValueEx(hKey, strValueName, 0, 0&, abData(0), lDataBufSize)
        If lResult = 0 Then
            'Rebuild the little-endian signed 16-bit value from the first two bytes
            lValue = abData(0) + abData(1) * 256&
            If lValue >= 32768 Then lValue = lValue - 65536
            RegQueryStringValue = CStr(lValue)
        End If
    End Select
End Function
'Open a registry subkey, read one value from it and close the key again.
'hKey:     handle of the parent key (e.g. HKEY_CURRENT_USER)
'strPath:  path of the subkey below hKey
'strValue: name of the value to read
'Return value: whatever RegQueryStringValue returns for the value; an empty
'              string when the subkey cannot be opened.
Function GetString(hKey As Long, strPath As String, strValue As String)
    Dim hSubKey As Long

    'Result when the key cannot be opened
    GetString = vbNullString

    'Open the key; query and close it only when that succeeded
    If RegOpenKey(hKey, strPath, hSubKey) = 0 Then
        'Get the value's content
        GetString = RegQueryStringValue(hSubKey, strValue)
        'Close the key
        RegCloseKey hSubKey
    End If
End Function

调用的时候用GetString这个函数即可
回复
发动态
发帖子
VB基础类
创建于2007-09-28

7453

社区成员

VB 基础类
申请成为版主
社区公告
暂无公告