怎么将文本文件从Utf-8转换为Unicode(高难)

smx717616 2005-03-17 04:17:53
如题.
...全文
367 8 打赏 收藏 转发到动态 举报
写回复
用AI写文章
8 条回复
切换为时间正序
请发表友善的回复…
发表回复
viena 2005-03-22
  • 打赏
  • 举报
回复
哦,错了,新建的文件E:\a_unicode.txt是Ansi格式的,不是Unicode
VB中是Unicode,但写文件时自动转换为Ansi了

如果一定要Unicode格式文件,可以用二进制读写,
要注意文件最前面的两个字节是格式标志

viena 2005-03-22
  • 打赏
  • 举报
回复
//还是不行,一楼的只是变换,但是现在想要把它存为:
//文本类型的 Unicode 格式,该怎么存呢?

把那一段代码放在一个Module_UTF8的模块,并添加一个Public变量m_bIsNt,标志系统是NT(2000)还是98,E:盘放一个a_utf8.txt,UTF8格式文件

Private Sub Command1_Click()
Dim str_UTF8 As String
Dim str_Unicode As String
Open "E:\a_utf8.txt" For Input As 1
str_UTF8 = StrConv(InputB$(LOF(1), 1), vbUnicode)'读到字符串
str_UTF8 = Right(str_UTF8, Len(str_UTF8) - 1) '第一个字符是格式标志,无实际意义,去掉
Close 1
Module_UTF8.m_bIsNt = True
str_Unicode = UTF8_Decode(str_UTF8)'转换为Unicode
str_Unicode = str_Unicode & "-添了几个字"'修改
Text1.Text = str_Unicode '显示到文本框

'保存到Unicode格式文件
Open "E:\a_unicode.txt" For Output As #1
Print #1, str_Unicode
Close #1
End Sub
幻影时空 2005-03-22
  • 打赏
  • 举报
回复
我有一点不明白的..呵呵
有些软件它用"**** Unicode版"
???
真没明白.在简/繁里都能自动识别变转化文字?哈哈,真不知道什么.楼下的帮我解释一下!
smx717616 2005-03-22
  • 打赏
  • 举报
回复
还是不行,一楼的只是变换,但是现在想要把它存为:

文本类型的 Unicode 格式,该怎么存呢?
smx717616 2005-03-22
  • 打赏
  • 举报
回复
搞定了,谢谢,结帐。
smx717616 2005-03-21
  • 打赏
  • 举报
回复
谢谢一楼的,这两天没开,看看,好用立即给分。
东方冉 2005-03-17
  • 打赏
  • 举报
回复
学习
viena 2005-03-17
  • 打赏
  • 举报
回复
[转贴]

老外的转换代码

Option Explicit

Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)

Public Const CP_UTF8 = 65001

'Purpose:Convert Utf8 to Unicode
Public Function UTF8_Decode(ByVal sUTF8 As String) As String

Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim n As Long

If LenB(sUTF8) = 0 Then Exit Function

If m_bIsNt Then
On Error GoTo EndFunction
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
lngUtf8Size = UBound(bytUtf8) + 1
On Error GoTo 0
'Set buffer for longest possible string i.e. each byte is
'ANSI, thus 1 unicode(2 bytes)for every utf-8 character.
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
'Translate using code page 65001(UTF-8)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
'Trim result to actual length
If lngResult Then
UTF8_Decode = Left$(strBuffer, lngResult)
End If
Else
Dim i As Long
Dim TopIndex As Long
Dim TwoBytes(1) As Byte
Dim ThreeBytes(2) As Byte
Dim AByte As Byte
Dim TStr As String
Dim BArray() As Byte

'Resume on error in case someone inputs text with accents
'that should have been encoded as UTF-8
On Error Resume Next

TopIndex = Len(sUTF8) ' Number of bytes equal TopIndex+1
If TopIndex = 0 Then Exit Function ' get out if there's nothing to convert
BArray = StrConv(sUTF8, vbFromUnicode)
i = 0 ' Initialise pointer
TopIndex = TopIndex - 1
' Iterate through the Byte Array
Do While i <= TopIndex
AByte = BArray(i)
If AByte < &H80 Then
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
ElseIf AByte >= &HE0 Then 'was = &HE1 Then
' Start of 3 byte UTF-8 group for a character
' Copy 3 byte to ThreeBytes
ThreeBytes(0) = BArray(i): i = i + 1
ThreeBytes(1) = BArray(i): i = i + 1
ThreeBytes(2) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F))
ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then
' Start of 2 byte UTF-8 group for a character
TwoBytes(0) = BArray(i): i = i + 1
TwoBytes(1) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F))
Else
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
End If
Loop
UTF8_Decode = TStr ' Return the resultant string
Erase BArray
End If

EndFunction:

End Function

'Purpose:Convert Unicode string to UTF-8.
Public Function UTF8_Encode(ByVal strUnicode As String, Optional ByVal bHTML As Boolean = True) As String
Dim i As Long
Dim TLen As Long
Dim lPtr As Long
Dim UTF16 As Long
Dim UTF8_EncodeLong As String

TLen = Len(strUnicode)
If TLen = 0 Then Exit Function

If m_bIsNt Then
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
'Set buffer for longest possible string.
lngBufferSize = TLen * 3 + 1
ReDim bytUtf8(lngBufferSize - 1)
'Translate using code page 65001(UTF-8).
lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), _
TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)
'Trim result to actual length.
If lngResult Then
lngResult = lngResult - 1
ReDim Preserve bytUtf8(lngResult)
'CopyMemory StrPtr(UTF8_Encode), bytUtf8(0&), lngResult
UTF8_Encode = StrConv(bytUtf8, vbUnicode)
' For i = 0 To lngResult
' UTF8_Encode = UTF8_Encode & Chr$(bytUtf8(i))
' Next
End If
Else
For i = 1 To TLen
' Get UTF-16 value of Unicode character
lPtr = StrPtr(strUnicode) + ((i - 1) * 2)
CopyMemory UTF16, ByVal lPtr, 2
'Convert to UTF-8
If UTF16 < &H80 Then ' 1 UTF-8 byte
UTF8_EncodeLong = Chr$(UTF16)
ElseIf UTF16 < &H800 Then ' 2 UTF-8 bytes
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&HC0 + (UTF16 And &H1F)) & UTF8_EncodeLong ' Use 5 remaining bits
Else ' 3 UTF-8 bytes
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) & UTF8_EncodeLong ' Use next 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&HE0 + (UTF16 And &HF)) & UTF8_EncodeLong ' Use 4 remaining bits
End If
UTF8_Encode = UTF8_Encode & UTF8_EncodeLong
Next
End If

'Substitute vbCrLf with HTML line breaks if requested.
If bHTML Then
UTF8_Encode = Replace$(UTF8_Encode, vbCrLf, "<br/>")
End If

End Function

7,763

社区成员

发帖
与我相关
我的任务
社区描述
VB 基础类
社区管理员
  • VB基础类社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧