utf-8编码问题

outrace 2006-09-20 12:23:42
使用VB6
如何将一个字符串(既有字母也有汉字)转成UTF-8编码格式啊?
...全文
370 8 打赏 收藏 转发到动态 举报
写回复
用AI写文章
8 条回复
切换为时间正序
请发表友善的回复…
发表回复
Hassle 2006-09-20
  • 打赏
  • 举报
回复
Option Explicit

Private Const CP_ACP = 0 ' default to ANSI code page

Private Const CP_UTF8 = 65001 ' default to UTF-8 code page

Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long

Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long

Private Function EncodeToBytes(ByVal sData As String) As Byte() ' Note: Len(sData) > 0
Dim aRetn() As Byte
Dim nSize As Long

nSize = WideCharToMultiByte(CP_UTF8, 0, StrPtr(sData), -1, 0, 0, 0, 0)
ReDim aRetn(0 To nSize - 1) As Byte
WideCharToMultiByte CP_UTF8, 0, StrPtr(sData), -1, VarPtr(aRetn(0)), nSize, 0, 0

EncodeToString = aRetn
End Function

Private Function EncodeToString(ByVal sData As String) As String ' Note: Len(sData) > 0
Dim sRetn As String
Dim aData() As Byte
Dim nSize As Long
Dim sChar As String
Dim i As Long

nSize = WideCharToMultiByte(CP_ACP, 0, StrPtr(sData), -1, 0, 0, 0, 0)
ReDim aData(0 To nSize - 1) As Byte
WideCharToMultiByte CP_ACP, 0, StrPtr(sData), -1, VarPtr(aData(0)), nSize, 0, 0
sRetn = ""
For i = 0 To UBound(aData) - 1
sChar = Hex(aData(i))
If Len(sChar) = 1 Then sChar = 0 & sChar
sRetn = sRetn & "%" & sChar
Next

EncodeToString = sRetn
End Function
Hassle 2006-09-20
  • 打赏
  • 举报
回复
Option Explicit

Private Const CP_ACP = 0 ' default to ANSI code page

Private Const CP_UTF8 = 65001 ' default to UTF-8 code page

Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long

Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long

Private Function EncodeToBytes(ByVal sData As String) As Byte() ' Note: Len(sData) > 0
Dim aRetn() As Byte
Dim nSize As Long

nSize = WideCharToMultiByte(CP_UTF8, 0, StrPtr(sData), -1, 0, 0, 0, 0)
ReDim aRetn(0 To nSize - 1) As Byte
WideCharToMultiByte CP_UTF8, 0, StrPtr(sData), -1, VarPtr(aRetn(0)), nSize, 0, 0

EncodeToBytes = aRetn
End Function

Private Function DecodeToBytes(ByVal sData As String) As Byte() ' Note: Len(sData) > 0
Dim aRetn() As Byte
Dim nSize As Long

nSize = MultiByteToWideChar(CP_UTF8, 0, StrPtr(sData), -1, 0, 0)
ReDim aRetn(0 To 2 * nSize - 1) As Byte
MultiByteToWideChar CP_UTF8, 0, StrPtr(sData), -1, VarPtr(aRetn(0)), nSize

DecodeToBytes = aRetn
End Function

Private Sub Command1_Click()
Dim s As String
s = StrConv(EncodeToBytes("aa中文aa"), vbUnicode)
MsgBox s
s = DecodeToBytes(StrConv(s, vbFromUnicode))
MsgBox s
End Sub
happy_sea 2006-09-20
  • 打赏
  • 举报
回复
呵呵,用一楼的代码好了
outrace 2006-09-20
  • 打赏
  • 举报
回复
经过测试:
Hassle()与zq972(热)→(大·汗·天·子) 的都是可以的
怪我没有使用 StrConv(EncodeToBytes("aa中文aa"), vbUnicode)进行转换

谢谢。
顺便问一下。

如果我读取的信息是"aa涓枃aa"
如何将他转换成"aa中文aa"

谢谢
Hassle 2006-09-20
  • 打赏
  • 举报
回复
呵呵,Copy的时候忘记改了

Private Sub Command1_Click()
Dim s As String
s = StrConv(EncodeToBytes("aa中文aa"), vbUnicode)
MsgBox s
End Sub
outrace 2006-09-20
  • 打赏
  • 举报
回复
是这样的。
要求将字符串
"aa中文aa" 转成 "aa涓枃aa"

java代码如下:
String str = "aa中文aa";
System.out.print(new String(str.getBytes("UTF-8")));

我在VB如何作?
另外感谢楼上的。但是你写的第一个函数EncodeToBytes就有错误
EncodeToString = aRetn
应该修改为:
EncodeToBytes = aRetn

而且我没法用你这个函数得到我要的结果。
zq972 2006-09-20
  • 打赏
  • 举报
回复
Public Function strToUTF8(ByVal str As String) As Byte()
Dim I As Integer
Dim zAsc As Long 'Ascii码暂存
Dim L As Long '字节计数
Dim dat2() As Byte, dat3() As Byte
Dim zz As String
''''ReDim dat2(2) As Byte
''''dat2(0) = &HEF: dat2(1) = &HBB: dat2(2) = &HBF

For I = 1 To Len(str)
zz = Mid(str, I, 1): zAsc = Asc(zz)
If zAsc > 0 Then '如果不是汉字
ReDim Preserve dat2(L + 1) As Byte
dat2(L) = zAsc: L = L + 1
Else
ReDim Preserve dat2(L + 3) As Byte
dat3 = zz
dat2(L) = (dat3(1) And 240) / 16 Or 224
dat2(L + 1) = (dat3(1) And 15) * 4 + ((dat3(0) And 192) / 64) Or 128
dat2(L + 2) = dat3(0) And 63 Or 128
L = L + 3
End If
Next
strToUTF8 = dat2

End Function
happy_sea 2006-09-20
  • 打赏
  • 举报
回复
Private Sub Command1_Click()
MsgBox USC2UTF8("test试验")
End Sub

Private Function USC2UTF8(ByVal HZ As String) As String '汉字换为UTF-8
Dim i As Integer
Dim str_Char As String
Dim DAT(2) As Byte '存放UTF-8数据
Dim DAT1() As Byte '存放原始字节数据,1汉字需要4个数租元素
USC2UTF8 = vbNullString
For i = 1 To Len(HZ)
str_Char = Mid(HZ, i, 1) '判断是不是汉字
If AscW(str_Char) > &H0 And AscW(str_Char) < &H800 Then
USC2UTF8 = USC2UTF8 & str_Char
Else '按照 FFFF FFFF转换为二进制的 1110xxxx 10xxxxxx 10xxxxxx’高位低位也要互换
ReDim DAT1(1) As Byte
DAT1 = str_Char 'DAT1变成两个元素的数租
DAT(0) = (DAT1(1) And 240) / 16 Or 224 '将第一个字节取前4位进行 1110+
DAT(1) = (DAT1(1) And 15) * 4 + ((DAT1(0) And 192) / 64) Or 128 '将第1个字节后四位进行 10+,连接第2字节前两位
DAT(2) = DAT1(0) And 63 Or 128 '10连接 第2位后两位连接和第三位
USC2UTF8 = USC2UTF8 & CStr(Hex(DAT(0))) + CStr(Hex(DAT(1))) + CStr(Hex(DAT(2)))
End If
Next
End Function

7,763

社区成员

发帖
与我相关
我的任务
社区描述
VB 基础类
社区管理员
  • VB基础类社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧