28,391
社区成员
发帖
与我相关
我的任务
分享
' Convert a string to percent-encoded UTF-8.
'
' szInput : the text to convert (VBScript strings are sequences of UTF-16 code units).
' Returns : a string in which every character below U+0080 is copied through
'           unchanged and every other character is replaced by its UTF-8 bytes,
'           each written as "%XX" (upper-case hex).
'           An empty input is returned as-is.
'
' Note: characters below U+0080 are NOT escaped (space, "%", "&" ... pass through),
' so the result is not a complete URL encoding on its own.
Function toUTF8(szInput)
    ' Surrogate boundaries are written in decimal on purpose: VBScript reads
    ' hex literals in the range &H8000..&HFFFF as negative 16-bit Integers,
    ' which would break the range comparisons below.
    Const HIGH_SURROGATE_FIRST = 55296   ' U+D800
    Const HIGH_SURROGATE_LAST = 56319    ' U+DBFF
    Const LOW_SURROGATE_FIRST = 56320    ' U+DC00
    Const LOW_SURROGATE_LAST = 57343     ' U+DFFF

    Dim wch, szRet
    Dim x, nLen
    Dim nAsc, nLow

    ' Nothing to convert for an empty string.
    If szInput = "" Then
        toUTF8 = szInput
        Exit Function
    End If

    szRet = ""
    nLen = Len(szInput)
    x = 1
    Do While x <= nLen
        wch = Mid(szInput, x, 1)

        ' AscW returns the UTF-16 code unit as a signed 16-bit value;
        ' fold negative results back into 0..65535.
        nAsc = AscW(wch)
        If nAsc < 0 Then nAsc = nAsc + 65536

        ' Combine a high/low surrogate pair into one code point above U+FFFF.
        ' An unpaired surrogate falls through and is emitted with the
        ' three-byte template, as before.
        If nAsc >= HIGH_SURROGATE_FIRST And nAsc <= HIGH_SURROGATE_LAST And x < nLen Then
            nLow = AscW(Mid(szInput, x + 1, 1))
            If nLow < 0 Then nLow = nLow + 65536
            If nLow >= LOW_SURROGATE_FIRST And nLow <= LOW_SURROGATE_LAST Then
                nAsc = 65536 + (nAsc - HIGH_SURROGATE_FIRST) * 1024 + (nLow - LOW_SURROGATE_FIRST)
                x = x + 1   ' the low surrogate has been consumed
            End If
        End If

        If nAsc < &H80 Then
            ' U+0000..U+007F: copied through unchanged.
            szRet = szRet & wch
        ElseIf nAsc < &H800 Then
            ' U+0080..U+07FF: two bytes, 110xxxxx 10xxxxxx.
            szRet = szRet & "%" & Hex((nAsc \ 64) Or &HC0) & _
                            "%" & Hex((nAsc And &H3F) Or &H80)
        ElseIf nAsc < 65536 Then
            ' U+0800..U+FFFF: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
            szRet = szRet & "%" & Hex((nAsc \ 4096) Or &HE0) & _
                            "%" & Hex(((nAsc \ 64) And &H3F) Or &H80) & _
                            "%" & Hex((nAsc And &H3F) Or &H80)
        Else
            ' U+10000..U+10FFFF: four bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            szRet = szRet & "%" & Hex((nAsc \ 262144) Or &HF0) & _
                            "%" & Hex(((nAsc \ 4096) And &H3F) Or &H80) & _
                            "%" & Hex(((nAsc \ 64) And &H3F) Or &H80) & _
                            "%" & Hex((nAsc And &H3F) Or &H80)
        End If

        x = x + 1
    Loop

    toUTF8 = szRet
End Function
' Encode a string into a byte array with the help of ADODB.Stream.
'
' str  : the text to encode.
' cSet : name of the character set used for encoding (e.g. "GBK", "iso-8859-1").
' Returns whatever the stream yields when it is read back in binary mode.
'
' NOTE(review): for some Unicode charsets ADODB.Stream may prepend a byte-order
' mark to the written data - confirm before relying on the exact byte count.
Function GetBytes(str, cSet)
    Const adModeReadWrite = 3
    Const adTypeBinary = 1
    Const adTypeText = 2
    Const adReadAll = -1

    Dim stm
    Set stm = CreateObject("ADODB.Stream")

    ' Open as a read/write text stream and write the string in the requested charset.
    stm.Mode = adModeReadWrite
    stm.Type = adTypeText
    stm.Open
    stm.CharSet = cSet
    stm.WriteText (str)

    ' Rewind, flip to binary mode and read everything back as raw bytes.
    stm.Position = 0
    stm.Type = adTypeBinary
    GetBytes = stm.Read(adReadAll)

    stm.Close
End Function
' Decode a byte array into a string with the help of ADODB.Stream.
'
' bytes : the binary data to decode (written to the stream in binary mode).
' cSet  : name of the character set used to interpret the bytes
'         (e.g. "GBK", "UTF-8").
' Returns the text read back from the stream in text mode.
Function StringFromBytes(bytes, cSet)
    Const adModeReadWrite = 3
    Const adTypeBinary = 1
    Const adTypeText = 2
    Const adReadAll = -1

    Dim stm
    Set stm = CreateObject("ADODB.Stream")

    ' Open as a read/write binary stream and store the raw bytes.
    stm.Mode = adModeReadWrite
    stm.Type = adTypeBinary
    stm.Open
    stm.Write (bytes)

    ' Rewind, flip to text mode, then pick the charset used for decoding.
    stm.Position = 0
    stm.Type = adTypeText
    stm.CharSet = cSet
    StringFromBytes = stm.ReadText(adReadAll)

    stm.Close
End Function
'------------------------------------
' Demo 1: round-trip a string through its GBK byte representation.
Dim s
Dim bytes

s = "文字"
bytes = GetBytes(s, "GBK")            ' string -> byte array
s = StringFromBytes(bytes, "GBK")     ' byte array -> string again
Call MsgBox(s)

' Demo 2: decode percent-encoded UTF-8.
' Unescape turns each %XX into one character with that code; writing those
' characters as iso-8859-1 yields the byte array, which is then re-read as UTF-8.
s = Unescape("%E6%96%87%E5%AD%97")
bytes = GetBytes(s, "iso-8859-1")
s = StringFromBytes(bytes, "UTF-8")
Call MsgBox(s)