64,647
社区成员
发帖
与我相关
我的任务
分享
#include <stdio.h>
#include <windows.h>
/// Converts a NUL-terminated string in the system ANSI code page (CP_ACP)
/// to a NUL-terminated wide-character string.
///
/// @param buf  Source string; passed straight to MultiByteToWideChar.
/// @return     A buffer allocated with `new[]`. The caller owns it and must
///             release it with `delete[]`. If the conversion fails (the size
///             query yields no length) an empty, heap-allocated string is
///             returned, so the caller can free the result unconditionally.
wchar_t* AnsiToUnicode(const char* buf)
{
    // First call only measures: with an input length of -1 the returned
    // count includes the terminating NUL.
    const int len = ::MultiByteToWideChar(CP_ACP, 0, buf, -1, NULL, 0);
    if (len <= 0)
    {
        // Never hand out a string literal: callers free the result.
        wchar_t* empty = new wchar_t[1];
        empty[0] = L'\0';
        return empty;
    }

    // Value-initialise so every wide character (not just `len` bytes) is zero.
    wchar_t* wch = new wchar_t[len]();
    ::MultiByteToWideChar(CP_ACP, 0, buf, -1, wch, len);
    return wch;
}
/// Converts a NUL-terminated wide-character string to a NUL-terminated
/// UTF-8 byte string using WideCharToMultiByte(CP_UTF8).
///
/// @param buf  Source wide string; passed straight to WideCharToMultiByte.
/// @return     A buffer allocated with `new[]`. The caller owns it and must
///             release it with `delete[]`. If the conversion fails (the size
///             query yields no length) an empty, heap-allocated string is
///             returned, so the caller can free the result unconditionally.
char* UnicodeToUtf8(const wchar_t* buf)
{
    // First call only measures: with an input length of -1 the returned
    // count includes the terminating NUL.
    const int len = ::WideCharToMultiByte(CP_UTF8, 0, buf, -1, NULL, 0, NULL, NULL);
    if (len <= 0)
    {
        // Never hand out a string literal: callers free the result.
        char* empty = new char[1];
        empty[0] = '\0';
        return empty;
    }

    char* utf8 = new char[len]();
    ::WideCharToMultiByte(CP_UTF8, 0, buf, -1, utf8, len, NULL, NULL);
    return utf8;
}
/// Demo: converts a sample ANSI string to UTF-8 via AnsiToUnicode and
/// UnicodeToUtf8, then prints each resulting byte in upper-case hex.
int main()
{
    char str[] = "周华健";
    wchar_t* uni = AnsiToUnicode(str);
    char* utf8 = UnicodeToUtf8(uni);

    // Measure once instead of calling strlen on every iteration.
    const size_t byteCount = strlen(utf8);
    for (size_t i = 0; i < byteCount; i++)
        printf("%X ", (unsigned char)utf8[i]);
    printf("\n");

    // Both buffers come from new[], so they must be released with delete[].
    delete[] uni;
    delete[] utf8;
    return 0;
}
/// Decodes a NUL-terminated UTF-8 byte string into a wide string.
///
/// Each well-formed sequence of 1 to 4 bytes becomes one code point. When
/// wchar_t is only 16 bits wide, code points above U+FFFF are written as a
/// UTF-16 surrogate pair. Anything malformed (a stray continuation byte, an
/// invalid lead byte, a truncated sequence, an overlong encoding, an encoded
/// surrogate or a value above U+10FFFF) is replaced by U+FFFD; the decoder
/// never reads past the end of the input.
///
/// @param src  UTF-8 input. If NULL, an empty string is returned and `t` is
///             left untouched.
/// @param t    Receives the decoded text.
/// @return     A copy of the decoded text (same content as `t`).
std::wstring UTF2Uni(const char* src, std::wstring &t)
{
    if (src == NULL)
    {
        return L"";
    }

    const wchar_t kReplacement = static_cast<wchar_t>(0xFFFD);
    const size_t size_s = strlen(src);

    std::wstring out;
    // The output never holds more units than the input has bytes.
    out.reserve(size_s);

    size_t s = 0;
    while (s < size_s)
    {
        const unsigned char lead = static_cast<unsigned char>(src[s]);

        unsigned long cp = 0;       // code point being assembled
        unsigned long smallest = 0; // lowest value this sequence length may encode
        size_t trailing = 0;        // number of continuation bytes expected

        if ((lead & 0x80) == 0)         ///< 0xxx-xxxx
        {
            cp = lead;
        }
        else if ((lead & 0xE0) == 0xC0) ///< 110x-xxxx 10xx-xxxx
        {
            cp = lead & 0x1F;
            smallest = 0x80;
            trailing = 1;
        }
        else if ((lead & 0xF0) == 0xE0) ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
        {
            cp = lead & 0x0F;
            smallest = 0x800;
            trailing = 2;
        }
        else if ((lead & 0xF8) == 0xF0) ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
        {
            cp = lead & 0x07;
            smallest = 0x10000;
            trailing = 3;
        }
        else
        {
            // Stray continuation byte (10xx-xxxx) or a lead byte of 0xF8 and
            // above: skip this one byte.
            out.push_back(kReplacement);
            ++s;
            continue;
        }

        // The whole sequence must fit inside the input, and every trailing
        // byte must look like 10xx-xxxx.
        bool wellFormed = (size_s - s > trailing);
        for (size_t k = 1; wellFormed && k <= trailing; ++k)
        {
            const unsigned char next = static_cast<unsigned char>(src[s + k]);
            if ((next & 0xC0) != 0x80)
            {
                wellFormed = false;
            }
            else
            {
                cp = (cp << 6) | (next & 0x3F);
            }
        }
        if (!wellFormed)
        {
            // Drop only the lead byte so decoding can resynchronise.
            out.push_back(kReplacement);
            ++s;
            continue;
        }
        s += trailing + 1;

        if (cp < smallest || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        {
            // Overlong form, out of Unicode range, or an encoded surrogate.
            out.push_back(kReplacement);
        }
        else if (cp <= 0xFFFF || sizeof(wchar_t) > 2)
        {
            out.push_back(static_cast<wchar_t>(cp));
        }
        else
        {
            // 16-bit wchar_t: split into a UTF-16 surrogate pair.
            cp -= 0x10000;
            out.push_back(static_cast<wchar_t>(0xD800 + (cp >> 10)));
            out.push_back(static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
        }
    }

    t = out;
    return t;
}
/// Encodes a wide string as UTF-8 into a caller-supplied buffer.
///
/// A high surrogate immediately followed by a low surrogate is combined into
/// one code point and written as a single 4-byte sequence. Lone surrogates
/// and values above U+10FFFF are written as U+FFFD.
///
/// @param strRes    Wide string to encode.
/// @param utf8      Destination buffer; if NULL the function returns -1.
/// @param nMaxSize  Capacity of `utf8` in bytes.
/// @return          Number of bytes written, or -1 if `utf8` is NULL or the
///                  buffer is too small (bytes written before the overflow
///                  was detected remain in the buffer). A terminating NUL is
///                  appended only when there is spare room after the encoded
///                  bytes; it is not counted in the return value.
int Uni2UTF( const std::wstring& strRes, char *utf8, int nMaxSize )
{
    if (utf8 == NULL) {
        return -1;
    }

    int len = 0;
    const size_t count = strRes.size();
    for (size_t i = 0; i < count; ++i)
    {
        unsigned long cp = static_cast<unsigned long>(strRes[i]);

        // Join a UTF-16 surrogate pair into the code point it represents.
        if (cp >= 0xD800 && cp <= 0xDBFF && i + 1 < count)
        {
            const unsigned long low = static_cast<unsigned long>(strRes[i + 1]);
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i;
            }
        }

        // Unpaired surrogates and out-of-range values are not encodable.
        if ((cp >= 0xD800 && cp <= 0xDFFF) || cp > 0x10FFFF)
        {
            cp = 0xFFFD;
        }

        int need;
        if (cp < 0x80)         need = 1;
        else if (cp < 0x800)   need = 2;
        else if (cp < 0x10000) need = 3;
        else                   need = 4;

        // Check capacity for every sequence length, including single bytes.
        if (need > nMaxSize - len)
            return -1;

        switch (need)
        {
        case 1:
            utf8[len++] = static_cast<char>(cp);
            break;
        case 2:
            utf8[len++] = static_cast<char>(0xC0 | (cp >> 6));
            utf8[len++] = static_cast<char>(0x80 | (cp & 0x3F));
            break;
        case 3:
            utf8[len++] = static_cast<char>(0xE0 | (cp >> 12));
            utf8[len++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            utf8[len++] = static_cast<char>(0x80 | (cp & 0x3F));
            break;
        default:
            utf8[len++] = static_cast<char>(0xF0 | (cp >> 18));
            utf8[len++] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            utf8[len++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            utf8[len++] = static_cast<char>(0x80 | (cp & 0x3F));
            break;
        }
    }

    // Terminate when possible so the buffer can be used as a C string.
    if (len < nMaxSize)
        utf8[len] = '\0';
    return len;
}
/**
 * Prints the URL-encoded form of a fixed sample string, once per charset
 * name, in the order GB2312 then UTF8.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
final String[] charsetNames = {"GB2312", "UTF8"};
try {
for (String charsetName : charsetNames) {
// Expected output: %D6%DC%BB%AA%BD%A1 for GB2312,
// then %E5%91%A8%E5%8D%8E%E5%81%A5 for UTF8.
String encoded = URLEncoder.encode("周华健", charsetName);
System.out.println(encoded);
}
} catch (UnsupportedEncodingException unsupported) {
// Report an unknown charset name and stop encoding.
unsupported.printStackTrace();
}
}