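// Forum page residue captured together with the code (not part of the program):
// 8,303 community members | Post | Related to me | My tasks | Share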
#include "stdafx.h"
#include "windows.h"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
using namespace std;
// Converts a wide (UTF-16) string to UTF-8.
//
// str     the wide string to convert; its full length is converted, so
//         embedded NUL characters are kept in the result.
// returns the UTF-8 bytes, or an empty string when str is empty or the
//         Win32 conversion reports failure.
string ToUTF8(wstring str)
{
    // WideCharToMultiByte rejects a zero-length input, so handle it up front.
    if(str.empty())
        return string();

    const int wideLen=static_cast<int>(str.length());

    // First call with a null output buffer only reports the required byte count.
    const int byteLen=WideCharToMultiByte(CP_UTF8,0,str.c_str(),wideLen,0,0,0,0);
    if(byteLen<=0)
        return string();

    // Convert straight into the result's own storage: no manual new/delete and
    // no dependence on a terminating NUL to find the end of the data.
    string utf(static_cast<string::size_type>(byteLen),'\0');
    const int written=WideCharToMultiByte(CP_UTF8,0,str.c_str(),wideLen,&utf[0],byteLen,0,0);
    utf.resize(written>0 ? static_cast<string::size_type>(written) : 0);
    return utf;
}
// Converts a wide (UTF-16) string to the system ANSI code page (CP_ACP).
//
// str     the wide string to convert; its full length is converted.
// returns the ANSI-encoded bytes, or an empty string when str is empty or
//         the Win32 conversion reports failure.
string ToString(wstring str)
{
    // WideCharToMultiByte rejects a zero-length input, so handle it up front.
    if(str.empty())
        return string();

    const int wideLen=static_cast<int>(str.length());

    // Ask for the required size instead of assuming one byte per character:
    // in a double-byte code page a single wide character can need two bytes,
    // and an undersized buffer makes the conversion fail outright.
    const int byteLen=WideCharToMultiByte(CP_ACP,0,str.c_str(),wideLen,0,0,0,0);
    if(byteLen<=0)
        return string();

    string text(static_cast<string::size_type>(byteLen),'\0');
    const int written=WideCharToMultiByte(CP_ACP,0,str.c_str(),wideLen,&text[0],byteLen,0,0);
    text.resize(written>0 ? static_cast<string::size_type>(written) : 0);
    return text;
}
// Converts a UTF-8 string to a wide (UTF-16) string.
//
// utf     the UTF-8 bytes to convert; the full length is converted.
// returns the wide string, or an empty string when utf is empty or the
//         Win32 conversion reports failure.
wstring FromUTF8(string utf)
{
    // MultiByteToWideChar rejects a zero-length input, so handle it up front.
    if(utf.empty())
        return wstring();

    // Pass the explicit byte count rather than -1: with -1 the terminator is
    // converted too, which needs one more output slot than the input length
    // and made pure-ASCII input fail with the previous buffer size.
    const int byteLen=static_cast<int>(utf.length());

    // First call with a null output buffer only reports the required length.
    const int wideLen=MultiByteToWideChar(CP_UTF8,0,utf.c_str(),byteLen,0,0);
    if(wideLen<=0)
        return wstring();

    // Convert directly into the result; the wstring keeps its own terminator,
    // so nothing is written past the converted characters.
    wstring str(static_cast<wstring::size_type>(wideLen),L'\0');
    const int written=MultiByteToWideChar(CP_UTF8,0,utf.c_str(),byteLen,&str[0],wideLen);
    str.resize(written>0 ? static_cast<wstring::size_type>(written) : 0);
    return str;
}
// Converts a string in the system ANSI code page (CP_ACP) to a wide
// (UTF-16) string.
//
// str     the ANSI bytes to convert; the full length is converted, so
//         embedded NUL bytes are kept in the result.
// returns the wide string, or an empty string when str is empty or the
//         Win32 conversion reports failure.
wstring ToString(string str)
{
    // MultiByteToWideChar rejects a zero-length input, so handle it up front.
    if(str.empty())
        return wstring();

    const int byteLen=static_cast<int>(str.length());

    // First call with a null output buffer only reports the required length.
    const int wideLen=MultiByteToWideChar(CP_ACP,0,str.c_str(),byteLen,0,0);
    if(wideLen<=0)
        return wstring();

    // Check the result of the real conversion too, so a failure can never
    // hand back uninitialised buffer contents.
    wstring text(static_cast<wstring::size_type>(wideLen),L'\0');
    const int written=MultiByteToWideChar(CP_ACP,0,str.c_str(),byteLen,&text[0],wideLen);
    text.resize(written>0 ? static_cast<wstring::size_type>(written) : 0);
    return text;
}
// Demo: converts an ANSI literal to UTF-16, then to UTF-8, converts the UTF-8
// back to UTF-16 as a round-trip check, and writes the UTF-8 text to
// D:\01.txt.
//
// Returns 0 on success, 1 when the output file cannot be opened.
int main()
{
    string asc="abcd这是一个ASCII字符串";
    // ToString has two overloads: ANSI -> Unicode and Unicode -> ANSI.
    wstring unicode=ToString(asc);
    string utf8=ToUTF8(unicode);
    wstring utf16=FromUTF8(utf8);

    // The round trip through UTF-8 should give back the same wide string.
    if(utf16!=unicode)
        cerr<<"UTF-8 round trip did not reproduce the original text"<<'\n';

    ofstream ofs("D:\\01.txt");
    if(!ofs)
    {
        cerr<<"cannot open D:\\01.txt for writing"<<'\n';
        system("pause");
        return 1;
    }
    ofs<<utf8.c_str()<<'\n';
    ofs.close();

    // Keeps the console window open when started from the IDE.
    system("pause");
    return 0;
}
// Encodes the single value *iter as UTF-8 and writes the bytes through dest.
//
// iter  points at the value to encode; it is read once and not advanced.
// dest  output iterator receiving 1 to 4 chars; it is advanced past every
//       byte written (taken by reference so the caller sees the new position).
//
// Values up to 0x10FFFF are encoded in 1-4 bytes. Anything that falls outside
// that range after conversion to unsigned long produces no output. Surrogate
// values (0xD800-0xDFFF) are encoded as ordinary 3-byte sequences here;
// pairing them is the job of wchar_utf8.
template<typename InputIterator, typename OutputIterator>
void encode_wchar(InputIterator iter, OutputIterator &dest)
{
    const unsigned long cp = static_cast<unsigned long>(*iter);

    if(cp <= 0x007FUL)
    {
        // 0xxxxxxx
        *dest = static_cast<char>(cp);
        ++dest;
    }
    else if(cp <= 0x07FFUL)
    {
        // 110xxxxx 10xxxxxx
        *dest = static_cast<char>(0xC0UL | (cp >> 6));
        ++dest;
        *dest = static_cast<char>(0x80UL | (cp & 0x3FUL));
        ++dest;
    }
    else if(cp <= 0xFFFFUL)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        *dest = static_cast<char>(0xE0UL | (cp >> 12));
        ++dest;
        *dest = static_cast<char>(0x80UL | ((cp >> 6) & 0x3FUL));
        ++dest;
        *dest = static_cast<char>(0x80UL | (cp & 0x3FUL));
        ++dest;
    }
    else if(cp <= 0x10FFFFUL)
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        // Reached directly when the element type is wider than 16 bits, and
        // from wchar_utf8 after it has combined a surrogate pair.
        *dest = static_cast<char>(0xF0UL | (cp >> 18));
        ++dest;
        *dest = static_cast<char>(0x80UL | ((cp >> 12) & 0x3FUL));
        ++dest;
        *dest = static_cast<char>(0x80UL | ((cp >> 6) & 0x3FUL));
        ++dest;
        *dest = static_cast<char>(0x80UL | (cp & 0x3FUL));
        ++dest;
    }
}

// Encodes the range [first, last) as UTF-8, writing the bytes through dest.
//
// A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
// (0xDC00-0xDFFF) is combined into one code point and written as a single
// 4-byte sequence. An unpaired surrogate is passed to encode_wchar as is and
// therefore still comes out as a 3-byte sequence.
//
// Returns the output iterator positioned after the last byte written.
template<typename InputIterator, typename OutputIterator>
OutputIterator wchar_utf8(InputIterator first, InputIterator last, OutputIterator dest)
{
    while(first != last)
    {
        const unsigned long unit = static_cast<unsigned long>(*first);

        if(unit >= 0xD800UL && unit <= 0xDBFFUL)
        {
            // Step past the high surrogate, then look at (without consuming)
            // the following element to see whether it completes the pair.
            ++first;
            if(first != last)
            {
                const unsigned long next = static_cast<unsigned long>(*first);
                if(next >= 0xDC00UL && next <= 0xDFFFUL)
                {
                    const unsigned long cp =
                        0x10000UL + ((unit - 0xD800UL) << 10) + (next - 0xDC00UL);
                    encode_wchar(&cp, dest);
                    ++first;
                    continue;
                }
            }
            // No matching low surrogate: emit the lone unit and let the next
            // iteration handle whatever follows.
            encode_wchar(&unit, dest);
            continue;
        }

        encode_wchar(first, dest);
        ++first;
    }
    return dest;
}
// Decodes one UTF-8 sequence of up to three bytes starting at iter and
// returns the value as a wchar_t.
//
// iter  current read position, taken by reference; advanced past the lead
//       byte here and handed on to decode_utf8_mb for each trailing byte.
// last  end of the input, forwarded to decode_utf8_mb.
//
// *iter is read without comparing iter to last, so the caller must make sure
// at least one byte is available.
// NOTE(review): decode_utf8_mb is not defined in this part of the file;
// presumably it consumes one continuation byte and returns its six payload
// bits - confirm against its definition.
//
// Throws std::runtime_error when the lead byte matches none of the three
// patterns handled below.
template<typename InputIterator>
wchar_t decode_utf8(InputIterator &iter, InputIterator last)
{
    // 0xxxxxxx: the byte itself is the value.
    if ((*iter & 0x80) == 0)
        return *iter++;

    // 110xxxxx: five payload bits, then one trailing byte.
    if ((*iter & 0xe0) == 0xc0)
    {
        const wchar_t lead = (*iter++) & 0x1f;
        const wchar_t tail = decode_utf8_mb(iter, last);
        return (lead << 6) | tail;
    }

    // 1110xxxx: four payload bits, then two trailing bytes.
    if ((*iter & 0xf0) == 0xe0)
    {
        const wchar_t lead = (*iter++) & 0x0f;
        const wchar_t middle = decode_utf8_mb(iter, last);
        const wchar_t tail = decode_utf8_mb(iter, last);
        return (lead << 12) | (middle << 6) | tail;
    }

    // TODO: support surrogate pairs
    throw std::runtime_error("UTF-8 not convertable to UTF-16");
}
// Decodes the UTF-8 input [first, last) by calling decode_utf8 repeatedly and
// writes each returned wchar_t through dest.
//
// decode_utf8 advances first itself, so the loop only has to step dest.
// Any exception thrown by decode_utf8 propagates to the caller.
//
// Returns the output iterator positioned after the last value written.
template<typename InputIterator, typename OutputIterator>
OutputIterator utf8_wchar(InputIterator first, InputIterator last, OutputIterator dest)
{
    while(first != last)
    {
        *dest = decode_utf8(first, last);
        ++dest;
    }
    return dest;
}