谁能提供UTF-8字符串与标准字符串的转换函数,标准C最好。这100分就归你了!

Borlandor 2002-05-20 04:07:40
char* UTF8ToANSI(wchar_t* s);
wchar_t* ANSIToUTF8(char* s);
...全文
50 4 打赏 收藏 转发到动态 举报
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
Borlandor 2002-05-21
  • 打赏
  • 举报
回复
I have got them,thank you very much!

Bless you!
kingbird 2002-05-20
  • 打赏
  • 举报
回复
转换码表已发出,注意查收
有关UTF-8的编码规则你可以查阅RFC 2044(http://www.faqs.org/rfcs/rfc2044.html)。
Borlandor 2002-05-20
  • 打赏
  • 举报
回复
谢谢,需要你的转换码表。我给你发信去!
kingbird 2002-05-20
  • 打赏
  • 举报
回复
核心代码如下,有四个文件:utf.h, utf.c, gbtou8.c, u8togb.c
自己花点时间改下吧。
另外还需要两个转换码表,想要的话,发MAIL到:stevenxyj@163.net

/* =====FILE: utf.h===== */
#ifndef UTF_H
#define UTF_H

#include <stdio.h>

/* One UCS character value (the encoder handles up to 31 bits). */
typedef long Char;

/* The UTF-FSS (aka UTF-2) encoding of ISO-10646/Unicode */

/*
 * Read one encoded character from f.
 * Returns the UCS value, EOF at end of input, or 0x80 for malformed input.
 * (The parameter list is only spelled out for ANSI compilers.)
 */
Char utf_getc(
#ifdef __STDC__
FILE *f
#endif
);

/*
 * Write c to f in encoded form.
 * Returns the character written (masked to 31 bits), or EOF on write error.
 */
Char utf_putc(
#ifdef __STDC__
Char c, FILE *f
#endif
);

/* Convenience forms operating on the standard streams. */
#define utf_getchar() utf_getc(stdin)
#define utf_putchar(wc) utf_putc((wc),stdout)

#endif /* UTF_H */


/* =====FILE: utf.c===== */
#include "utf.h"

/* Shorthand for the `register` storage-class specifier. */
#define reg register

/*
* The UTF-FSS (aka UTF-2) encoding of UCS, as described in the following
* quote from Ken Thompson's utf-fss.c:
*
* Bits Hex Min Hex Max Byte Sequence in Binary
* 7 00000000 0000007f 0vvvvvvv
* 11 00000080 000007FF 110vvvvv 10vvvvvv
* 16 00000800 0000FFFF 1110vvvv 10vvvvvv 10vvvvvv
* 21 00010000 001FFFFF 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
* 26 00200000 03FFFFFF 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
* 31 04000000 7FFFFFFF 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
*
* The UCS value is just the concatenation of the v bits in the multibyte
* encoding. When there are multiple ways to encode a value, for example
* UCS 0, only the shortest encoding is legal.
*/

/* This implementation does not enforce the last restriction on input */

#define CODE_ERROR 0x80

Char
utf_getc(f)
reg FILE *f;
{
reg int c;
reg Char wc;
reg int extras;
reg unsigned bit;

if ((c = getc(f)) == EOF)
return EOF;
if ((c & 0x80) == 0) /* ASCII character */
return c;
if ((c & 0xc0) == 0x80) /* unexpected tail character */
return CODE_ERROR;
/* how many extra bytes? */
extras = 1;
for (bit = 0x20; (c & bit) != 0; bit >>= 1)
extras++;
if (extras > 5)
return CODE_ERROR;
/* put all the bits together */
wc = c & (bit-1);
while (extras-- > 0) {
if ((c = getc(f)) == EOF)
return EOF;
if ((c & 0xc0) != 0x80) { /* unexpected head character */
ungetc(c, f);
return CODE_ERROR;
}
wc = (wc<<6) | c&0x3f;
}
return wc;
}

/*
 * utf_putc -- write wc to f in UTF-FSS encoding.
 *
 * Only the low 31 bits of wc are encoded.  Returns the (masked) character
 * that was written, or EOF as soon as a putc() fails; in that case part of
 * the byte sequence may already have been written.
 */
Char
utf_putc(Char wc, FILE *f)
{
    Char tmp;
    int extras;
    int lead;

    wc &= 0x7fffffffL;
    if (wc < 0x80) {                /* ASCII character */
        return putc((int)wc, f) == EOF ? EOF : wc;
    }

    /*
     * How many tail bytes are required?  One tail byte covers 11 bits;
     * each additional tail byte extends the range by 5 more bits.
     */
    extras = 1;
    for (tmp = wc >> 11; tmp != 0; tmp >>= 5) {
        extras++;
    }

    /*
     * Head byte: 0x1f80 >> extras, cut to 8 bits, yields the marker
     * 0xC0, 0xE0, 0xF0, 0xF8 or 0xFC; the top payload bits fill the rest.
     */
    lead = (0xff & (0x1f80 >> extras)) | (int)(wc >> (extras * 6));
    if (putc(lead, f) == EOF) {
        return EOF;
    }

    /* tail bytes: six payload bits each, most significant group first */
    while (extras-- != 0) {
        int tail = 0x80 | (0x3f & (int)(wc >> (extras * 6)));

        if (putc(tail, f) == EOF) {
            return EOF;
        }
    }
    return wc;
}


/* =====FILE: gbtou8.c===== */
#include "utf.h"

/*
 * Conversion table from double-byte input codes to UCS values, supplied
 * separately.  NOTE(review): the code below assumes it holds 94*94 entries
 * laid out row-major -- confirm against the table source.
 */
extern unsigned short gb_in[];

/*
 * gbtou8 -- filter: copy stdin to stdout, passing bytes below 0x80 through
 * unchanged and translating each double-byte code through gb_in[] before
 * writing it with utf_putc().
 *
 * Bytes that cannot form a valid table index are replaced by U+FFFD
 * (REPLACEMENT CHARACTER) instead of being used to index outside gb_in[].
 * Input that ends after a lead byte is silently dropped.  Always returns 0.
 */
int
main(void)
{
    enum { GB_LEAD_BASE = 0xa1, GB_TRAIL_BASE = 0x21, GB_CELLS = 94 };
    int c1, c2;
    int row, col;

    while ((c1 = getchar()) != EOF) {
        if ((c1 & 0x80) == 0) {
            utf_putc(c1, stdout);
            continue;
        }

        /* 0x80..0xA0 and 0xFF would index before/after the table */
        row = c1 - GB_LEAD_BASE;
        if (row < 0 || row >= GB_CELLS) {
            utf_putc(0xfffdL, stdout);
            continue;
        }

        if ((c2 = getchar()) == EOF) {
            break;
        }
        col = (c2 & 0x7f) - GB_TRAIL_BASE;
        if (col < 0 || col >= GB_CELLS) {
            /*
             * Not a usable second byte: report the broken pair once and
             * push the byte back so that e.g. a newline is not swallowed.
             */
            utf_putc(0xfffdL, stdout);
            ungetc(c2, stdin);
            continue;
        }

        utf_putc(gb_in[row * GB_CELLS + col], stdout);
    }
    return 0;
}


/* =====FILE: u8togb.c===== */

#include "utf.h"

/*
 * Conversion table from UCS values to double-byte output codes, supplied
 * separately.  NOTE(review): the code below assumes it is indexed directly
 * by UCS value and covers 0x0000..0xFFFF -- confirm against the table source.
 */
extern unsigned short gb_out[];

/*
 * u8togb -- filter: decode stdin with utf_getc() and write to stdout.
 * Values below 0x80 are written as a single byte; other values are looked
 * up in gb_out[] and written as two bytes, high byte first.
 *
 * Values above 0xFFFF cannot be represented by the 16-bit table index and
 * are written as '?' instead of being used to index outside gb_out[].
 * Always returns 0.
 */
int
main(void)
{
    long c;
    unsigned int gb;

    while ((c = utf_getc(stdin)) != EOF) {
        if (c < 0x80) {
            putchar((int)c);
        } else if (c > 0xffffL) {
            /* utf_getc() can deliver up to 31 bits; the table cannot */
            putchar('?');
        } else {
            gb = gb_out[c];
            putchar((int)(gb >> 8));
            putchar((int)(gb & 0xff));
        }
    }
    return 0;
}

69,371

社区成员

发帖
与我相关
我的任务
社区描述
C语言相关问题讨论
社区管理员
  • C语言
  • 花神庙码农
  • 架构师李肯
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧