dingsongtao大哥,刚才那问题 续

woshimajia0005 2010-07-01 01:51:41
http://topic.csdn.net/u/20100701/11/743c2632-aa16-4151-868e-30be67c9cc3e.html

我用你刚才的代码(如下),能正确地把中文转换成这种编码:

<?php

// Demo: escape a Chinese string literal and print the ASCII-only result.
// encode() interprets its argument as UTF-8 bytes, so the literal below is
// only converted correctly when this file itself is saved as UTF-8.
$str = '上海';

// encode() is a static method; call it on the class instead of creating a
// throw-away instance just to reach it.
echo StrFormat_Ascii::encode($str);

/**
 * Converts between UTF-8 text and an ASCII-only representation that uses
 * JSON-style escapes (\b \t \n \f \r \" \/ \\ and \uXXXX).
 *
 * All input handed to encode() must already be UTF-8. Text that arrives in
 * another charset (for example GB2312 from a non-UTF-8 form) has to be
 * converted first, e.g. with iconv() or mb_convert_encoding(); bytes that do
 * not form valid UTF-8 are skipped.
 *
 * String offsets use the $str[$i] form: the older $str{$i} form was removed
 * in PHP 8.0.
 */
class StrFormat_Ascii
{
    /**
     * Escapes a UTF-8 string into pure ASCII.
     *
     * - \b \t \n \f \r are written as their two-character escapes.
     * - " / and \ are prefixed with a backslash.
     * - Any other control character below 0x20 is written as \u00XX.
     * - Remaining ASCII (0x20-0x7F) is copied unchanged.
     * - Every non-ASCII character becomes \uXXXX (lower-case hex); characters
     *   above U+FFFF become a surrogate pair of two \uXXXX escapes.
     * - Bytes that are not part of a well-formed UTF-8 sequence (stray
     *   continuation bytes, truncated or overlong sequences, encoded
     *   surrogates, values above U+10FFFF) are skipped one byte at a time, so
     *   valid text following them is preserved.
     *
     * @param string $var UTF-8 encoded text
     * @return string ASCII-only escaped text
     */
    public static function encode($var)
    {
        $var = (string) $var;
        $ascii = '';
        $length = strlen($var);

        for ($c = 0; $c < $length; ++$c) {
            $byte = ord($var[$c]);

            switch (true) {
                case $byte === 0x08:
                    $ascii .= '\b';
                    break;
                case $byte === 0x09:
                    $ascii .= '\t';
                    break;
                case $byte === 0x0A:
                    $ascii .= '\n';
                    break;
                case $byte === 0x0C:
                    $ascii .= '\f';
                    break;
                case $byte === 0x0D:
                    $ascii .= '\r';
                    break;

                case $byte === 0x22: // double quote
                case $byte === 0x2F: // forward slash
                case $byte === 0x5C: // backslash
                    $ascii .= '\\' . $var[$c];
                    break;

                case $byte < 0x20:
                    // Control characters without a short escape would
                    // otherwise be lost; keep them as \u00XX.
                    $ascii .= self::escapeCodePoint($byte);
                    break;

                case $byte <= 0x7F:
                    $ascii .= $var[$c];
                    break;

                default:
                    $decoded = self::decodeUtf8At($var, $c);
                    if ($decoded === null) {
                        // Not valid UTF-8 here: drop this single byte only.
                        break;
                    }
                    $ascii .= self::escapeCodePoint($decoded[0]);
                    // The loop's ++$c accounts for the lead byte.
                    $c += $decoded[1] - 1;
                    break;
            }
        }

        return $ascii;
    }

    /**
     * Reverses encode(): turns escaped ASCII text back into UTF-8.
     *
     * Recognised escapes are \b \t \n \f \r, \" \' \\ \/ (the backslash is
     * removed and the following character kept) and \uXXXX. A \uXXXX high
     * surrogate immediately followed by a \uXXXX low surrogate is combined
     * into one character; unpaired surrogates are dropped because they have
     * no UTF-8 form. Printable ASCII is copied as is, and raw multi-byte
     * sequences are copied through with the length implied by their lead
     * byte. Anything else (raw control bytes, stray continuation bytes) is
     * skipped.
     *
     * @param string $var escaped text
     * @return string UTF-8 text
     */
    public static function decode($var)
    {
        $chrs = (string) $var;
        $utf8 = '';
        $length = strlen($chrs);

        for ($c = 0; $c < $length; ++$c) {
            $pair = substr($chrs, $c, 2);
            $byte = ord($chrs[$c]);
            // -1 unless a complete \uXXXX escape starts at $c.
            $unit = self::escapedUnitAt($chrs, $c);

            switch (true) {
                case $pair === '\b':
                    $utf8 .= chr(0x08);
                    ++$c;
                    break;
                case $pair === '\t':
                    $utf8 .= chr(0x09);
                    ++$c;
                    break;
                case $pair === '\n':
                    $utf8 .= chr(0x0A);
                    ++$c;
                    break;
                case $pair === '\f':
                    $utf8 .= chr(0x0C);
                    ++$c;
                    break;
                case $pair === '\r':
                    $utf8 .= chr(0x0D);
                    ++$c;
                    break;

                case $pair === '\\"':
                case $pair === '\\\'':
                case $pair === '\\\\':
                case $pair === '\\/':
                    // Consume both characters. Leaving the second one for the
                    // next iteration would let an escaped backslash start a
                    // new escape (e.g. \\n would turn into a newline).
                    $utf8 .= $chrs[++$c];
                    break;

                case $unit >= 0:
                    $c += 5; // skip the rest of \uXXXX
                    if ($unit >= 0xD800 && $unit <= 0xDBFF) {
                        $low = self::escapedUnitAt($chrs, $c + 1);
                        if ($low >= 0xDC00 && $low <= 0xDFFF) {
                            $codePoint = 0x10000
                                + (($unit - 0xD800) << 10)
                                + ($low - 0xDC00);
                            $utf8 .= self::codePointToUtf8($codePoint);
                            $c += 6; // the low surrogate escape is consumed too
                        }
                        // An unpaired high surrogate is dropped.
                        break;
                    }
                    if ($unit >= 0xDC00 && $unit <= 0xDFFF) {
                        // Stray low surrogate: dropped.
                        break;
                    }
                    $utf8 .= self::codePointToUtf8($unit);
                    break;

                case ($byte >= 0x20) && ($byte <= 0x7F):
                    $utf8 .= $chrs[$c];
                    break;

                case ($byte & 0xE0) === 0xC0:
                    $utf8 .= substr($chrs, $c, 2);
                    ++$c;
                    break;

                case ($byte & 0xF0) === 0xE0:
                    $utf8 .= substr($chrs, $c, 3);
                    $c += 2;
                    break;

                case ($byte & 0xF8) === 0xF0:
                    $utf8 .= substr($chrs, $c, 4);
                    $c += 3;
                    break;

                case ($byte & 0xFC) === 0xF8:
                    $utf8 .= substr($chrs, $c, 5);
                    $c += 4;
                    break;

                case ($byte & 0xFE) === 0xFC:
                    $utf8 .= substr($chrs, $c, 6);
                    $c += 5;
                    break;
            }
        }

        return $utf8;
    }

    /**
     * Converts UTF-8 text to big-endian UTF-16 without a byte order mark.
     *
     * Uses mb_convert_encoding() when the mbstring extension is loaded. The
     * pure-PHP fallback handles 1- to 4-byte sequences (4-byte sequences
     * become surrogate pairs) and skips malformed bytes.
     * NOTE(review): on malformed input the mbstring path substitutes its
     * configured replacement character instead of skipping - confirm whether
     * callers care.
     *
     * @param string $utf8 UTF-8 text
     * @return string UTF-16BE bytes
     */
    public static function utf82utf16($utf8)
    {
        $utf8 = (string) $utf8;

        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf8, 'UTF-16BE', 'UTF-8');
        }

        $utf16 = '';
        $length = strlen($utf8);
        $i = 0;
        while ($i < $length) {
            $decoded = self::decodeUtf8At($utf8, $i);
            if ($decoded === null) {
                ++$i;
                continue;
            }
            $utf16 .= self::codePointToUtf16($decoded[0]);
            $i += $decoded[1];
        }

        return $utf16;
    }

    /**
     * Converts big-endian UTF-16 bytes to UTF-8.
     *
     * The encoding is named explicitly as UTF-16BE so that a leading
     * FE FF / FF FE code unit is treated as data rather than as a byte order
     * mark. The pure-PHP fallback joins surrogate pairs, drops unpaired
     * surrogates and ignores a trailing odd byte.
     *
     * @param string $utf16 UTF-16BE bytes
     * @return string UTF-8 text
     */
    public static function utf162utf8($utf16)
    {
        $utf16 = (string) $utf16;

        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16BE');
        }

        $utf8 = '';
        $length = strlen($utf16);
        for ($i = 0; $i + 1 < $length; $i += 2) {
            $unit = (ord($utf16[$i]) << 8) | ord($utf16[$i + 1]);

            if ($unit >= 0xD800 && $unit <= 0xDBFF && $i + 3 < $length) {
                $low = (ord($utf16[$i + 2]) << 8) | ord($utf16[$i + 3]);
                if ($low >= 0xDC00 && $low <= 0xDFFF) {
                    $codePoint = 0x10000
                        + (($unit - 0xD800) << 10)
                        + ($low - 0xDC00);
                    $utf8 .= self::codePointToUtf8($codePoint);
                    $i += 2; // together with the loop step: skip both units
                    continue;
                }
            }

            if ($unit >= 0xD800 && $unit <= 0xDFFF) {
                continue; // unpaired surrogate
            }

            $utf8 .= self::codePointToUtf8($unit);
        }

        return $utf8;
    }

    /**
     * Decodes the UTF-8 sequence that starts at byte offset $pos.
     *
     * The caller must ensure $pos is a valid offset into $str.
     *
     * @param string $str byte string
     * @param int    $pos offset of the lead byte
     * @return array|null array(code point, byte length), or null when the
     *                    bytes at $pos are not a well-formed sequence
     */
    private static function decodeUtf8At($str, $pos)
    {
        $lead = ord($str[$pos]);

        if ($lead < 0x80) {
            return array($lead, 1);
        }

        if ($lead >= 0xC2 && $lead <= 0xDF) {
            $size = 2;
            $codePoint = $lead & 0x1F;
            $minimum = 0x80;
        } elseif (($lead & 0xF0) === 0xE0) {
            $size = 3;
            $codePoint = $lead & 0x0F;
            $minimum = 0x800;
        } elseif ($lead >= 0xF0 && $lead <= 0xF4) {
            $size = 4;
            $codePoint = $lead & 0x07;
            $minimum = 0x10000;
        } else {
            // Continuation byte, overlong lead (C0/C1) or lead above F4.
            return null;
        }

        if ($pos + $size > strlen($str)) {
            return null; // truncated sequence
        }

        for ($i = 1; $i < $size; ++$i) {
            $next = ord($str[$pos + $i]);
            if (($next & 0xC0) !== 0x80) {
                return null;
            }
            $codePoint = ($codePoint << 6) | ($next & 0x3F);
        }

        if ($codePoint < $minimum
            || $codePoint > 0x10FFFF
            || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)
        ) {
            return null; // overlong, out of range, or an encoded surrogate
        }

        return array($codePoint, $size);
    }

    /**
     * Reads a \uXXXX escape at byte offset $pos.
     *
     * @param string $str escaped text
     * @param int    $pos offset of the backslash
     * @return int the 16-bit value, or -1 when no complete escape starts there
     */
    private static function escapedUnitAt($str, $pos)
    {
        if (substr($str, $pos, 2) !== '\u') {
            return -1;
        }

        $hex = (string) substr($str, $pos + 2, 4);
        if (strlen($hex) !== 4 || strspn($hex, '0123456789abcdefABCDEF') !== 4) {
            return -1;
        }

        return (int) hexdec($hex);
    }

    /**
     * Formats a code point as one \uXXXX escape, or as a surrogate pair of
     * two escapes when it lies above U+FFFF.
     *
     * @param int $codePoint Unicode scalar value
     * @return string
     */
    private static function escapeCodePoint($codePoint)
    {
        if ($codePoint < 0x10000) {
            return sprintf('\u%04x', $codePoint);
        }

        $offset = $codePoint - 0x10000;

        return sprintf(
            '\u%04x\u%04x',
            0xD800 | ($offset >> 10),
            0xDC00 | ($offset & 0x3FF)
        );
    }

    /**
     * Encodes a code point as UTF-8 bytes.
     *
     * @param int $codePoint Unicode scalar value
     * @return string
     */
    private static function codePointToUtf8($codePoint)
    {
        if ($codePoint < 0x80) {
            return chr($codePoint);
        }
        if ($codePoint < 0x800) {
            return chr(0xC0 | ($codePoint >> 6))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        if ($codePoint < 0x10000) {
            return chr(0xE0 | ($codePoint >> 12))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }

        return chr(0xF0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3F))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    /**
     * Encodes a code point as big-endian UTF-16 bytes (two bytes, or four for
     * a surrogate pair).
     *
     * @param int $codePoint Unicode scalar value
     * @return string
     */
    private static function codePointToUtf16($codePoint)
    {
        if ($codePoint < 0x10000) {
            return chr($codePoint >> 8) . chr($codePoint & 0xFF);
        }

        $offset = $codePoint - 0x10000;
        $high = 0xD800 | ($offset >> 10);
        $low = 0xDC00 | ($offset & 0x3FF);

        return chr($high >> 8) . chr($high & 0xFF)
            . chr($low >> 8) . chr($low & 0xFF);
    }
}
?>




被转换的中文字符串是直接在该程序文件中定义的(如上面的代码):

$str = '上海';
$ss = new StrFormat_Ascii();
echo $ss->encode($str);
(这样直接在程序中指定中文字符串内容时,能正确转换...)


但如果被转换的字符串是通过 GET 方式传递获取的,例如下:

$str = $_GET['str'];
$ss = new StrFormat_Ascii();
echo $ss->encode($str);


那么这样就好像得不到所需要的结果,输出要么是\u0000,要么是空的..... 我加上 urldecode 也同样有这个问题...
...全文
21 点赞 收藏 2
写回复
2 条回复
切换为时间正序
请发表友善的回复…
发表回复
Dleno 2010-07-01
最好的就是你页面的编码也用utf-8格式的,就不用转了,不过改页面的编码也会涉及到数据库的编码,也就是要统一编码。
一般来说,建议都用utf-8的编码,一是它更通用,二是在PHP里有些内置函数的处理上,都会直接支持utf-8的编码,而不用做特殊处理,比如有的图片处理等函数,就不能直接用gb2312的字符,而需要转码为utf-8。
当然还有其他地方也会有这种情况。而又不会有错误出来,很难找出来问题所在。
回复
Dleno 2010-07-01
$str = '上海';
$ss = new StrFormat_Ascii();
echo $ss->encode($str);
//======================
//那个类只能是转换utf-8格式的,与老大的那个的差别也在前个帖子里说了。
//都是需要转换编码的。
//这里正确是因为你的PHP页面的文档编码本就是UTF-8的,所以不用转换,直接就可以。

$str = $_GET['str'];
$ss = new StrFormat_Ascii();
echo $ss->encode($str);
//=======================
//而这里不可以,应该是因为你前台页面的编码不是utf-8,所以$_GET['str'];得到的字符串也不是utf-8的,所以这里就需要用iconv转码了,
//如你页面编码是gb2312,则
$str = iconv("GB2312","UTF-8",$_GET['str']);
$ss = new StrFormat_Ascii();
echo $ss->encode($str);
回复
发动态
发帖子
基础编程
创建于2007-09-28

2.1w+

社区成员

从PHP安装配置,PHP入门,PHP基础到PHP应用
申请成为版主
社区公告
暂无公告