今天是: 收藏本站 设为主页
网站首页 >  技术专栏  >  php  > 

PHP实现gb2312、UTF-8等字符和unicode间的编码转换及PHP版unescape

日期:2011-12-06  点击率:1885


// Demo: feed the literal to iconv() as UCS-2 input and print the UTF-8 result.
// NOTE(review): the literal is shown here as plain characters; it may have been
// an escaped sequence in the original article - confirm before relying on it.
echo iconv("UCS-2","UTF-8","专业");

// Declare the response as UTF-8 HTML; "@" silences any warning header() raises.
// NOTE(review): this runs after the echo above has already produced output -
// confirm output buffering is enabled, otherwise the header may not be sent.
@header("Content-Type:text/html; charset=UTF-8");
/**
 * Emulates JavaScript's unescape() and also decodes numeric HTML entities,
 * returning the decoded text as UTF-8.
 *
 * The input is first passed through rawurldecode() and then split into tokens:
 *   - %uXXXX    (four hex digits)
 *   - &#xXXXX;  (four hex digits)
 *   - &#DDDD;   (decimal digits)
 *   - any other single byte, which is copied through unchanged
 * Every escape token is packed into one 16-bit big-endian code unit and
 * converted with iconv(), so only code points up to U+FFFF are supported.
 * If iconv() cannot convert a token, that token contributes nothing to the
 * result (iconv() returns false, which joins as an empty string).
 *
 * @param string $str Escaped text.
 * @return string Decoded text.
 */
function unescape($str) {
    $str = rawurldecode($str);
    // The "s" modifier lets "." match newlines; without it they were silently
    // dropped from the result. Hex escapes must consist of real hex digits so
    // that pack("H4") never receives invalid input.
    preg_match_all('/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{4};|&#\d+;|./s', $str, $matches);
    $tokens = $matches[0];
    foreach ($tokens as $i => $token) {
        if (substr($token, 0, 2) == "%u") {
            $packed = pack("H4", substr($token, -4));
        } elseif (substr($token, 0, 3) == "&#x") {
            $packed = pack("H4", substr($token, 3, -1));
        } elseif (substr($token, 0, 2) == "&#") {
            $packed = pack("n", (int) substr($token, 2, -1));
        } else {
            // Ordinary byte: keep as is.
            continue;
        }
        // pack("H4") and pack("n") both emit big-endian bytes, so the source
        // encoding must name the byte order explicitly; plain "UCS-2" is
        // interpreted differently depending on the iconv implementation.
        $tokens[$i] = iconv("UCS-2BE", "UTF-8", $packed);
    }
    return join("", $tokens);
}
// Usage example: decode a string and print the result.
// NOTE(review): the argument is shown as plain text in this listing; presumably
// the original article passed an escaped form - confirm against the source.
echo unescape("WAP专业");

这样就好了,我本机的运行结果(运行结果的源代码):WAP专业


<?
/**
 * Converts a string into decimal numeric character references.
 *
 * The input is re-encoded to GBK, split into one- or two-byte chunks, and each
 * chunk is handed to utf8_unicode_(); the converted chunks are concatenated.
 *
 * @param string $input         Text to convert.
 * @param string $input_charset Charset of $input (any name iconv() accepts).
 * @return string Converted text; an empty string if iconv() cannot convert
 *                the input.
 */
function str_to_unicode($input, $input_charset = 'gbk'){
    $gbk = iconv($input_charset, "gbk", $input);
    if ($gbk === false) {
        // Make the conversion-failure result explicit instead of relying on
        // false being coerced to an empty subject string.
        return "";
    }
    // An optional high byte followed by any byte forms one chunk. The "s"
    // modifier lets "." match newlines, which were otherwise dropped.
    preg_match_all("/[\x80-\xff]?./s", $gbk, $chunks);
    return join("", array_map('utf8_unicode_', $chunks[0]));
}

/**
 * Callback adapter: re-encodes one character from $input_charset to UTF-8 and
 * passes the result to utf8_unicode().
 *
 * @param string $c             A single character in $input_charset.
 * @param string $input_charset Charset of $c.
 * @return string Whatever utf8_unicode() returns for the re-encoded character.
 */
function utf8_unicode_($c, $input_charset = 'gbk'){
    return utf8_unicode(iconv($input_charset, 'utf-8', $c));
}
/**
 * Converts one UTF-8 encoded character into a decimal numeric character
 * reference ("&#N;").
 *
 * Two-, three- and four-byte sequences are decoded to their code point.
 * Anything else (a single byte, an empty string, or more than four bytes) is
 * returned unchanged as a string.
 *
 * @param string $c One UTF-8 encoded character.
 * @return string "&#N;" for multi-byte input, otherwise the input itself.
 */
function utf8_unicode($c) {
    $c = (string) $c;
    switch (strlen($c)) {
        case 2:
            // Lead byte 110xxxxx carries 5 payload bits.
            $n = ((ord($c[0]) & 0x1f) << 6)
               | (ord($c[1]) & 0x3f);
            break;
        case 3:
            // Lead byte 1110xxxx carries 4 payload bits.
            $n = ((ord($c[0]) & 0x0f) << 12)
               | ((ord($c[1]) & 0x3f) << 6)
               | (ord($c[2]) & 0x3f);
            break;
        case 4:
            // Lead byte 11110xxx carries 3 payload bits.
            $n = ((ord($c[0]) & 0x07) << 18)
               | ((ord($c[1]) & 0x3f) << 12)
               | ((ord($c[2]) & 0x3f) << 6)
               | (ord($c[3]) & 0x3f);
            break;
        default:
            // Length 1 is plain ASCII; lengths 0 and >4 are not a UTF-8
            // character, so pass them through rather than emit "&#;".
            return $c;
    }
    return "&#{$n};";
}

/**
 * Converts decimal numeric character references back into ordinary characters.
 *
 * Every "&#N;" reference with one to five digits is replaced through the
 * unicode2utf8_() callback, then the whole string is converted from UTF-8 to
 * the requested charset with iconv().
 *
 * @param string $str         Text containing "&#N;" references.
 * @param string $out_charset Target charset (any name iconv() accepts).
 * @return string Result of the iconv() conversion.
 */
function str_from_unicode($str, $out_charset = 'gbk'){
    $utf8 = preg_replace_callback("|&#([0-9]{1,5});|", 'unicode2utf8_', $str);
    return iconv("UTF-8", $out_charset, $utf8);
}

/**
 * preg_replace_callback() adapter: forwards the first capture group of the
 * match to unicode2utf8().
 *
 * @param array $c Match array; element 1 holds the captured digits.
 * @return string Whatever unicode2utf8() returns for the captured value.
 */
function unicode2utf8_($c){
    $digits = $c[1];
    return unicode2utf8($digits);
}
/**
 * Encodes a code point as a UTF-8 byte sequence.
 *
 * @param int|string $c Code point, as an integer or a string of decimal digits.
 * @return string One to four UTF-8 bytes, or an empty string when the value is
 *                negative or not below 0x200000.
 */
function unicode2utf8($c){
    $c = (int) $c;
    if ($c < 0) {
        return "";
    }
    if ($c < 0x80) {
        // Emit the character itself; appending $c would output its decimal
        // digits (e.g. "65" instead of "A").
        return chr($c);
    }
    if ($c < 0x800) {
        return chr(0xC0 | ($c >> 6))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x10000) {
        return chr(0xE0 | ($c >> 12))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x200000) {
        return chr(0xF0 | ($c >> 18))
             . chr(0x80 | (($c >> 12) & 0x3F))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    return "";
}

/**
 * Emulates JavaScript's unescape() and also decodes numeric HTML entities,
 * producing GB2312-encoded characters for every decoded escape.
 *
 * The input is first passed through rawurldecode() and then split into tokens:
 *   - %uXXXX    (four hex digits)
 *   - &#xXXXX;  (four hex digits)
 *   - &#DDDD;   (decimal digits)
 *   - any other single byte, which is copied through unchanged
 * Every escape token is packed into one 16-bit big-endian code unit and
 * converted with iconv(), so only code points up to U+FFFF are supported.
 * If iconv() cannot convert a token, that token contributes nothing to the
 * result (iconv() returns false, which joins as an empty string).
 *
 * @param string $str Escaped text.
 * @return string Decoded text.
 */
function unescape($str) {
    $str = rawurldecode($str);
    // The "s" modifier lets "." match newlines; without it they were silently
    // dropped from the result. Hex escapes must consist of real hex digits so
    // that pack("H4") never receives invalid input.
    preg_match_all('/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{4};|&#\d+;|./s', $str, $matches);
    $tokens = $matches[0];
    foreach ($tokens as $i => $token) {
        if (substr($token, 0, 2) == "%u") {
            $packed = pack("H4", substr($token, -4));
        } elseif (substr($token, 0, 3) == "&#x") {
            $packed = pack("H4", substr($token, 3, -1));
        } elseif (substr($token, 0, 2) == "&#") {
            // No debug output here: a decoder must not write to the response.
            $packed = pack("n", (int) substr($token, 2, -1));
        } else {
            // Ordinary byte: keep as is.
            continue;
        }
        // pack("H4") and pack("n") both emit big-endian bytes, so the source
        // encoding must name the byte order explicitly; plain "UCS-2" is
        // interpreted differently depending on the iconv implementation.
        $tokens[$i] = iconv("UCS-2BE", "GB2312", $packed);
    }
    return join("", $tokens);
}
?>






下一篇:解决 json 乱码问题收藏 UCS-2与UTF8之间的选择    上一篇:ubuntu 安装 ffmpeg php-ffmpeg