字符串字符集的编解码转换

1、utf8转gbk

/**
 * Convert a UTF-8 string to GBK using the gb-unicode code table.
 *
 * The table is loaded once from CODETABLEDIR.'gb-unicode.table' and cached in
 * the global $UC2GBTABLE as "Unicode code point => GB code"; 0x8080 is added
 * to the GB code to obtain the two output bytes. Code points that are not in
 * the table are written as decimal numeric character references ("&#NNNN;").
 *
 * @param string $utfstr UTF-8 encoded input (other scalars are cast to string)
 * @return string GBK encoded text with leading/trailing whitespace trimmed
 */
function utf8_to_gbk($utfstr) {
   global $UC2GBTABLE;
   if(empty($UC2GBTABLE)) {
      $filename = CODETABLEDIR.'gb-unicode.table';
      $fp = fopen($filename, 'rb');
      // Each record is read as at most 14 bytes; columns 0-5 hold the GB code
      // and columns 7-12 the Unicode code point, both parsed as hexadecimal.
      while($l = fgets($fp,15)) {
         $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
      }
      fclose($fp);
   }
   $utfstr = (string) $utfstr;
   $okstr = '';
   $ulen = strlen($utfstr);
   for($i = 0; $i < $ulen; $i++) {
      $c = $utfstr[$i];
      $lead = ord($c);
      if($lead < 0x80) {
         // Plain ASCII is copied through unchanged.
         $okstr .= $c;
         continue;
      }
      // The number of continuation bytes follows from the lead byte's high
      // bits: 110xxxxx -> 1, 1110xxxx -> 2, 11110xxx -> 3. Anything else
      // (stray continuation byte, invalid lead) is handled as a single byte.
      if(($lead & 0xE0) == 0xC0) {
         $csize = 1;
      } elseif(($lead & 0xF0) == 0xE0) {
         $csize = 2;
      } elseif(($lead & 0xF8) == 0xF0) {
         $csize = 3;
      } else {
         $csize = 0;
      }
      // Never read past the end of the input when the last sequence is truncated.
      for($j = 0; $j < $csize && $i + 1 < $ulen; $j++) {
         $i++;
         $c .= $utfstr[$i];
      }
      $code = utf8_to_unicode($c);
      if(isset($UC2GBTABLE[$code])) {
         $gb = $UC2GBTABLE[$code] + 0x8080;
         $okstr .= chr(($gb >> 8) & 0xFF).chr($gb & 0xFF);
      } else {
         $okstr .= '&#'.$code.';';
      }
   }
   return trim($okstr);
}

2、gbk转utf8

/**
 * Convert a GBK string to UTF-8 using the gb-unicode code table.
 *
 * The table is loaded once from CODETABLEDIR.'gb-unicode.table' and cached in
 * the global $CODETABLE as "GB code => hexadecimal Unicode code point text".
 * Double-byte characters that have no table entry are dropped.
 *
 * @param string $gbstr GBK encoded input (other scalars are cast to string)
 * @return string UTF-8 encoded text
 */
function gbk_to_utf8($gbstr) {
   global $CODETABLE;
   if(empty($CODETABLE)) {
      $filename = CODETABLEDIR.'gb-unicode.table';
      $fp = fopen($filename, 'rb');
      // Each record is read as at most 14 bytes; columns 0-5 hold the GB code
      // (parsed as hexadecimal) and columns 7-12 the Unicode code point text.
      while($l = fgets($fp,15)) {
         $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
      }
      fclose($fp);
   }
   $gbstr = (string) $gbstr;
   $ret = '';
   $len = strlen($gbstr);
   // Walk the input by offset. Testing the remaining string for truthiness
   // would stop early when the remainder is "0".
   $pos = 0;
   while($pos < $len) {
      $first = $gbstr[$pos];
      if(ord($first) <= 0x80) {
         // Single-byte character: copy as is.
         $ret .= $first;
         $pos++;
         continue;
      }
      // Double-byte character: the table is keyed by the 16-bit value minus 0x8080.
      $gbcode = hexdec(bin2hex(substr($gbstr, $pos, 2))) - 0x8080;
      $pos += 2;
      if(!isset($CODETABLE[$gbcode])) {
         continue;
      }
      // unicode_to_utf8() yields the UTF-8 byte values as decimal digits,
      // three digits per byte for multi-byte results; turn each group into a byte.
      $utf8 = unicode_to_utf8(hexdec($CODETABLE[$gbcode]));
      $digits = strlen($utf8);
      for($k = 0; $k < $digits; $k += 3) {
         $ret .= chr((int) substr($utf8, $k, 3));
      }
   }
   return $ret;
}

3、繁体转简体

/**
 * Convert Big5 (Traditional Chinese) text to GBK (Simplified Chinese).
 *
 * The mapping table is read once from CODETABLEDIR.'big5-gb.table' and cached
 * in the global $BIG5_DATA. Every double-byte character is replaced in place
 * by the two bytes found at offset ($h-160)*510 + ($l-1)*2 of the table, so
 * the result always has the same byte length as the input.
 *
 * @param string $Text Big5 encoded input
 * @return string GBK encoded text
 */
function big5_to_gbk($Text) {
   global $BIG5_DATA;
   if(empty($BIG5_DATA)) {
      $filename = CODETABLEDIR.'big5-gb.table';
      $fp = fopen($filename, 'rb');
      $BIG5_DATA = fread($fp, filesize($filename));
      fclose($fp);
   }
   $tableLen = strlen($BIG5_DATA);
   // Stop one byte early so $Text[$i+1] is always a valid offset.
   $max = strlen($Text)-1;
   for($i = 0; $i < $max; $i++) {
      $h = ord($Text[$i]);
      if($h < 0x80) {
         continue;
      }
      $l = ord($Text[$i+1]);
      if($h==161 && $l==64) {
         // Big5 0xA140 (ideographic space) maps to the GBK full-width space.
         // The replacement must be two bytes because both bytes are overwritten.
         $gbstr = "\xA1\xA1";
      } else {
         $p = ($h-160)*510+($l-1)*2;
         if($p < 0 || $p + 1 >= $tableLen) {
            // No table entry for this pair: leave both bytes untouched.
            $i++;
            continue;
         }
         $gbstr = $BIG5_DATA[$p].$BIG5_DATA[$p+1];
      }
      $Text[$i] = $gbstr[0];
      $Text[$i+1] = $gbstr[1];
      $i++;
   }
   return $Text;
}

4、简体转繁体

5、unicode转utf8

/**
 * Describe the UTF-8 encoding of a Unicode code point as decimal digits.
 *
 * The result is NOT a raw byte string: each UTF-8 byte value is appended as
 * its decimal number (e.g. 0x4E2D gives "228184173"). For code points below
 * 0x80 the code point itself is returned as a decimal string, and for values
 * of 0x200000 or more an empty string is returned.
 *
 * @param int $c Unicode code point
 * @return string concatenated decimal byte values
 */
function unicode_to_utf8($c) {
   if($c < 0x80) {
      return '' . $c;
   }
   if($c < 0x800) {
      // Two-byte form: 110xxxxx 10xxxxxx
      $bytes = array(
         0xC0 | ($c >> 6),
         0x80 | ($c & 0x3F),
      );
   } elseif($c < 0x10000) {
      // Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx
      $bytes = array(
         0xE0 | ($c >> 12),
         0x80 | (($c >> 6) & 0x3F),
         0x80 | ($c & 0x3F),
      );
   } elseif($c < 0x200000) {
      // Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $bytes = array(
         0xF0 | ($c >> 18),
         0x80 | (($c >> 12) & 0x3F),
         0x80 | (($c >> 6) & 0x3F),
         0x80 | ($c & 0x3F),
      );
   } else {
      $bytes = array();
   }
   return implode('', $bytes);
}

6、utf8转unicode

/**
 * Decode one UTF-8 encoded character into its Unicode code point.
 *
 * The sequence length is taken from strlen($c); sequences of 1 to 4 bytes
 * are supported. For any other length nothing is decoded and null is returned.
 *
 * @param string $c the bytes of a single UTF-8 character
 * @return int|null the code point, or null for an unsupported length
 */
function utf8_to_unicode($c) {
   $size = strlen($c);
   if($size == 1) {
      return ord($c);
   }
   // Mask applied to the lead byte for each supported sequence length.
   $leadMasks = array(2 => 0x3f, 3 => 0x1f, 4 => 0x0f);
   if(!isset($leadMasks[$size])) {
      return null;
   }
   $n = ord($c[0]) & $leadMasks[$size];
   // Every following byte contributes its low six bits.
   for($k = 1; $k < $size; $k++) {
      $n = ($n << 6) | (ord($c[$k]) & 0x3f);
   }
   return $n;
}

7、Ascii转拼音

/**
 * Look up the pinyin for a character code.
 *
 * Codes below 128 are returned as the corresponding single character. For
 * other codes an exact key match in $pyarr wins; otherwise the value of the
 * first entry (in the array's iteration order) whose key is >= $asc is used.
 * Nothing matching yields null.
 *
 * @param int   $asc   character code to look up
 * @param array $pyarr pinyin table, code => pinyin (taken by reference, not modified)
 * @return string|null
 */
function asc_to_pinyin($asc,&$pyarr) {
   if($asc < 128) {
      return chr($asc);
   }
   if(isset($pyarr[$asc])) {
      return $pyarr[$asc];
   }
   foreach($pyarr as $code => $pinyin) {
      if($code >= $asc) {
         return $pinyin;
      }
   }
   return null;
}

8、gbk转拼音

/**
 * Convert text to a list of pinyin syllables, one entry per character.
 *
 * When the CHARSET constant is not 'gbk' the text is first converted to GBK
 * with iconv(). The lookup table is read from CODETABLEDIR.'gb-pinyin.table',
 * where each usable line has the form "<pinyin>-<code>". Single-byte
 * characters are passed through asc_to_pinyin() with their byte value,
 * double-byte characters with abs(high*256 + low - 65536).
 *
 * @param string $txt input text in the CHARSET encoding
 * @return array list of asc_to_pinyin() results, in input order
 */
function gbk_to_pinyin($txt) {
   if(CHARSET != 'gbk') {
      $txt = iconv(CHARSET,'GBK',$txt);
      if($txt === false) {
         // Conversion failed: there is nothing that can be looked up.
         $txt = '';
      }
   }
   $pyarr = array();
   $filename = CODETABLEDIR.'gb-pinyin.table';
   $fp = fopen($filename,'r');
   if($fp) {
      // Loop on the fgets() result instead of feof(), so no extra iteration
      // with a false line happens at end of file.
      while(($line = fgets($fp,32)) !== false) {
         $p = explode("-",$line);
         if(!isset($p[1])) {
            // Blank or malformed line without a "-" separator.
            continue;
         }
         $pyarr[intval($p[1])] = trim($p[0]);
      }
      fclose($fp);
   }
   // asc_to_pinyin() scans the table in ascending key order.
   ksort($pyarr);
   $py = array();
   $l = strlen($txt);
   for($i = 0; $i < $l; $i++) {
      $tmp = ord($txt[$i]);
      if($tmp >= 128) {
         // A lead byte at the very end has no trail byte; use 0 for it
         // rather than reading past the end of the string.
         $low = ($i + 1 < $l) ? ord($txt[$i+1]) : 0;
         $asc = abs($tmp*256+$low-65536);
         $i++;
      } else {
         $asc = $tmp;
      }
      $py[] = asc_to_pinyin($asc,$pyarr);
   }
   return $py;
}

9、数组 utf8转gbk

/**
 * Convert a value, or every leaf of a (nested) array, from UTF-8 to GBK.
 *
 * Array keys and nesting are kept; only the leaf values are passed through
 * utf8_to_gbk().
 *
 * @param mixed $data a single value or an array of values
 * @return mixed the converted value, or an array of the same shape
 */
function array_utf8_to_gbk($data) {
   if (is_array($data)) {
      $converted = array();
      foreach ($data as $index => $item) {
         // The recursive call covers both nested arrays and plain values.
         $converted[$index] = array_utf8_to_gbk($item);
      }
      return $converted;
   }
   return utf8_to_gbk($data);
}

10、数组 gbk转utf8

/**
 * Convert a value, or every leaf of a (nested) array, from GBK to UTF-8.
 *
 * Array keys and nesting are kept; only the leaf values are passed through
 * gbk_to_utf8().
 *
 * @param mixed $data a single value or an array of values
 * @return mixed the converted value, or an array of the same shape
 */
function array_gbk_to_utf8($data) {
   if (is_array($data)) {
      $converted = array();
      foreach ($data as $index => $item) {
         // The recursive call covers both nested arrays and plain values.
         $converted[$index] = array_gbk_to_utf8($item);
      }
      return $converted;
   }
   return gbk_to_utf8($data);
}

11、json 处理gbk转码utf-8问题( json_encode转换数组,值为null)

用 json_encode 把数组转换为 JSON 时,发现含中文的值被转成了 null。查阅手册后,发现如下说明:

该函数只能接受 UTF-8 编码的数据(译注:指字符/字符串类型的数据)。
原来数组中含有非 UTF-8 编码(例如 GBK)的中文,需要先转码为 UTF-8。下面写一个转换字符编码的函数:

/**
 * Convert a string, or every string inside a (nested) array, between charsets.
 *
 * When the two charset names are equal (compared case-insensitively) the
 * input is returned untouched. Strings are converted with
 * mb_convert_encoding() when that function exists, otherwise with iconv().
 * Array values are converted recursively (keys are kept as they are); any
 * other type is returned unchanged.
 *
 * @param mixed  $str      string, array of values, or any other value
 * @param string $fromCode source charset name
 * @param string $toCode   target charset name
 * @return mixed
 */
function encodeConvert($str,$fromCode,$toCode)
{
    $sameCharset = strtoupper($toCode) == strtoupper($fromCode);
    if ($sameCharset) {
        return $str;
    }

    if (is_array($str)) {
        $converted = array();
        foreach ($str as $key => $item) {
            $converted[$key] = encodeConvert($item, $fromCode, $toCode);
        }
        return $converted;
    }

    if (!is_string($str)) {
        return $str;
    }

    return function_exists('mb_convert_encoding')
        ? mb_convert_encoding($str, $toCode, $fromCode)
        : iconv($fromCode, $toCode, $str);
}

对于数组,先用 encodeConvert 把其中的字符串转为 UTF-8,再调用 json_encode,即可得到正确的结果:

// json_encode() only accepts UTF-8 strings, so convert the data first.
$json_api = json_encode(encodeConvert($json_api, 'gb2312', 'utf-8'));
// Decode exactly once: json_decode() expects a JSON string, so feeding it an
// already decoded value (a second json_decode() call) cannot work.
$json_api = json_decode($json_api);

12、Unicode解码

//方法一:
/**
 * Replace \uXXXX escape sequences in a string with the UTF-8 characters
 * they denote.
 *
 * Each run of consecutive escapes is decoded as one UTF-16BE string, so a
 * surrogate pair (two escapes) becomes a single character. A run that iconv()
 * cannot convert is left in the output unchanged.
 *
 * @param string $str text containing \uXXXX escapes (hex digits in either case)
 * @return string|null decoded text, or null if the regular expression fails
 */
function unicode_decode($str) {
	// A callback replaces the "e" (eval) pattern modifier, which PHP 7 removed.
	$t = preg_replace_callback(
		'#(?:\\\\u[0-9a-f]{4})+#i',
		function ($m) {
			// Strip the "\u" markers; what remains is big-endian UTF-16 in hex.
			$hex = str_ireplace('\\u', '', $m[0]);
			$decoded = iconv('UTF-16BE', 'UTF-8', pack('H*', $hex));
			return $decoded === false ? $m[0] : $decoded;
		},
		$str
	);
	return $t;
}

//方法二:
/**
 * Decode JSON-style escape sequences (such as \uXXXX) in a string by
 * wrapping it in a JSON document and letting json_decode() parse it.
 *
 * Bare double quotes and control characters are escaped first, so they can
 * neither break the JSON literal nor inject extra keys into it. Existing
 * backslash escapes are passed to json_decode() untouched.
 *
 * @param string $name text containing JSON escape sequences
 * @return string decoded text, or '' when the text is not decodable
 */
function unicode_decode($name){
    $safe = preg_replace_callback(
        '/\\\\.|["\x00-\x1f]/s',
        function ($m) {
            $token = $m[0];
            if ($token[0] === '\\') {
                // Already an escape sequence: keep it for json_decode().
                return $token;
            }
            if ($token === '"') {
                return '\\"';
            }
            // Raw control characters are not allowed inside a JSON string.
            return sprintf('\\u%04x', ord($token));
        },
        (string) $name
    );
    $json = '{"str":"'.$safe.'"}';
    $arr = json_decode($json,true);
    if(!is_array($arr) || !isset($arr['str'])) return '';
    return $arr['str'];
}
// Decode the posted "content" field; a missing field is treated as an empty string.
$content = unicode_decode(isset($_POST['content']) ? $_POST['content'] : '');
// The value comes from the client, so escape it before writing it into the page.
echo htmlspecialchars($content, ENT_QUOTES, 'UTF-8');
字符串字符集的编解码转换
© 版权声明
THE END
喜欢就支持一下吧
点赞12 分享
评论 抢沙发

请登录后发表评论

    blank

    暂无评论内容