09-05
09

UTF-8 / GB2312 字符串截取(参考 DedeCMS)

以下是 DedeCMS 中的代码,相当好用:

/**
 * Cut a slice out of a GB-encoded (GB2312/GBK) byte string while keeping
 * two-byte characters whole.
 *
 * The function works on raw bytes: every byte above 127 is taken as the
 * first byte of a two-byte character. It must not be called directly on
 * UTF-8 text; cn_substr() converts to GB first and then calls this.
 *
 * @param string $str     GB-encoded input.
 * @param int    $slen    Requested length of the slice in bytes; 0 (or a
 *                        length running past the end) means "up to the end
 *                        of the string".
 * @param int    $startdd Byte offset at which the slice starts.
 * @return string The slice, or "" when $startdd is at or beyond the end of $str.
 */
function cn_substrGb($str,$slen,$startdd=0){
    $restr = "";
    $c = "";
    $str_len = strlen($str);
    if($str_len < $startdd+1) return "";
    // Clamp the requested length to the bytes actually available.
    if($str_len < $startdd + $slen || $slen==0) $slen = $str_len - $startdd;
    $enddd = $startdd + $slen - 1;
    for($i=0;$i<$str_len;$i++)
    {
        // Append the character read on the previous pass; when a start
        // offset is given, only once the scan has moved past it.
        if($startdd==0) $restr .= $c;
        else if($i > $startdd) $restr .= $c;

        if(ord($str[$i])>127){
            if($str_len>$i+1) $c = $str[$i].$str[$i+1];
            // Lone lead byte at the very end of the string: there is no
            // second byte, so drop it. Leaving $c untouched here would
            // append the previous character a second time.
            else $c = "";
            $i++;
        }
        else{ $c = $str[$i]; }

        if($i >= $enddd){
            // Take the last character only if it still fits into $slen bytes.
            if(strlen($restr)+strlen($c)<=$slen) $restr .= $c;
            break;
        }
    }
    return $restr;
}

/**
 * Chinese-aware substring in byte-count mode.
 *
 * $GLOBALS['cfg_ver_lang'] holds the page encoding. When it equals 'utf-8'
 * the text is converted to GB with utf82gb(), cut with cn_substrGb(), and
 * the slice is converted back with gb2utf8(); otherwise the text is handed
 * to cn_substrGb() unchanged. $slen and $startdd are passed through to
 * cn_substrGb() as given, so they apply to the GB byte string.
 *
 * @param string $str     Text in the page encoding.
 * @param int    $slen    Slice length, forwarded to cn_substrGb().
 * @param int    $startdd Start offset, forwarded to cn_substrGb().
 * @return mixed Whatever gb2utf8() / cn_substrGb() return for the slice.
 */
function cn_substr($str,$slen,$startdd=0){
    $pageIsUtf8 = ($GLOBALS['cfg_ver_lang']=='utf-8');

    // Non-UTF-8 pages already carry GB bytes: cut them directly.
    if(!$pageIsUtf8){
        return cn_substrGb($str,$slen,$startdd);
    }

    // UTF-8 pages: round-trip through GB so the cut never splits a character.
    $gbText = utf82gb($str);
    $gbSlice = cn_substrGb($gbText,$slen,$startdd);
    return gb2utf8($gbSlice);
}

// Encoding conversion helpers

/**
 * Convert a UTF-8 string to GB (GBK/GB2312) bytes.
 *
 * When the iconv extension is loaded the work is delegated to iconv()
 * ('utf-8' -> 'gbk//ignore') and its result is returned as is (iconv()
 * returns false on failure).
 *
 * Otherwise a lookup table is read once from data/gb-utf8.table next to this
 * file and cached in the global $UC2GBTABLE. For each line, the 4 hex digits
 * at offset 5 become the key and the 4 hex digits at offset 0 the value;
 * value + 0x8080 is emitted as the two GB bytes. Multi-byte sequences are
 * decoded with utf82u() (defined elsewhere); sequences with no table entry
 * are written as "&#N;" where N is the value utf82u() returned.
 *
 * NOTE(review): the fallback path trims the result, the iconv path does not.
 *
 * @param string $utfstr UTF-8 input.
 * @return string|false Converted bytes; the input itself when it is blank
 *                      (fallback path only).
 */
function utf82gb($utfstr)
{
    if(function_exists('iconv')){ return iconv('utf-8','gbk//ignore',$utfstr); }
    global $UC2GBTABLE;
    if(trim($utfstr)=="") return $utfstr;
    if(empty($UC2GBTABLE)){
        $filename = dirname(__FILE__)."/data/gb-utf8.table";
        $fp = fopen($filename,"r");
        // fopen() yields false when the table is missing; reading from false
        // is a TypeError on PHP 8, so only read when the handle is valid.
        if($fp){
            while($l = fgets($fp,15))
            { $UC2GBTABLE[hexdec(substr($l, 5, 4))] = hexdec(substr($l, 0, 4)); }
            fclose($fp);
        }
    }
    $okstr = "";
    $ulen = strlen($utfstr);
    for($i=0;$i<$ulen;$i++)
    {
        $c = $utfstr[$i];
        $cb = decbin(ord($c));
        // decbin() gives 8 digits only when the top bit is set, i.e. for a
        // byte that belongs to a multi-byte sequence.
        if(strlen($cb)==8){
            // The count of leading 1 bits in the lead byte is the total
            // sequence length, so one less is the number of continuation
            // bytes. The earlier code took ord() of the binary string itself
            // and therefore always consumed exactly 2 extra bytes, which is
            // only right for 3-byte sequences.
            $leadOnes = strpos($cb,"0");
            $csize = ($leadOnes === false) ? 0 : $leadOnes - 1;
            // Never read beyond the end of a truncated sequence.
            for($j=0;$j < $csize && $i+1 < $ulen;$j++){
                $i++; $c .= $utfstr[$i];
            }
            $c = utf82u($c);
            if(isset($UC2GBTABLE[$c])){
                $c = dechex($UC2GBTABLE[$c]+0x8080);
                $okstr .= chr(hexdec($c[0].$c[1])).chr(hexdec($c[2].$c[3]));
            }
            else
            { $okstr .= "&#".$c.";";}
        }
        else $okstr .= $c;
    }
    $okstr = trim($okstr);
    return $okstr;
}
/**
 * Convert a GB (GBK/GB2312) byte string to UTF-8.
 *
 * When the iconv extension is loaded the work is delegated to iconv()
 * ('gbk' -> 'utf-8') and its result is returned as is (iconv() returns
 * false on failure).
 *
 * Otherwise a lookup table is read once from data/gb-utf8.table next to this
 * file and cached in the global $CODETABLE: for each line, the 4 hex digits
 * at offset 0 are the key and the 4 characters at offset 5 the value. Each
 * byte above 0x80 starts a two-byte character whose code minus 0x8080 is
 * looked up; the hex value found is passed to u2utf8() (defined elsewhere).
 * The string u2utf8() returns is consumed three characters at a time, each
 * group being handed to chr() -- presumably 3-digit decimal byte values;
 * confirm against u2utf8().
 *
 * @param string $gbstr GB-encoded input.
 * @return string|false UTF-8 text; the input itself when it is blank
 *                      (fallback path only).
 */
function gb2utf8($gbstr) {
    if(function_exists('iconv')){ return iconv('gbk','utf-8',$gbstr); }
    global $CODETABLE;
    if(trim($gbstr)=="") return $gbstr;
    if(empty($CODETABLE)){
        $filename = dirname(__FILE__)."/data/gb-utf8.table";
        $fp = fopen($filename,"r");
        // fopen() yields false when the table is missing; reading from false
        // is a TypeError on PHP 8, so only read when the handle is valid.
        if($fp){
            while ($l = fgets($fp,15))
            { $CODETABLE[hexdec(substr($l, 0, 4))] = substr($l, 5, 4); }
            fclose($fp);
        }
    }
    $ret = "";
    $total = strlen($gbstr);
    $pos = 0;
    // Walk the input with an offset instead of re-slicing the remainder on
    // every step, which copied the whole tail each time.
    while ($pos < $total) {
        if (ord($gbstr[$pos]) > 0x80) {
            $thisW = substr($gbstr, $pos, 2);
            $pos += 2;
            $key = hexdec(bin2hex($thisW)) - 0x8080;
            // No table entry (unknown code or a lone trailing lead byte):
            // skip it explicitly rather than relying on error suppression.
            if(!isset($CODETABLE[$key])) continue;
            // u2utf8() is not visible here; diagnostics from the call stay
            // suppressed as before.
            $utf8 = @u2utf8(hexdec($CODETABLE[$key]));
            if($utf8!=""){
                for ($i = 0;$i < strlen($utf8);$i += 3)
                    $ret .= chr(substr($utf8, $i, 3));
            }
        }
        else
        {
            $ret .= $gbstr[$pos];
            $pos++;
        }
    }
    return $ret;
}

[本日志由 blurxx 于 2009-05-09 08:09 AM 编辑]
文章来自: 本站原创
引用通告: 查看所有引用 | 我要引用此文章
Tags: dede
相关日志:
评论: 0 | 引用: 0 | 查看次数: 506
发表评论
昵 称:
密 码: 游客发言不需要密码.
内 容:
验证码: 验证码
选 项:
虽然发表评论不用注册,但是为了保护您的发言权,建议您注册帐号.
字数限制 1000 字 | UBB代码 开启 | [img]标签 关闭