09-05
09
utf-8 gb2312 截取字符串(参考dede)
作者:Java伴侣 日期:2009-05-09
dede的代码:几好用的...
//此函数在UTF8版中不能直接调用
// Cuts a byte window out of a GB-encoded string without splitting a two-byte
// character.
//   $str     - input bytes; every byte above 127 is treated as the first byte
//              of a two-byte character
//   $slen    - maximum number of bytes to return; 0 means "up to the end of $str"
//   $startdd - byte offset where the window starts (default 0)
// Returns the selected bytes, or "" when $startdd lies beyond the end of $str.
// A character that would push the result past $slen bytes is left out.
function cn_substrGb($str,$slen,$startdd=0){
    $restr = "";
    $c = "";
    $str_len = strlen($str);
    if($str_len < $startdd+1) return "";
    // Clamp the window to the bytes that actually exist after $startdd.
    if($str_len < $startdd + $slen || $slen==0) $slen = $str_len - $startdd;
    $enddd = $startdd + $slen - 1;
    for($i=0;$i<$str_len;$i++)
    {
        // $c still holds the character read by the previous iteration; it is
        // emitted one step late so the final character can be size-checked first.
        if($startdd==0) $restr .= $c;
        else if($i > $startdd) $restr .= $c;
        if(ord($str[$i])>127){
            // Lead byte of a two-byte character. If the trail byte is missing
            // (input truncated mid-character) the fragment is dropped; leaving
            // $c untouched here used to append the previous character twice.
            $c = ($str_len > $i+1) ? $str[$i].$str[$i+1] : "";
            $i++;
        }
        else{ $c = $str[$i]; }
        if($i >= $enddd){
            // Last character of the window: keep it only if it still fits.
            if(strlen($restr)+strlen($c)>$slen) break;
            else{ $restr .= $c; break; }
        }
    }
    return $restr;
}
//中文截取,单字节截取模式
//$GLOBALS['cfg_ver_lang'] 为页面编码
// Truncates $str to at most $slen bytes (measured in GB encoding), starting at
// byte offset $startdd. The page encoding is read from $GLOBALS['cfg_ver_lang']:
// for 'utf-8' the text is converted to GB, cut, and converted back; for any
// other value the string is cut as-is.
function cn_substr($str,$slen,$startdd=0){
    // Non-UTF-8 pages: the input is handed to the GB cutter unchanged.
    if($GLOBALS['cfg_ver_lang']!='utf-8'){
        return cn_substrGb($str,$slen,$startdd);
    }
    // UTF-8 pages: round-trip through GB so the byte budget is applied there.
    $gbText = utf82gb($str);
    $gbPiece = cn_substrGb($gbText,$slen,$startdd);
    return gb2utf8($gbPiece);
}
//编码的转换
/******************************
//UTF-8 - GB
*******************************/
// Converts a UTF-8 string to GB encoding.
// When iconv() exists the work is delegated to it (target 'gbk//ignore').
// Otherwise a lookup table is loaded once from data/gb-utf8.table (next to this
// file) into the global $UC2GBTABLE and the string is converted sequence by
// sequence; sequences missing from the table are written as "&#<number>;".
// The fallback path trims leading/trailing whitespace from its result.
function utf82gb($utfstr)
{
    if(function_exists('iconv')){ return iconv('utf-8','gbk//ignore',$utfstr); }
    global $UC2GBTABLE;
    if(trim($utfstr)=="") return $utfstr;
    if(empty($UC2GBTABLE)){
        $filename = dirname(__FILE__)."/data/gb-utf8.table";
        $fp = fopen($filename,"r");
        // fopen() returns false when the table is missing or unreadable; skip
        // loading instead of handing false to fgets()/fclose().
        if($fp){
            // Each line: 4 hex digits at offset 0 (GB code minus 0x8080) and
            // 4 hex digits at offset 5 (the lookup key, presumably the Unicode
            // code point returned by utf82u() - confirm against that helper).
            while($l = fgets($fp,15))
            { $UC2GBTABLE[hexdec(substr($l, 5, 4))] = hexdec(substr($l, 0, 4)); }
            fclose($fp);
        }
    }
    $okstr = "";
    $ulen = strlen($utfstr);
    for($i=0;$i<$ulen;$i++)
    {
        $c = $utfstr[$i];
        $cb = decbin(ord($c));
        // An 8-digit binary form means the high bit is set: non-ASCII byte.
        if(strlen($cb)==8){
            // The number of leading 1 bits is the sequence length, so the
            // number of bytes still to read is one less. (The earlier
            // ord()-of-the-binary-string expression always evaluated to 2,
            // which was only right for three-byte sequences.)
            $zeroAt = strpos($cb,"0");
            $csize = ($zeroAt===false) ? 0 : $zeroAt - 1;
            // Never read past the end of a truncated input.
            for($j=0;$j < $csize && $i+1 < $ulen;$j++){
                $i++; $c .= $utfstr[$i];
            }
            $c = utf82u($c);
            if(isset($UC2GBTABLE[$c])){
                $c = dechex($UC2GBTABLE[$c]+0x8080);
                $okstr .= chr(hexdec($c[0].$c[1])).chr(hexdec($c[2].$c[3]));
            }
            else
            {
                // NOTE(review): the prefix was an empty string, which emitted a
                // bare "<number>;". Restored to an HTML numeric entity on the
                // assumption that "&#" was lost when the listing was published.
                $okstr .= "&#".$c.";";
            }
        }
        else $okstr .= $c;
    }
    $okstr = trim($okstr);
    return $okstr;
}
/*******************************
//GB 2 UTF-8
*******************************/
// Converts a GB-encoded string to UTF-8.
// When iconv() exists the work is delegated to it ('gbk' -> 'utf-8').
// Otherwise a lookup table is loaded once from data/gb-utf8.table (next to this
// file) into the global $CODETABLE and the string is converted character by
// character with the help of u2utf8().
function gb2utf8($gbstr) {
    if(function_exists('iconv')){ return iconv('gbk','utf-8',$gbstr); }
    global $CODETABLE;
    if(trim($gbstr)=="") return $gbstr;
    if(empty($CODETABLE)){
        $filename = dirname(__FILE__)."/data/gb-utf8.table";
        $fp = fopen($filename,"r");
        // fopen() returns false when the table is missing or unreadable; skip
        // loading instead of handing false to fgets()/fclose().
        if($fp){
            // Each line: 4 hex digits at offset 0 (GB code minus 0x8080, used
            // as the key) and 4 hex digits at offset 5 (kept as a hex string).
            while ($l = fgets($fp,15))
            { $CODETABLE[hexdec(substr($l, 0, 4))] = substr($l, 5, 4); }
            fclose($fp);
        }
    }
    $ret = "";
    $len = strlen($gbstr);
    $pos = 0;
    // Walk the input with a cursor rather than re-slicing the remaining
    // string on every step, which copied the tail each time.
    while ($pos < $len) {
        if (ord(substr($gbstr, $pos, 1)) > 0x80) {
            // Two-byte character: look it up by its code minus 0x8080.
            $thisW = substr($gbstr, $pos, 2);
            $pos += 2;
            $utf8 = "";
            // '@' keeps a missing table entry from raising a notice; the
            // lookup then yields 0 for u2utf8().
            @$utf8 = u2utf8(hexdec($CODETABLE[hexdec(bin2hex($thisW)) - 0x8080]));
            if($utf8!=""){
                // u2utf8() output is consumed three characters at a time, each
                // group being passed to chr() as a byte value.
                $n = strlen($utf8);
                for ($i = 0;$i < $n;$i += 3)
                    $ret .= chr((int)substr($utf8, $i, 3));
            }
        }
        else
        {
            // Single-byte character: copied through unchanged.
            $ret .= substr($gbstr, $pos, 1);
            $pos++;
        }
    }
    return $ret;
}
//此函数在UTF8版中不能直接调用
// Cuts a byte window out of a GB-encoded string without splitting a two-byte
// character.
//   $str     - input bytes; every byte above 127 is treated as the first byte
//              of a two-byte character
//   $slen    - maximum number of bytes to return; 0 means "up to the end of $str"
//   $startdd - byte offset where the window starts (default 0)
// Returns the selected bytes, or "" when $startdd lies beyond the end of $str.
// A character that would push the result past $slen bytes is left out.
function cn_substrGb($str,$slen,$startdd=0){
    $restr = "";
    $c = "";
    $str_len = strlen($str);
    if($str_len < $startdd+1) return "";
    // Clamp the window to the bytes that actually exist after $startdd.
    if($str_len < $startdd + $slen || $slen==0) $slen = $str_len - $startdd;
    $enddd = $startdd + $slen - 1;
    for($i=0;$i<$str_len;$i++)
    {
        // $c still holds the character read by the previous iteration; it is
        // emitted one step late so the final character can be size-checked first.
        if($startdd==0) $restr .= $c;
        else if($i > $startdd) $restr .= $c;
        if(ord($str[$i])>127){
            // Lead byte of a two-byte character. If the trail byte is missing
            // (input truncated mid-character) the fragment is dropped; leaving
            // $c untouched here used to append the previous character twice.
            $c = ($str_len > $i+1) ? $str[$i].$str[$i+1] : "";
            $i++;
        }
        else{ $c = $str[$i]; }
        if($i >= $enddd){
            // Last character of the window: keep it only if it still fits.
            if(strlen($restr)+strlen($c)>$slen) break;
            else{ $restr .= $c; break; }
        }
    }
    return $restr;
}
//中文截取,单字节截取模式
//$GLOBALS['cfg_ver_lang'] 为页面编码
// Truncates $str to at most $slen bytes (measured in GB encoding), starting at
// byte offset $startdd. The page encoding is read from $GLOBALS['cfg_ver_lang']:
// for 'utf-8' the text is converted to GB, cut, and converted back; for any
// other value the string is cut as-is.
function cn_substr($str,$slen,$startdd=0){
    // Non-UTF-8 pages: the input is handed to the GB cutter unchanged.
    if($GLOBALS['cfg_ver_lang']!='utf-8'){
        return cn_substrGb($str,$slen,$startdd);
    }
    // UTF-8 pages: round-trip through GB so the byte budget is applied there.
    $gbText = utf82gb($str);
    $gbPiece = cn_substrGb($gbText,$slen,$startdd);
    return gb2utf8($gbPiece);
}
//编码的转换
/******************************
//UTF-8 - GB
*******************************/
// Converts a UTF-8 string to GB encoding.
// When iconv() exists the work is delegated to it (target 'gbk//ignore').
// Otherwise a lookup table is loaded once from data/gb-utf8.table (next to this
// file) into the global $UC2GBTABLE and the string is converted sequence by
// sequence; sequences missing from the table are written as "&#<number>;".
// The fallback path trims leading/trailing whitespace from its result.
function utf82gb($utfstr)
{
    if(function_exists('iconv')){ return iconv('utf-8','gbk//ignore',$utfstr); }
    global $UC2GBTABLE;
    if(trim($utfstr)=="") return $utfstr;
    if(empty($UC2GBTABLE)){
        $filename = dirname(__FILE__)."/data/gb-utf8.table";
        $fp = fopen($filename,"r");
        // fopen() returns false when the table is missing or unreadable; skip
        // loading instead of handing false to fgets()/fclose().
        if($fp){
            // Each line: 4 hex digits at offset 0 (GB code minus 0x8080) and
            // 4 hex digits at offset 5 (the lookup key, presumably the Unicode
            // code point returned by utf82u() - confirm against that helper).
            while($l = fgets($fp,15))
            { $UC2GBTABLE[hexdec(substr($l, 5, 4))] = hexdec(substr($l, 0, 4)); }
            fclose($fp);
        }
    }
    $okstr = "";
    $ulen = strlen($utfstr);
    for($i=0;$i<$ulen;$i++)
    {
        $c = $utfstr[$i];
        $cb = decbin(ord($c));
        // An 8-digit binary form means the high bit is set: non-ASCII byte.
        if(strlen($cb)==8){
            // The number of leading 1 bits is the sequence length, so the
            // number of bytes still to read is one less. (The earlier
            // ord()-of-the-binary-string expression always evaluated to 2,
            // which was only right for three-byte sequences.)
            $zeroAt = strpos($cb,"0");
            $csize = ($zeroAt===false) ? 0 : $zeroAt - 1;
            // Never read past the end of a truncated input.
            for($j=0;$j < $csize && $i+1 < $ulen;$j++){
                $i++; $c .= $utfstr[$i];
            }
            $c = utf82u($c);
            if(isset($UC2GBTABLE[$c])){
                $c = dechex($UC2GBTABLE[$c]+0x8080);
                $okstr .= chr(hexdec($c[0].$c[1])).chr(hexdec($c[2].$c[3]));
            }
            else
            {
                // NOTE(review): the prefix was an empty string, which emitted a
                // bare "<number>;". Restored to an HTML numeric entity on the
                // assumption that "&#" was lost when the listing was published.
                $okstr .= "&#".$c.";";
            }
        }
        else $okstr .= $c;
    }
    $okstr = trim($okstr);
    return $okstr;
}
/*******************************
//GB 2 UTF-8
*******************************/
// Converts a GB-encoded string to UTF-8.
// When iconv() exists the work is delegated to it ('gbk' -> 'utf-8').
// Otherwise a lookup table is loaded once from data/gb-utf8.table (next to this
// file) into the global $CODETABLE and the string is converted character by
// character with the help of u2utf8().
function gb2utf8($gbstr) {
    if(function_exists('iconv')){ return iconv('gbk','utf-8',$gbstr); }
    global $CODETABLE;
    if(trim($gbstr)=="") return $gbstr;
    if(empty($CODETABLE)){
        $filename = dirname(__FILE__)."/data/gb-utf8.table";
        $fp = fopen($filename,"r");
        // fopen() returns false when the table is missing or unreadable; skip
        // loading instead of handing false to fgets()/fclose().
        if($fp){
            // Each line: 4 hex digits at offset 0 (GB code minus 0x8080, used
            // as the key) and 4 hex digits at offset 5 (kept as a hex string).
            while ($l = fgets($fp,15))
            { $CODETABLE[hexdec(substr($l, 0, 4))] = substr($l, 5, 4); }
            fclose($fp);
        }
    }
    $ret = "";
    $len = strlen($gbstr);
    $pos = 0;
    // Walk the input with a cursor rather than re-slicing the remaining
    // string on every step, which copied the tail each time.
    while ($pos < $len) {
        if (ord(substr($gbstr, $pos, 1)) > 0x80) {
            // Two-byte character: look it up by its code minus 0x8080.
            $thisW = substr($gbstr, $pos, 2);
            $pos += 2;
            $utf8 = "";
            // '@' keeps a missing table entry from raising a notice; the
            // lookup then yields 0 for u2utf8().
            @$utf8 = u2utf8(hexdec($CODETABLE[hexdec(bin2hex($thisW)) - 0x8080]));
            if($utf8!=""){
                // u2utf8() output is consumed three characters at a time, each
                // group being passed to chr() as a byte value.
                $n = strlen($utf8);
                for ($i = 0;$i < $n;$i += 3)
                    $ret .= chr((int)substr($utf8, $i, 3));
            }
        }
        else
        {
            // Single-byte character: copied through unchanged.
            $ret .= substr($gbstr, $pos, 1);
            $pos++;
        }
    }
    return $ret;
}
评论: 0 | 引用: 0 | 查看次数: 549
发表评论