08-04
29

UTF-8 转 Unicode

/**
  * utf-8 转换成 unicode
  * @author fanhui
  * 2007-3-15
  * @param inStr
  * @return
  */
public static String utf8ToUnicode(String inStr) {
        char[] myBuffer = inStr.toCharArray();
        
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < inStr.length(); i++) {
         UnicodeBlock ub = UnicodeBlock.of(myBuffer[i]);
            if(ub == UnicodeBlock.BASIC_LATIN){
             //英文及数字等
             sb.append(myBuffer[i]);
            }else if(ub == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS){
             //全角半角字符
             int j = (int) myBuffer[i] - 65248;
             sb.append((char)j);
            }else{
             //汉字
             short s = (short) myBuffer[i];
                String hexS = Integer.toHexString(s);
                String unicode = "\\u"+hexS;
             sb.append(unicode.toLowerCase());
            }
        }
        return sb.toString();
    }

/**
  * unicode 转换成 utf-8
  * @author fanhui
  * 2007-3-15
  * @param theString
  * @return
  */
public static String unicodeToUtf8(String theString) {
  char aChar;
  int len = theString.length();
  StringBuffer outBuffer = new StringBuffer(len);
  for (int x = 0; x < len;) {
   aChar = theString.charAt(x++);
   if (aChar == '\\') {
    aChar = theString.charAt(x++);
    if (aChar == 'u') {
     // Read the xxxx
     int value = 0;
     for (int i = 0; i < 4; i++) {
      aChar = theString.charAt(x++);
      switch (aChar) {
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
       value = (value << 4) + aChar - '0';
       break;
      case 'a':
      case 'b':
      case 'c':
      case 'd':
      case 'e':
      case 'f':
       value = (value << 4) + 10 + aChar - 'a';
       break;
      case 'A':
      case 'B':
      case 'C':
      case 'D':
      case 'E':
      case 'F':
       value = (value << 4) + 10 + aChar - 'A';
       break;
      default:
       throw new IllegalArgumentException(
         "Malformed   \\uxxxx   encoding.");
      }
     }
     outBuffer.append((char) value);
    } else {
     if (aChar == 't')
      aChar = '\t';
     else if (aChar == 'r')
      aChar = '\r';
     else if (aChar == 'n')
      aChar = '\n';
     else if (aChar == 'f')
      aChar = '\f';
     outBuffer.append(aChar);
    }
   } else
    outBuffer.append(aChar);
  }
  return outBuffer.toString();
}

文章来自: 本站原创
引用通告: 查看所有引用 | 我要引用此文章
Tags: UTF-8 unicode
相关日志:
评论: 0 | 引用: 0 | 查看次数: 1652
发表评论
昵 称:
密 码: 游客发言不需要密码.
内 容:
验证码: 验证码
选 项:
虽然发表评论不用注册,但是为了保护您的发言权,建议您注册帐号.
字数限制 1000 字 | UBB代码 开启 | [img]标签 关闭