07-08
14

解析html spec的类方法,HTML转义

比如说,我要在我的web页面里面产生"<B>"这样一个字符串。我可不想每次都手写"&lt;B&gt;"这样的转义形式。

举例:<B>转换为&lt;B&gt;,也就是HTMLEncode与HTMLDecode转换

手动编写一个类

/**
 * Decodes the four basic HTML entities back into their literal characters:
 * {@code &lt;} to {@code <}, {@code &gt;} to {@code >},
 * {@code &amp;} to {@code &} and {@code &quot;} to {@code "}.
 *
 * <p>The input is scanned once from left to right, so the result of a
 * replacement is never decoded a second time. An ampersand that does not
 * start one of the four entities is copied through unchanged.
 *
 * @param input the text to decode, may be {@code null}
 * @return the decoded text, or {@code null} if {@code input} is {@code null}
 * @throws IOException never thrown by this implementation; declared in the signature only
 */
public static String enableTag(String input) throws IOException {
    if (input == null) {
        return null;
    }

    final int total = input.length();
    // Decoding can only shrink the text, so the input length is enough capacity.
    StringBuilder decoded = new StringBuilder(total);

    int pos = 0;
    while (pos < total) {
        char current = input.charAt(pos);
        if (current != '&') {
            decoded.append(current);
            pos++;
            continue;
        }

        // startsWith(prefix, offset) returns false when the prefix would run
        // past the end of the string, so no explicit bounds checks are needed.
        int afterAmp = pos + 1;
        if (input.startsWith("lt;", afterAmp)) {
            decoded.append('<');
            pos += 4;
        } else if (input.startsWith("gt;", afterAmp)) {
            decoded.append('>');
            pos += 4;
        } else if (input.startsWith("amp;", afterAmp)) {
            decoded.append('&');
            pos += 5;
        } else if (input.startsWith("quot;", afterAmp)) {
            decoded.append('"');
            pos += 6;
        } else {
            // Not a recognised entity: keep the ampersand as-is.
            decoded.append('&');
            pos++;
        }
    }
    return decoded.toString();
}

/**
 * Escapes the HTML-significant characters in a string so that it is rendered
 * as literal text: {@code <} becomes {@code &lt;}, {@code >} becomes
 * {@code &gt;}, {@code "} becomes {@code &quot;} and {@code &} becomes
 * {@code &amp;}.
 *
 * <p>An ampersand that already starts one of those four entities, or a decimal
 * numeric character reference such as {@code &#2345;}, is left untouched so
 * that text which is already escaped is not escaped a second time.
 *
 * @param input the text to escape, may be {@code null}
 * @return the escaped text, or {@code null} if {@code input} is {@code null}
 * @throws IOException never thrown by this implementation; kept in the
 *         signature so existing callers that catch it still compile
 */
public static String disableTag(String input) throws IOException {
    if (input == null) {
        return null;
    }

    int length = input.length();
    // Entities are longer than the characters they replace, so leave headroom.
    StringBuilder ret = new StringBuilder(length + 100);

    for (int i = 0; i < length; i++) {
        char c = input.charAt(i);
        switch (c) {
            case '<':
                ret.append("&lt;");
                break;
            case '>':
                ret.append("&gt;");
                break;
            case '"':
                ret.append("&quot;");
                break;
            case '&':
                if (isEntityStart(input, i)) {
                    // Already part of an entity: do not escape it again.
                    ret.append('&');
                } else {
                    ret.append("&amp;");
                }
                break;
            default:
                ret.append(c);
                break;
        }
    }
    return ret.toString();
}

/**
 * Tells whether the ampersand at {@code ampIndex} begins {@code &lt;},
 * {@code &gt;}, {@code &amp;}, {@code &quot;} or a decimal numeric character
 * reference ({@code &#} followed by one or more digits and a {@code ;}).
 */
private static boolean isEntityStart(String input, int ampIndex) {
    int next = ampIndex + 1;
    // startsWith(prefix, offset) is false when the prefix would overrun the string.
    if (input.startsWith("lt;", next)
            || input.startsWith("gt;", next)
            || input.startsWith("amp;", next)
            || input.startsWith("quot;", next)) {
        return true;
    }
    if (!input.startsWith("#", next)) {
        return false;
    }

    int length = input.length();
    int digitsStart = next + 1;
    int pos = digitsStart;
    while (pos < length && input.charAt(pos) >= '0' && input.charAt(pos) <= '9') {
        pos++;
    }
    // Require at least one digit and the terminating semicolon.
    return pos > digitsStart && pos < length && input.charAt(pos) == ';';
}


为了不误导观众,值得一提的是commons-lang里的StringEscapeUtils类已经提供了这个功能(escapeHtml()/unescapeHtml()方法),另外freemarker下的“?html”也可以。
org.apache.commons.lang.StringEscapeUtils
org.apache.commons.lang.StringUtils();
但freemarker下的“?html”不支持JHTML:

escapeHtml

public static String escapeHtml(String str)

Escapes the characters in a String using HTML entities.

For example:

"bread" & "butter"
becomes:

&quot;bread&quot; &amp; &quot;butter&quot;.

Supports all known HTML 4.0 entities, including funky accents.

Parameters:
str - the String to escape, may be null
Returns:
a new escaped String, null if null string input
See Also:
unescapeHtml(String), ISO Entities, HTML 3.2 Character Entities for ISO Latin-1, HTML 4.0 Character entity references, HTML 4.01 Character References, HTML 4.01 Code positions



[本日志由 blurxx 于 2009-04-03 04:42 PM 编辑]
文章来自: 本站原创
引用通告: 查看所有引用 | 我要引用此文章
Tags: spec
相关日志:
评论: 2 | 引用: 0 | 查看次数: 1138
回复回复blurxx[2008-08-15 02:42 PM | del]
引用来自 多谢 引用来自 多谢
不错,今天帮了一个大忙!

整整一年前写的代码,自己都有些忘了
回复回复多谢[2008-08-15 01:37 PM | del]
不错,今天帮了一个大忙!
发表评论
昵 称:
密 码: 游客发言不需要密码.
内 容:
验证码: 验证码
选 项:
虽然发表评论不用注册,但是为了保护您的发言权,建议您注册帐号.
字数限制 1000 字 | UBB代码 开启 | [img]标签 关闭