08-09
01
采集规则:部分内容截取,然后得到集合(正则和逻辑两种)
作者:Java伴侣 日期:2008-09-01
引用内容
比如,我有一个变量:
String text = "aabbsdasdaiqo<a href=\"xxx\"/>sasdadsa<a href=\"eee\"/>sdasadpqwo<a href=\"ggg\"/>||wxwdqwq<a href=\"bbb\"/>...";
问题:如何取得 text 中所有 href="..." 的值(即 xxx、eee、ggg、bbb),并把它们全部放入一个 List 中?
String text = "aabbsdasdaiqo<a href=\"xxx\"/>sasdadsa<a href=\"eee\"/>sdasadpqwo<a href=\"ggg\"/>||wxwdqwq<a href=\"bbb\"/>...";
问题:如何取得 text 中所有 href="..." 的值(即 xxx、eee、ggg、bbb),并把它们全部放入一个 List 中?
使用正则表达式的实现:
复制内容到剪贴板 程序代码
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo: collects the value of every {@code href} attribute that appears in a
 * self-closing {@code <a href="..."/>} tag of a string, using a regular expression.
 */
public class Test {
    /**
     * Matches {@code <a href="VALUE"/>}; capturing group 1 is VALUE.
     * The value is "anything except a double quote" rather than {@code \w*},
     * so URLs containing characters such as {@code : / . - ? =} are matched too.
     */
    private static final String REGEX = "<a href=\"([^\"]*)\"/>";

    /** Compiled once and reused; {@link Pattern} instances are immutable. */
    private static final Pattern HREF_PATTERN = Pattern.compile(REGEX);

    /** Sample input used by {@link #main(String[])}. */
    private static final String INPUT = "aabbsdasdaiqo<a href=\"xxx\"/>sasdadsa<a href=\"eee\"/>sdasadpqwo<a href=\"ggg\"/>||wxwdqwq<a href=\"bbb\"/>...";

    /**
     * Extracts the href values from the sample input and prints one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        printList(extractHrefs(INPUT));
    }

    /**
     * Returns the href value of every {@code <a href="..."/>} tag in {@code text},
     * in order of appearance.
     *
     * @param text the text to scan; must not be {@code null}
     * @return a new mutable list of href values (empty when nothing matches)
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static List<String> extractHrefs(String text) {
        List<String> hrefs = new ArrayList<String>();
        Matcher matcher = HREF_PATTERN.matcher(text);
        while (matcher.find()) {
            // Group 1 is the attribute value without the surrounding quotes.
            hrefs.add(matcher.group(1));
        }
        return hrefs;
    }

    /** Prints each element of {@code list} on its own line to standard output. */
    private static void printList(List<String> list) {
        for (String item : list) {
            System.out.println(item);
        }
    }
}
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo: collects the value of every {@code href} attribute that appears in a
 * self-closing {@code <a href="..."/>} tag of a string, using a regular expression.
 */
public class Test {
    /**
     * Matches {@code <a href="VALUE"/>}; capturing group 1 is VALUE.
     * The value is "anything except a double quote" rather than {@code \w*},
     * so URLs containing characters such as {@code : / . - ? =} are matched too.
     */
    private static final String REGEX = "<a href=\"([^\"]*)\"/>";

    /** Compiled once and reused; {@link Pattern} instances are immutable. */
    private static final Pattern HREF_PATTERN = Pattern.compile(REGEX);

    /** Sample input used by {@link #main(String[])}. */
    private static final String INPUT = "aabbsdasdaiqo<a href=\"xxx\"/>sasdadsa<a href=\"eee\"/>sdasadpqwo<a href=\"ggg\"/>||wxwdqwq<a href=\"bbb\"/>...";

    /**
     * Extracts the href values from the sample input and prints one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        printList(extractHrefs(INPUT));
    }

    /**
     * Returns the href value of every {@code <a href="..."/>} tag in {@code text},
     * in order of appearance.
     *
     * @param text the text to scan; must not be {@code null}
     * @return a new mutable list of href values (empty when nothing matches)
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static List<String> extractHrefs(String text) {
        List<String> hrefs = new ArrayList<String>();
        Matcher matcher = HREF_PATTERN.matcher(text);
        while (matcher.find()) {
            // Group 1 is the attribute value without the surrounding quotes.
            hrefs.add(matcher.group(1));
        }
        return hrefs;
    }

    /** Prints each element of {@code list} on its own line to standard output. */
    private static void printList(List<String> list) {
        for (String item : list) {
            System.out.println(item);
        }
    }
}
使用字符串逻辑处理(不用正则)的实现:
复制内容到剪贴板 程序代码
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Demo: collects {@code href} attribute values from anchor tags using plain
 * string operations instead of a regular expression.
 */
public class mytest {
    /** Text that immediately precedes an href value inside a tag. */
    private static final String HREF_PREFIX = "href=\"";

    /**
     * Returns the href values found in {@code str}, in order of appearance.
     *
     * <p>The input is split on the literal {@code "<a"}; in each piece that follows
     * a {@code "<a"}, the text between the first {@code href="} and the next
     * double quote is taken as the value. Pieces that contain no {@code href="}
     * or no closing quote contribute nothing. Because the split is on the bare
     * text {@code "<a"}, tags whose names merely start with "a" are scanned as well.
     *
     * @param str the text to scan; must not be {@code null}
     * @return a new mutable list of href values (empty when none are found)
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String> getArrByStr(String str) {
        List<String> list = new ArrayList<String>();
        String[] arr = str.split("<a");
        // arr[0] is whatever precedes the first "<a", so scanning starts at index 1.
        for (int i = 1; i < arr.length; i++) {
            String segment = arr[i];
            int prefixPos = segment.indexOf(HREF_PREFIX);
            if (prefixPos < 0) {
                // No href in this piece: skip it instead of reading from a bogus offset.
                continue;
            }
            int valueStart = prefixPos + HREF_PREFIX.length();
            int valueEnd = segment.indexOf('"', valueStart);
            if (valueEnd < 0) {
                // Unterminated attribute value: nothing reliable to report.
                continue;
            }
            list.add(segment.substring(valueStart, valueEnd));
        }
        return list;
    }

    /**
     * Runs the extraction on a sample string and prints one value per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String text = "aabbsdasdaiqo<a href=\"xxx\"/>sasdadsa<a href=\"eee\"/>sdasadpqwo<a href=\"ggg\"/>||wxwdqwq<a href=\"bbb\"/>";
        for (String href : getArrByStr(text)) {
            System.out.println(href);
        }
    }
}
import java.util.Iterator;
import java.util.List;
/**
 * Demo: collects {@code href} attribute values from anchor tags using plain
 * string operations instead of a regular expression.
 */
public class mytest {
    /** Text that immediately precedes an href value inside a tag. */
    private static final String HREF_PREFIX = "href=\"";

    /**
     * Returns the href values found in {@code str}, in order of appearance.
     *
     * <p>The input is split on the literal {@code "<a"}; in each piece that follows
     * a {@code "<a"}, the text between the first {@code href="} and the next
     * double quote is taken as the value. Pieces that contain no {@code href="}
     * or no closing quote contribute nothing. Because the split is on the bare
     * text {@code "<a"}, tags whose names merely start with "a" are scanned as well.
     *
     * @param str the text to scan; must not be {@code null}
     * @return a new mutable list of href values (empty when none are found)
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String> getArrByStr(String str) {
        List<String> list = new ArrayList<String>();
        String[] arr = str.split("<a");
        // arr[0] is whatever precedes the first "<a", so scanning starts at index 1.
        for (int i = 1; i < arr.length; i++) {
            String segment = arr[i];
            int prefixPos = segment.indexOf(HREF_PREFIX);
            if (prefixPos < 0) {
                // No href in this piece: skip it instead of reading from a bogus offset.
                continue;
            }
            int valueStart = prefixPos + HREF_PREFIX.length();
            int valueEnd = segment.indexOf('"', valueStart);
            if (valueEnd < 0) {
                // Unterminated attribute value: nothing reliable to report.
                continue;
            }
            list.add(segment.substring(valueStart, valueEnd));
        }
        return list;
    }

    /**
     * Runs the extraction on a sample string and prints one value per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String text = "aabbsdasdaiqo<a href=\"xxx\"/>sasdadsa<a href=\"eee\"/>sdasadpqwo<a href=\"ggg\"/>||wxwdqwq<a href=\"bbb\"/>";
        for (String href : getArrByStr(text)) {
            System.out.println(href);
        }
    }
}
评论: 0 | 引用: 0 | 查看次数: 745
发表评论