11-11
23
Java 写的 HTTP 代理的获取和验证 —— 用于数据抽取,防止 IP 被封
作者:Java伴侣 日期:2011-11-23
复制内容到剪贴板 程序代码
package page;
import java.net.URL;
import java.io.BufferedReader;
import java.net.HttpURLConnection;
import java.util.*;
import java.io.InputStreamReader;
import java.util.regex.Pattern;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.regex.Matcher;
/**
 * <p>Title: GetProxy</p>
 *
 * <p>Description: Scrapes public proxy-list pages for {@code host:port} entries, checks
 * whether a proxy can fetch a known page, and switches the HTTP proxy used by the JVM.</p>
 *
 * <p>Proxies are selected by writing the {@code http.proxyHost} / {@code http.proxyPort}
 * system properties, which are global to the JVM; this class does no synchronisation
 * around those writes.</p>
 *
 * <p>Copyright: Copyright (c) 2008</p>
 *
 * <p>Company: </p>
 *
 * @author not attributable
 * @version 1.0
 */
public class GetProxy {
    /** User-Agent header sent with every request made by this class. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT)";

    /**
     * Only ports strictly below this value are collected.
     * NOTE(review): valid TCP ports go up to 65535 - confirm that 65000 is intentional.
     */
    private static final int PORT_LIMIT = 65000;

    /**
     * Matches a bare {@code ip:port} anywhere in the proxycn page.
     * NOTE(review): the first octet must have exactly three digits, so addresses such as
     * 61.x.x.x are never matched - confirm whether that is intended.
     */
    private static final Pattern PROXYCN_ENTRY =
            Pattern.compile("(\\d{3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+)");

    /** Matches one table row of the hustonline proxy page; group 1 is the {@code ip:port}. */
    private static final Pattern HUST_ENTRY =
            Pattern.compile("<b>(\\d{3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+)</b>\\n </td><td>\\n <font color=.+>.+</font>\\n </td><td>\\n <font color=[blue]*[orange]*>.+</font>\\n </td><td>\\n <font color=.+>.+</font>");

    /** Proxies collected so far, each stored as {@code "host:port"}. */
    HashSet<String> hs = new HashSet<String>();

    public GetProxy() {
    }

    /**
     * Downloads the proxycn.com fast-proxy page and adds every {@code ip:port} found on it
     * to {@link #hs}. Failures are printed and logged, never thrown.
     */
    public void getProxyFromProxyCN() {
        collectFrom("http://www.proxycn.com/html_proxy/30fastproxy-1.html", PROXYCN_ENTRY);
    }

    /**
     * Checks a proxy by routing a request for the Baidu home page through it.
     *
     * <p>Side effect: the proxy is installed through the {@code http.proxyHost} and
     * {@code http.proxyPort} system properties and is left in place afterwards.</p>
     *
     * @param proxyAndport proxy in {@code host:port} form
     * @return {@code true} if the page came back and contains the expected Baidu marker text;
     *         {@code false} if the argument is malformed, the request failed, or the marker
     *         is missing
     */
    public boolean isVaildProxy(String proxyAndport) {
        try {
            String[] parts = proxyAndport.split(":");
            if (parts.length < 2) {
                // Not in host:port form.
                return false;
            }
            useProxy(parts[0], parts[1]);
            String page = fetchPage("http://www.baidu.com/");
            if (page.indexOf("把百度设为首页") != -1) {
                System.out.println(proxyAndport + " sucess!");
                return true;
            }
            return false;
        } catch (Exception ex) {
            ex.printStackTrace();
            ConstDatas.log(ex.toString());
            return false;
        }
    }

    /**
     * Downloads the hustonline proxy page and adds every {@code ip:port} found in its table
     * rows to {@link #hs}. Failures are printed and logged, never thrown.
     */
    public void getproxyFromHust() {
        collectFrom("http://info.hustonline.net/index/proxyshow.aspx", HUST_ENTRY);
    }

    /**
     * Picks a random entry from {@code ConstDatas.proxy} and, if it is in {@code host:port}
     * form, installs it through the {@code http.proxyHost} / {@code http.proxyPort} system
     * properties.
     *
     * @return the chosen entry, whether or not it was well-formed enough to be installed
     * @throws IllegalArgumentException if {@code ConstDatas.proxy} is empty, because
     *         {@code Random.nextInt(0)} rejects a zero bound
     */
    public String ChangeProxy() {
        // The no-arg Random seeds itself; seeding from the clock gave threads that called in
        // the same millisecond the same choice.
        Random random = new Random();
        int index = random.nextInt(ConstDatas.proxy.size());
        String proxyAndport = (String) (ConstDatas.proxy.toArray())[index];
        String[] parts = proxyAndport.split(":");
        System.out.println(Thread.currentThread() + "修改代理:" + proxyAndport);
        if (parts.length >= 2) {
            // Entry is in host:port form.
            useProxy(parts[0], parts[1]);
        }
        return proxyAndport;
    }

    public static void main(String[] args) {
        GetProxy g = new GetProxy();
        g.getProxyFromProxyCN();
        //g.getproxyFromHust();
        //g.isVaildProxy("211.99.188.220:80");
    }

    /** Writes the trimmed host and port into the JVM-wide HTTP proxy system properties. */
    private static void useProxy(String host, String port) {
        Properties systemProperties = System.getProperties();
        systemProperties.setProperty("http.proxyHost", host.trim());
        systemProperties.setProperty("http.proxyPort", port.trim());
    }

    /**
     * Fetches a page as text, one {@code '\n'} after every line, and always closes the stream.
     * The bytes are decoded with the platform default charset.
     */
    private static String fetchPage(String address) throws java.io.IOException {
        URL url = new URL(address);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(connection.getInputStream()));
        try {
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line).append("\n");
            }
            return page.toString();
        } finally {
            reader.close();
        }
    }

    /** Downloads a page and stores group 1 of every pattern match whose port is below the limit. */
    private void collectFrom(String address, Pattern entryPattern) {
        try {
            Matcher matcher = entryPattern.matcher(fetchPage(address));
            // Each find() advances to the next match, so it must only be called in the loop
            // condition; an extra call before the loop would drop the first proxy.
            while (matcher.find()) {
                String candidate = matcher.group(1);
                System.out.println(candidate);
                // Both patterns require ":digits", so index 1 always exists.
                String portText = candidate.split(":")[1];
                try {
                    if (Integer.parseInt(portText) < PORT_LIMIT) {
                        hs.add(candidate);
                    }
                } catch (NumberFormatException ex) {
                    // A port with too many digits overflows int; skip this entry only.
                    System.out.println(ex.toString());
                    ConstDatas.log(ex.toString());
                }
            }
        } catch (Exception ex) {
            System.out.println(ex.toString());
            ConstDatas.log(ex.toString());
        }
    }
}
import java.net.URL;
import java.io.BufferedReader;
import java.net.HttpURLConnection;
import java.util.*;
import java.io.InputStreamReader;
import java.util.regex.Pattern;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.regex.Matcher;
/**
 * <p>Title: GetProxy</p>
 *
 * <p>Description: Scrapes public proxy-list pages for {@code host:port} entries, checks
 * whether a proxy can fetch a known page, and switches the HTTP proxy used by the JVM.</p>
 *
 * <p>Proxies are selected by writing the {@code http.proxyHost} / {@code http.proxyPort}
 * system properties, which are global to the JVM; this class does no synchronisation
 * around those writes.</p>
 *
 * <p>Copyright: Copyright (c) 2008</p>
 *
 * <p>Company: </p>
 *
 * @author not attributable
 * @version 1.0
 */
public class GetProxy {
    /** User-Agent header sent with every request made by this class. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT)";

    /**
     * Only ports strictly below this value are collected.
     * NOTE(review): valid TCP ports go up to 65535 - confirm that 65000 is intentional.
     */
    private static final int PORT_LIMIT = 65000;

    /**
     * Matches a bare {@code ip:port} anywhere in the proxycn page.
     * NOTE(review): the first octet must have exactly three digits, so addresses such as
     * 61.x.x.x are never matched - confirm whether that is intended.
     */
    private static final Pattern PROXYCN_ENTRY =
            Pattern.compile("(\\d{3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+)");

    /** Matches one table row of the hustonline proxy page; group 1 is the {@code ip:port}. */
    private static final Pattern HUST_ENTRY =
            Pattern.compile("<b>(\\d{3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+)</b>\\n </td><td>\\n <font color=.+>.+</font>\\n </td><td>\\n <font color=[blue]*[orange]*>.+</font>\\n </td><td>\\n <font color=.+>.+</font>");

    /** Proxies collected so far, each stored as {@code "host:port"}. */
    HashSet<String> hs = new HashSet<String>();

    public GetProxy() {
    }

    /**
     * Downloads the proxycn.com fast-proxy page and adds every {@code ip:port} found on it
     * to {@link #hs}. Failures are printed and logged, never thrown.
     */
    public void getProxyFromProxyCN() {
        collectFrom("http://www.proxycn.com/html_proxy/30fastproxy-1.html", PROXYCN_ENTRY);
    }

    /**
     * Checks a proxy by routing a request for the Baidu home page through it.
     *
     * <p>Side effect: the proxy is installed through the {@code http.proxyHost} and
     * {@code http.proxyPort} system properties and is left in place afterwards.</p>
     *
     * @param proxyAndport proxy in {@code host:port} form
     * @return {@code true} if the page came back and contains the expected Baidu marker text;
     *         {@code false} if the argument is malformed, the request failed, or the marker
     *         is missing
     */
    public boolean isVaildProxy(String proxyAndport) {
        try {
            String[] parts = proxyAndport.split(":");
            if (parts.length < 2) {
                // Not in host:port form.
                return false;
            }
            useProxy(parts[0], parts[1]);
            String page = fetchPage("http://www.baidu.com/");
            if (page.indexOf("把百度设为首页") != -1) {
                System.out.println(proxyAndport + " sucess!");
                return true;
            }
            return false;
        } catch (Exception ex) {
            ex.printStackTrace();
            ConstDatas.log(ex.toString());
            return false;
        }
    }

    /**
     * Downloads the hustonline proxy page and adds every {@code ip:port} found in its table
     * rows to {@link #hs}. Failures are printed and logged, never thrown.
     */
    public void getproxyFromHust() {
        collectFrom("http://info.hustonline.net/index/proxyshow.aspx", HUST_ENTRY);
    }

    /**
     * Picks a random entry from {@code ConstDatas.proxy} and, if it is in {@code host:port}
     * form, installs it through the {@code http.proxyHost} / {@code http.proxyPort} system
     * properties.
     *
     * @return the chosen entry, whether or not it was well-formed enough to be installed
     * @throws IllegalArgumentException if {@code ConstDatas.proxy} is empty, because
     *         {@code Random.nextInt(0)} rejects a zero bound
     */
    public String ChangeProxy() {
        // The no-arg Random seeds itself; seeding from the clock gave threads that called in
        // the same millisecond the same choice.
        Random random = new Random();
        int index = random.nextInt(ConstDatas.proxy.size());
        String proxyAndport = (String) (ConstDatas.proxy.toArray())[index];
        String[] parts = proxyAndport.split(":");
        System.out.println(Thread.currentThread() + "修改代理:" + proxyAndport);
        if (parts.length >= 2) {
            // Entry is in host:port form.
            useProxy(parts[0], parts[1]);
        }
        return proxyAndport;
    }

    public static void main(String[] args) {
        GetProxy g = new GetProxy();
        g.getProxyFromProxyCN();
        //g.getproxyFromHust();
        //g.isVaildProxy("211.99.188.220:80");
    }

    /** Writes the trimmed host and port into the JVM-wide HTTP proxy system properties. */
    private static void useProxy(String host, String port) {
        Properties systemProperties = System.getProperties();
        systemProperties.setProperty("http.proxyHost", host.trim());
        systemProperties.setProperty("http.proxyPort", port.trim());
    }

    /**
     * Fetches a page as text, one {@code '\n'} after every line, and always closes the stream.
     * The bytes are decoded with the platform default charset.
     */
    private static String fetchPage(String address) throws java.io.IOException {
        URL url = new URL(address);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(connection.getInputStream()));
        try {
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line).append("\n");
            }
            return page.toString();
        } finally {
            reader.close();
        }
    }

    /** Downloads a page and stores group 1 of every pattern match whose port is below the limit. */
    private void collectFrom(String address, Pattern entryPattern) {
        try {
            Matcher matcher = entryPattern.matcher(fetchPage(address));
            // Each find() advances to the next match, so it must only be called in the loop
            // condition; an extra call before the loop would drop the first proxy.
            while (matcher.find()) {
                String candidate = matcher.group(1);
                System.out.println(candidate);
                // Both patterns require ":digits", so index 1 always exists.
                String portText = candidate.split(":")[1];
                try {
                    if (Integer.parseInt(portText) < PORT_LIMIT) {
                        hs.add(candidate);
                    }
                } catch (NumberFormatException ex) {
                    // A port with too many digits overflows int; skip this entry only.
                    System.out.println(ex.toString());
                    ConstDatas.log(ex.toString());
                }
            }
        } catch (Exception ex) {
            System.out.println(ex.toString());
            ConstDatas.log(ex.toString());
        }
    }
}
评论: 0 | 引用: 0 | 查看次数: 369
发表评论