1. 程式人生 > >httpclient繞過登陸驗證碼抓取資料

httpclient繞過登陸驗證碼抓取資料

session的保持是通過cookie來維持的,所以如果使用者有勾選X天內免登入,這個session 就X天內一直有效,就是通過這個cookie來維護。如果沒選X天內免登入,基本上就本次才能保持session,下次開啟瀏覽器就要重新登入了。 
所以在web安全裡,黑客通過XSS,最終目的就是獲取cookie,從而免登入直接進入系統。 


這次要講的是,得到使用者cookie後,免登入,用HttpClient保持原來session訪問原本一定要登入才能做的事。 


HttpClient 4.x 庫可以自己處理Cookie 
有兩種方式可以新增cookie, 
1.通過  httpclient.setCookieStore(cookieStore) 

2.通過  httpGet或者httpPost的addHeader(new BasicHeader("Cookie",cookie)); 




第一種, 
HttpClient是否在下次請求中攜帶從伺服器端請求來的Cookie,完全是由設定決定的。 


httpclient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH) 或者CookiePolicy.BROWSER_COMPATIBILITY 
如果將Cookie策略設定為BEST_MATCH或BROWSER_COMPATIBILITY的話,HttpClient會在請求中攜帶由伺服器返回的Cookie。如果按照上面的寫法,手動添加了CookieStore,那麼就會在下次請求中夾帶著兩個Cookie,Cookie和Cookie2。 



如果沒有另外設定Cookie策略(即使用預設策略),則需要手動通過 
httpclient.setCookieStore(cookieStore); 去設定. 


第二種, 
通過Header去設定cookie,這種方法,就是今天要用的應用場景, 
我們得到一個登入的cookie,免登入訪問。 
可以用瀏覽器登入,然後f12通過console 執行document.cookie得到cookie, 
用這個cookie ,在訪問時,設定  httpGet或者httpPost的addHeader(new BasicHeader("Cookie",cookie));就可以免登入訪問。 
這種場景我本來用第一種方法,設定沒成功,可能是因為用第一種時,沒設定path,domain,expire的原因,我猜的。 



這種場景可以解決第一次登入也需要驗證碼的網站。沒有登入就沒辦法釋出或重新整理資訊。 
如趕集網。


我們直接用get方法訪問網站繞過驗證碼
package com.artsoft.demo;

import java.util.Date;

import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;

import com.artsoft.util.DownloadUtil;
import com.artsoft.util.HtmlAnalyze;

/**
 * Demo of reusing an already-authenticated browser session with Apache
 * HttpClient 4.x: a cookie string copied from the browser is sent verbatim in
 * the {@code Cookie} request header, so the request is treated as logged in
 * without going through the login form (and its captcha).
 *
 * <p>NOTE(review): the cookie strings below are hard-coded session credentials
 * copied from a browser. They expire, and they should not live in source
 * control; load them from configuration if this is used beyond a demo.
 */
public class Dajiewang {

	/** Charset used to decode a response body when the server does not declare one. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/** Separator line printed before each request is issued. */
	private static final String BANNER = "******************************頁面轉向******************************";

	/**
	 * Fetches the dajie.com home page using a pre-captured login cookie and
	 * prints the response body to standard output.
	 *
	 * @throws Exception if the request fails or the body cannot be read
	 */
	public static void daJeWang() throws Exception {
		System.out.println(BANNER);
		HttpGet get = new HttpGet("http://www.dajie.com/home");
		get.addHeader(new BasicHeader("Cookie",
				"DJ_UVID=MTQ0Njc3NTY0MTU1Mzc3MDc2; DJ_RF=empty; DJ_EU=http%3A%2F%2Fwww.dajie.com%2Fhome; login_email=764295333%40qq.com; dj_auth_v3=MW_qOtlnwl_JWoggzLsiIygjegD07-zT0hRU1DpC7Nwrsyf3qxtw-s9uPFHeds4*; uchome_loginuser=23860580; dj_cap=623eefeadd1d35d8d524c3a4c11e428f; USER_ACTION=request^AProfessional^ANORMAL^A-^A-; login_email=764295333%40qq.comHost:www.dajie.com"));
		get.addHeader("Content-Type", "text/html;charset=UTF-8");
		get.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0");
		get.addHeader("Host", "www.dajie.com");
		get.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
		get.addHeader("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
		// Prints the content of the home page as seen by a logged-in user.
		fetchAndPrint(get);
	}

	/**
	 * Issues an AJAX-style GET to data.weibo.com using a pre-captured cookie and
	 * prints the response body to standard output.
	 *
	 * @param newUrl absolute URL to request, e.g. the {@code getchartdata} endpoint
	 * @throws Exception if the request fails or the body cannot be read
	 */
	public static void Weibo(String newUrl) throws Exception {
		System.out.println(BANNER);
		HttpGet get = new HttpGet(newUrl);
		// No Accept-Encoding header is sent on purpose: a plain DefaultHttpClient
		// does not decompress gzip/deflate (and nothing here handles sdch), so
		// advertising them could make the printed body unreadable compressed bytes.
		get.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
		get.addHeader("Connection", "keep-alive");
		get.addHeader("Content-Type", "application/x-www-form-urlencoded");
		get.addHeader(new BasicHeader("Cookie",
				"SINAGLOBAL=8549726845230.907.1445398578667; SUHB=0sqQ0pK3WBV2gN; DATA=usrmdinst_5; _s_tentry=-; Apache=7532238222192.973.1448331434936; ULV=1448331434952:13:8:1:7532238222192.973.1448331434936:1447378860051; SUB=_2AkMhD0eLdcNhrAFZmP0SzG3rbolXzQ7wu9_0M03fZ2JCMnoQgT5nqiRotBF_DN7Dt0e6al7NzPhNs71jebD5Fh4XHuaWFWw.; SUBP=0033WrSXqPxfM72wWs9jqgMF55529P9D9WFVId20mkyG_N-5ejfVKF0s5JpV2hMcShz4SKe0eXWpMC4odcXt; login_sid_t=19644dacc1b9296d1e5bcfad125de02c; WBStore=062485857e03170e|undefined; PHPSESSID=ffiim2vvu63quisbpkga00pap3; UOR=picture.youth.cn,widget.weibo.com,static.xiaomi.cn"));
		get.addHeader("Host", "data.weibo.com");
		get.addHeader("Referer", "http://data.weibo.com/index/hotword");
		get.addHeader("User-Agent",
				"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36");
		get.addHeader("X-Requested-With", "XMLHttpRequest");
		fetchAndPrint(get);
	}

	/**
	 * Downloads the Weibo hot-word page, extracts the {@code server_time} value
	 * embedded in it, and uses the difference between the local clock and that
	 * value as the {@code __rnd} query parameter of the chart-data request.
	 *
	 * @param args unused
	 * @throws Exception if any download fails or {@code server_time} is missing
	 *                   or not numeric
	 */
	public static void main(String[] args) throws Exception {
		String strHtml = DownloadUtil.getHtmlText("http://data.weibo.com/index/hotword?wid=1091324464527&wname=范冰冰",
				1000 * 30, "UTF-8", null, null);
		String timeDiff = HtmlAnalyze.getTagText(strHtml, "server_time': '", "'");
		System.out.println(new Date());
		System.out.println(timeDiff);

		if (timeDiff == null || timeDiff.trim().isEmpty()) {
			throw new IllegalStateException("server_time not found in hot-word page");
		}
		// server_time is presumably an epoch timestamp (TODO confirm unit); such
		// values do not fit in an int, so parse and subtract as long to avoid a
		// NumberFormatException / silent truncation.
		long s = System.currentTimeMillis() - Long.parseLong(timeDiff.trim());
		System.out.println(s);

		Weibo("http://data.weibo.com/index/ajax/getchartdata?month=default&__rnd=" + s);
	}

	/**
	 * Executes the prepared request on a fresh client, prints the response body,
	 * and always releases the connection and shuts the client down afterwards.
	 *
	 * @param get fully configured request (URL and headers already set)
	 * @throws Exception if the request fails or the body cannot be read
	 */
	private static void fetchAndPrint(HttpGet get) throws Exception {
		DefaultHttpClient client = new DefaultHttpClient();
		try {
			HttpResponse httpResponse = client.execute(get);
			// A response may legitimately carry no entity; print an empty line
			// instead of letting EntityUtils reject the null.
			String responseString = httpResponse.getEntity() == null
					? ""
					: EntityUtils.toString(httpResponse.getEntity(), DEFAULT_CHARSET);
			System.out.println(responseString);
		} finally {
			get.releaseConnection();
			client.getConnectionManager().shutdown();
		}
	}

}