使用 jsoup 和 httpclient 爬資料時遇到的隱藏欄位 __VIEWSTATE 問題
最近爬取一個 ASP.NET 網站時，遇到了隱藏欄位 __VIEWSTATE 的問題。
一開始天真地直接把火狐抓包得到的 __VIEWSTATE 值設定進去，結果一直報 500；納悶了好久才發現，每次請求得到的 __VIEWSTATE 值都不一樣。
改成每次先請求頁面、取出當下的 __VIEWSTATE 值再提交，這下才解決了：
/**
 * Scraper for an ASP.NET WebForms site: logs in with jsoup to obtain the
 * session cookies, then opens the "學生選課情況查詢" page and re-submits its
 * form with Apache HttpClient.
 *
 * <p>The hidden {@code __VIEWSTATE} field of the query page is read from a
 * fresh GET immediately before the POST instead of being hard-coded, because
 * the server rejects a stale value.
 *
 * <p>{@link #urlSource} and {@link #urlCookies} are blank in this listing and
 * have to be filled in before the class can be used.
 */
public class Jsoup_getdocuments {
    /** Browser identity sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0";
    private static final String ACCEPT =
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    private static final String ACCEPT_LANGUAGE = "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3";
    private static final String ACCEPT_ENCODING = "gzip, deflate";

    /** Host that serves the pages reached after login. */
    private static final String HOST = "61.142.33.204";
    /** Prefix prepended to the relative link found on the main page. */
    private static final String BASE_URL = "http://" + HOST + "/";

    /** Text of the main-page link that leads to the query page. */
    private static final String COURSE_QUERY_LINK_TEXT = "學生選課情況查詢";
    /** Name of the session cookie forwarded to HttpClient. */
    private static final String SESSION_COOKIE = "ASP.NET_SessionId";
    /** Charset used to encode the form and to decode the response body. */
    private static final String FORM_CHARSET = "GBK";

    /** Cookies of the most recent successful login; {@code null} after a failed one. */
    private static Map<String, String> cookies;
    /** Main-page URL prefix; the user name is appended to it. Blank in this listing. */
    private static String urlSource = "";
    /** Login URL. Blank in this listing. */
    private static final String urlCookies = "";

    /**
     * Submits the login form and returns the cookies set by the response.
     *
     * <p>Redirects are not followed, so the cookies are those of the direct
     * response to the login request.
     *
     * @param username value sent as {@code TextBox1}
     * @param password value sent as {@code TextBox2}
     * @return the response cookies, or {@code null} if the request failed with
     *         an {@link IOException}
     */
    public static Map<String, String> getCookies(String username, String password) {
        try {
            // NOTE(review): the __VIEWSTATE below is a captured constant. If the
            // login page ever rotates it, fetch it first the way getSource does.
            // NOTE(review): no request method is set on this connection, so
            // jsoup's default applies (GET per the jsoup documentation - confirm)
            // and the fields below travel in the query string.
            Response res = Jsoup.connect(urlCookies)
                    .followRedirects(false)
                    .userAgent(USER_AGENT)
                    .data("TextBox1", username)
                    .data("TextBox2", password)
                    .data("Button1", "")
                    .data("RadioButtonList1", "學生")
                    .data("__VIEWSTATE", "dDwxMTE4MjQwNDc1Ozs+MzFt0h81g6NGHTq1L9P2NfWUGLA=")
                    .header("Accept", ACCEPT)
                    .header("Accept-Language", ACCEPT_LANGUAGE)
                    .header("Accept-Encoding", ACCEPT_ENCODING)
                    .header("Connection", "keep-alive")
                    .header("Upgrade-Insecure-Requests", "1")
                    .execute();
            cookies = res.cookies();
        } catch (IOException e) {
            e.printStackTrace();
            // Never hand back cookies left over from an earlier successful login:
            // they may belong to a different user.
            cookies = null;
        }
        return cookies;
    }

    /**
     * Loads the main page for {@code username}, follows its
     * "學生選課情況查詢" link, and posts the query form; the body of the POST
     * response is printed to standard output.
     *
     * @param cookies  session cookies from {@link #getCookies}; this parameter
     *                 shadows the static field of the same name
     * @param username appended to the main-page URL and to the Referer header
     * @return the main page document; {@code null} if {@code cookies} is
     *         {@code null}, the main page could not be loaded, or the link is
     *         not present on it. A failure in the later requests is reported
     *         on standard error and still returns the main page.
     */
    public static Document getSource(Map<String, String> cookies, String username) {
        if (cookies == null) {
            return null;
        }
        Document mainPage = null;
        try {
            mainPage = Jsoup.connect(urlSource + username)
                    .userAgent(USER_AGENT)
                    .cookies(cookies)
                    .get();

            String queryUrl = findCourseQueryUrl(mainPage);
            if (queryUrl == null) {
                return null;
            }

            String referer = BASE_URL + "xs_main.aspx?xh=" + username;
            String viewState = fetchViewState(queryUrl, referer, cookies);
            if (viewState == null) {
                // Without a current __VIEWSTATE the POST cannot succeed; skip it.
                return mainPage;
            }

            System.out.println(postTermQuery(queryUrl, referer, cookies, viewState));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return mainPage;
    }

    /** Returns the absolute URL of the query link on the main page, or {@code null} if absent. */
    private static String findCourseQueryUrl(Document mainPage) {
        for (Element link : mainPage.getElementsByTag("a")) {
            if (link.text().equals(COURSE_QUERY_LINK_TEXT)) {
                return BASE_URL + link.attr("href");
            }
        }
        return null;
    }

    /**
     * GETs the query page and returns the current value of its hidden
     * {@code __VIEWSTATE} input, or {@code null} if the request fails or the
     * input is missing.
     */
    private static String fetchViewState(String queryUrl, String referer,
            Map<String, String> cookies) {
        Document page;
        try {
            page = Jsoup.connect(queryUrl)
                    .userAgent(USER_AGENT)
                    .header("Accept", ACCEPT)
                    .header("Accept-Language", ACCEPT_LANGUAGE)
                    .header("Accept-Encoding", ACCEPT_ENCODING)
                    .header("Connection", "keep-alive")
                    .header("Upgrade-Insecure-Requests", "1")
                    .header("Host", HOST)
                    .header("Referer", referer)
                    .cookies(cookies)
                    .get();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        Element field = page.select("input[name=__VIEWSTATE]").first();
        if (field == null) {
            System.err.println("__VIEWSTATE input not found at " + queryUrl);
            return null;
        }
        return field.attr("value");
    }

    /**
     * POSTs the query form with the given {@code __VIEWSTATE} and returns the
     * response body decoded as GBK, or {@code null} if the response has no
     * entity. The client and the response are closed before returning.
     */
    private static String postTermQuery(String queryUrl, String referer,
            Map<String, String> cookies, String viewState) throws IOException {
        HttpPost httpPost = new HttpPost(queryUrl);
        httpPost.setHeader("Content-type", "application/x-www-form-urlencoded");
        httpPost.setHeader("User-Agent", USER_AGENT);
        httpPost.setHeader("Connection", "keep-alive");
        httpPost.setHeader("Upgrade-Insecure-Requests", "1");
        httpPost.setHeader("Host", HOST);
        httpPost.setHeader("Referer", referer);
        httpPost.setHeader("Accept-Language", ACCEPT_LANGUAGE);
        httpPost.setHeader("Accept-Encoding", ACCEPT_ENCODING);
        httpPost.setHeader("Accept", ACCEPT);
        // Only the session cookie is forwarded; HttpClient does not share jsoup's cookie map.
        httpPost.setHeader("Cookie", SESSION_COOKIE + "=" + cookies.get(SESSION_COOKIE));

        List<NameValuePair> form = new ArrayList<NameValuePair>();
        form.add(new BasicNameValuePair("__EVENTTARGET", "ddlXQ"));
        form.add(new BasicNameValuePair("__EVENTARGUMENT", ""));
        form.add(new BasicNameValuePair("__VIEWSTATE", viewState));
        // NOTE(review): ddlXN / ddlXQ look like academic year and term selectors - confirm.
        form.add(new BasicNameValuePair("ddlXN", "2018-2019"));
        form.add(new BasicNameValuePair("ddlXQ", "1"));
        httpPost.setEntity(new UrlEncodedFormEntity(form, FORM_CHARSET));

        // Synchronous, blocking call; try-with-resources releases the connection.
        try (CloseableHttpClient client = HttpClients.createDefault();
                CloseableHttpResponse response = client.execute(httpPost)) {
            HttpEntity entity = response.getEntity();
            return entity == null ? null : EntityUtils.toString(entity, FORM_CHARSET);
        }
    }
}