爬蟲爬取知乎登入後首頁
阿新 • • 發佈:2018-12-12
package zhihu;
import java.io.IOException; import java.util.HashMap; import java.util.Map;
import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document;
public class ZhiHuSpider {
public static void main(String[] args) {
try {
Map<String, String> cookies = null;
cookies = new HashMap<String, String>();
//使用cookies.put(“key”,“value”)方式新增獲取的首頁cookie
connection.cookies(cookies);
connection.userAgent(“Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/”
+ “537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36”);
Document document = connection.get();
System.out.println(document.html());
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
這樣就能獲得首頁的全部資料(解析暫時沒做)