1. 程式人生 > 一個可以獲取知乎timeline的爬蟲

一個可以獲取知乎timeline的爬蟲

name global target cookie chrome lib int html json

# -*- coding: utf-8 -*-
import requests
import lxml
import os,time
from bs4 import BeautifulSoup as sb
try:
    import cookielib

except:
    import http.cookiejar as cookielib
import json

# HTTP headers sent with every feed request. The User-Agent is that of
# Chrome on an Android Nexus 5.
# NOTE(review): "authorization" is a hard-coded bearer token; it presumably
# expires and should be supplied from configuration rather than source.
headers = {
    "Host": "www.zhihu.com",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "accept": "application/json, text/plain, */*",
    "Referer": "https://www.zhihu.com/",
    "Connection": "keep-alive",
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Mobile Safari/537.36"
    ),
    "authorization": "Bearer Mi4xUXJGd0FBQUFBQUFBa0VKNTBfbnVDeGNBQUFCaEFsVk5OQmZMV1FCVnQ3aEhfeUVsUElGN1Zrd3RSSWpMdHI0ZG5B|1503889972|a235d0e24d646c5df6b1f667abc005381c273870",
}


def get_session():
    """Create a requests session backed by the on-disk ``cookies`` jar.

    The jar is an ``LWPCookieJar`` bound to the file ``cookies`` in the
    current working directory. Loading is best-effort: if the file is
    missing or unreadable a message is printed and the session is returned
    with an empty jar.

    Returns:
        The configured ``requests`` session.
    """
    session = requests.session()
    session.cookies = cookielib.LWPCookieJar(filename="cookies")
    try:
        session.cookies.load()
    except (cookielib.LoadError, IOError, OSError):
        # Missing or malformed cookie file: carry on without stored cookies.
        print("cookie 無法加載...")
    else:
        print("cookie 加載成功!")
    return session


session = get_session()

# Query parameters for the feed endpoint; "after_id" is overwritten by the
# paging loop at the bottom of the script before each request.
# NOTE(review): the first key was truncated to "action" in the original
# listing; "action_feed" is the presumed name - confirm against the API.
# NOTE(review): "session_token" is a hard-coded credential, see `headers`.
data = {
    "action_feed": "True",
    "limit": "10",
    "session_token": "c9c3581148b6d633275ba5d4412d3bd8",
    "action": "down",
    "after_id": "0",
    "desktop": "true",
}


def get_data():
    """Fetch one page of the feed and print each entry.

    Sends a GET to the ``topstory`` endpoint using the module-level
    ``session``, ``data`` and ``headers``. For every item under the
    response's ``"data"`` key it prints the question title and the answer
    content; when either field is absent it prints a fallback message
    followed by the raw item. The module-level ``count`` is incremented once
    per item.
    """
    global count
    # The values in `data` are query-string parameters, so they go in
    # `params`; `data=` would put them in the body of the GET request.
    res = session.get(
        "https://www.zhihu.com/api/v3/feed/topstory",
        params=data,
        headers=headers,
    )
    # Named `payload` so the imported `json` module is not shadowed.
    payload = res.json()
    for item in payload["data"]:
        try:
            print(item["target"]["question"]["title"])
        except (KeyError, TypeError):
            print("沒有問題了" + str(item))
        try:
            print(item["target"]["content"])
        except (KeyError, TypeError):
            print("找不到答案了" + str(item))
        count += 1
        print()


# Total number of feed items printed across all pages.
count = 0
for n in range(5):
    # Page through the feed ten items at a time.
    data["after_id"] = n * 10
    get_data()
    time.sleep(3)  # pause between requests
print(count)

一個可以獲取知乎timeline的爬蟲