Python :爬取糗事百科段子
阿新 • • 發佈:2018-11-17
原始碼:
import random
import urllib.request


# Base address of the text-joke listing; the page number is appended per request.
BaseUrl = "https://www.qiushibaike.com/text/page/"

# Pool of User-Agent strings. One is chosen at random for every request to
# lower the chance of the client IP being banned. Built once, outside the loop.
HeadersList = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.",
]


def PageUrl(Page):
    """Return the listing URL for page number ``Page``.

    Page: page number; it is converted with ``str`` and placed in the URL path.
    """
    return BaseUrl + str(Page) + "/"


def JokeSet(Url, UserAgent):
    """Download ``Url`` and write the decoded HTML to a local file.

    Url: address of the page to fetch.
    UserAgent: value sent in the ``User-Agent`` request header.

    Errors from the request (e.g. ``urllib.error.URLError``), from UTF-8
    decoding (``UnicodeDecodeError``) and from writing the file (``OSError``)
    are not handled here and propagate to the caller.
    """
    # Request headers.
    Headers = {"User-Agent": UserAgent}

    # Build and send the request; the ``with`` block closes the response
    # even when reading or decoding fails.
    req = urllib.request.Request(Url, headers=Headers)
    with urllib.request.urlopen(req) as response:
        data = response.read().decode("utf-8")

    # Write the fetched page to disk.
    # NOTE(review): the path is fixed and the mode is "w", so every call
    # replaces the previous page's content -- confirm whether that is intended.
    path = r"D:\pythonItem\爬蟲Practice\糗事百科.html"
    with open(path, "w", encoding="utf-8") as f:
        f.write(data)


def main():
    """Fetch listing pages 1 through 10 and report the outcome of each."""
    # Adjust the range to change how many pages are fetched.
    for i in range(1, 11):
        url = PageUrl(i)
        # Rotate the User-Agent randomly per request.
        UserAgent = random.choice(HeadersList)
        try:
            JokeSet(url, UserAgent)
        except Exception as err:
            # Best-effort crawl: report the failure (with its cause) and move
            # on to the next page. Ctrl-C is no longer swallowed.
            print("爬取失敗", err)
        else:
            print("第{}次爬取成功".format(i))


if __name__ == "__main__":
    main()
#Html資料處理後續更新