程式人生 > Python：爬取糗事百科段子

Python :爬取糗事百科段子

原始碼:

import random
import urllib
import urllib.request
def JokeSet(
    Url,
    UserAgent,
    OutputPath=r"D:\pythonItem\爬蟲Practice\糗事百科.html",
):
    """Download one page and save its decoded HTML to a local file.

    Args:
        Url: Address of the page to fetch.
        UserAgent: Value sent as the ``User-Agent`` request header.
        OutputPath: File the page text is written to. Defaults to the path
            this script originally hard-coded. An existing file is
            overwritten.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        OSError: If the output file cannot be written.
    """
    # Request headers.
    Headers = {"User-Agent": UserAgent}

    # Build and send the request; the context manager closes the response
    # even if reading or decoding raises.
    req = urllib.request.Request(Url, headers=Headers)
    with urllib.request.urlopen(req) as response:
        data = response.read().decode("utf-8")

    # Write the fetched text to disk. The keyword is ``encoding``; the
    # original ``encode=`` is rejected by open() with a TypeError.
    with open(OutputPath, "w", encoding="utf-8") as f:
        f.write(data)
# Candidate User-Agent strings. Built once, outside the loop, because the
# list never changes between iterations. It must be a flat list of strings
# so that random.choice() returns a single header value.
HeadersList = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.",
]

# Crawl pages 1-10; adjust the range to change how many pages are fetched.
# NOTE: JokeSet is called without a per-page destination, so every page is
# saved to the same file and only the last successful page remains on disk.
for i in range(1, 11):
    # The page number comes from the loop variable (it was hard-coded to 2).
    url = r"https://www.qiushibaike.com/text/page/" + str(i) + "/"
    # Pick a random User-Agent per request to reduce the chance of an IP ban.
    UserAgent = random.choice(HeadersList)
    try:
        JokeSet(url, UserAgent)
    except (OSError, UnicodeDecodeError) as err:
        # OSError covers urllib's URLError/HTTPError and file-write failures.
        # Programming errors are deliberately not swallowed.
        print("爬取失敗", err)
    else:
        print("第{}次爬取成功".format(i))

# TODO: HTML data processing will be added in a later update.