1. 程式人生 > 爬取糗事百科案例

爬取糗事百科案例

from random import choice
import requests
import  re
# Pool of browser User-Agent values; one is picked at random for each request.
# Every entry is the header VALUE only: the literal "User-Agent:" prefix that
# the first two entries previously carried would have been sent as part of the
# value, and their missing spaces made the strings malformed.
user_agents = [
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
]

# First page of the text-joke listing.
url = "https://www.qiushibaike.com/text/page/1"

# Captures the text inside <div class="content"><span>...</span>.
# \s* absorbs the line breaks around the text; "." does not match a newline,
# so each capture is a single line.
JOKE_PATTERN = re.compile(r'<div class="content">\s*<span>\s*(.+)\s*</span>')


def build_headers():
    """Return request headers with a randomly chosen User-Agent."""
    return {"User-Agent": choice(user_agents)}


def extract_jokes(html):
    """Return the joke texts found in *html* as a list of strings.

    An empty list is returned when nothing matches. Surrounding whitespace
    is stripped because the greedy ``.+`` would otherwise keep trailing
    spaces that precede the line break.
    """
    return [text.strip() for text in JOKE_PATTERN.findall(html)]


def save_jokes(jokes, path="smile.txt"):
    """Write each joke to *path* (UTF-8), separated by a blank line."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(joke + "\n\n" for joke in jokes)


def main():
    """Download the listing page, extract the jokes and save them."""
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=build_headers(), timeout=10)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    response.raise_for_status()
    save_jokes(extract_jokes(response.text))


if __name__ == "__main__":
    main()