
Crawling the Maoyan Top 100 Movies with Python and Saving Them to CSV

Start ten threads and hand the per-page crawling loop over to them, one page per thread.

import csv
import threading

import requests
from lxml import etree


# Write the CSV header row once, before any crawler thread starts.
with open('data.csv', 'a', newline='') as f:
    spamwriter = csv.writer(f)
    spamwriter.writerow(['title', 'star', 'date', 'score'])

# Lock so concurrent threads do not interleave their writes to the shared CSV file.
csv_lock = threading.Lock()


class Crawler(threading.Thread):

    def __init__(self, page):
        super().__init__()
        self.page = page

    def run(self):
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'
                          ' AppleWebKit/537.36 (KHTML, like Gecko)'
                          ' Chrome/67.0.3396.99 Safari/537.36'
        }
        # Each page of the board lists 10 movies, so the offset is 10 * page.
        url = 'http://maoyan.com/board/4?offset={}'.format(10 * self.page)
        response = requests.get(url, headers=headers)
        html = etree.HTML(response.text)
        results = html.xpath('//*[@class="board-wrapper"]/dd/div/div')
        for result in results:
            # Movie title, starring cast, release date, rating (integer + fraction)
            ws = [
                result.xpath('./div[1]/p[1]/a/text()')[0],
                result.xpath('./div[1]/p[2]/text()')[0].strip(),
                result.xpath('./div[1]/p[3]/text()')[0],
                result.xpath('./div[2]/p/i[1]/text()')[0]
                + result.xpath('./div[2]/p/i[2]/text()')[0],
            ]
            print(ws)
            # Save the row to CSV; the lock keeps rows from different threads intact.
            with csv_lock:
                with open('data.csv', 'a', newline='') as f:
                    writer = csv.writer(f)
                    writer.writerow(ws)


if __name__ == '__main__':
    # One thread per page of the Top 100 board (10 pages * 10 movies each).
    for page in range(10):
        th = Crawler(page)
        th.start()
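
The threads above are non-daemon, so the interpreter waits for all of them to finish before exiting; if you want to block explicitly, collect the Crawler objects and call join() on each. As an alternative to subclassing Thread, the same ten-page crawl can be driven by a standard-library thread pool. The sketch below is not part of the original post: it assumes a hypothetical crawl_page() helper containing the same request-and-XPath logic as Crawler.run(), but returning the rows instead of writing them, so that only the main thread touches the CSV file and no lock is needed.

import csv
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'
                         ' AppleWebKit/537.36 (KHTML, like Gecko)'
                         ' Chrome/67.0.3396.99 Safari/537.36'}


def crawl_page(page):
    """Fetch one page of the board and return its rows (same XPaths as above)."""
    url = 'http://maoyan.com/board/4?offset={}'.format(10 * page)
    html = etree.HTML(requests.get(url, headers=HEADERS).text)
    rows = []
    for result in html.xpath('//*[@class="board-wrapper"]/dd/div/div'):
        rows.append([
            result.xpath('./div[1]/p[1]/a/text()')[0],
            result.xpath('./div[1]/p[2]/text()')[0].strip(),
            result.xpath('./div[1]/p[3]/text()')[0],
            result.xpath('./div[2]/p/i[1]/text()')[0]
            + result.xpath('./div[2]/p/i[2]/text()')[0],
        ])
    return rows


if __name__ == '__main__':
    # The pool crawls up to 10 pages concurrently; map() yields results in page order.
    with ThreadPoolExecutor(max_workers=10) as pool:
        all_rows = pool.map(crawl_page, range(10))

    # Only the main thread writes, so no lock is required here.
    with open('data.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        for rows in all_rows:
            writer.writerows(rows)

Because pool.map() preserves the page order and the with-block waits for every future to complete, the rows land in the CSV grouped by page, which the Thread version does not guarantee.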