多執行緒 下載圖片
阿新 • • 發佈:2018-12-22
# coding=utf-8
"""Download the images linked from doutula.com article-list pages.

Each listing page is fetched and parsed for image URLs; the images are then
downloaded concurrently with a thread pool and written into ``IMG_DIR``.
"""
import os
# Thread-pool library
from concurrent import futures

import requests
from lxml import etree

# Directory the downloaded images are written into (created on demand).
IMG_DIR = 'imgs'
# Seconds to wait on any single HTTP request before giving up, so a stalled
# server cannot hang a worker (or the main thread) forever.
REQUEST_TIMEOUT = 10


def get_url(url):
    """Fetch one listing page and download every image it references.

    The page at ``url`` is requested with browser-like headers, parsed as
    HTML, and the ``data-original`` attribute of every matching ``<img>`` is
    treated as an image URL. Each image URL is printed and handed to
    ``download_url`` on a thread pool. The call returns once all downloads
    for this page have finished; a failed download is reported on stdout and
    does not stop the others.

    Args:
        url: Address of the listing page to scrape.
    """
    headers = {
        'Host': 'www.doutula.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Referer': 'http://www.doutula.com'
    }
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response = etree.HTML(response.text)
    res = response.xpath('//img[@class="lazy image_dtb img-responsive"]/@data-original')

    # Open a thread pool, call the download function, and pass it each URL.
    # The ``with`` block shuts the pool down when the page is done, so worker
    # threads are not leaked once per page.
    with futures.ThreadPoolExecutor(max_workers=40) as ex:
        pending = {}
        for img_url in res:
            pending[ex.submit(download_url, img_url)] = img_url
            print(img_url)

        # Exceptions raised inside a worker are stored on its future and are
        # lost unless someone asks for them, so surface them here.
        for done in futures.as_completed(pending):
            error = done.exception()
            if error is not None:
                print('download failed: {} ({})'.format(pending[done], error))


def download_url(re):
    """Download one image and save it under ``IMG_DIR``.

    The file is named after the last ``/``-separated segment of the URL.

    Args:
        re: URL of the image to download. (The parameter name is kept for
            backward compatibility even though it matches the name of the
            standard-library ``re`` module.)

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Request the image; reject HTTP error responses so that an error page
    # is never written to disk as if it were image data.
    page = requests.get(re, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()

    name = re.split('/')[-1]
    # Create the target folder if needed; ``exist_ok`` keeps this safe when
    # several worker threads reach this line at the same time.
    os.makedirs(IMG_DIR, exist_ok=True)
    with open(os.path.join(IMG_DIR, name), 'wb') as f:
        f.write(page.content)


if __name__ == '__main__':
    # Listing pages 1 through 9.
    urls = ['http://www.doutula.com/article/list/?page={}'.format(i) for i in range(1, 10)]
    for url in urls:
        try:
            get_url(url)
        except requests.RequestException as error:
            # One unreachable page should not abort the remaining pages.
            print('page failed: {} ({})'.format(url, error))