1. 程式人生 > 多執行緒 下載圖片

多執行緒 下載圖片

# coding=utf-8
import os
# Thread pool library
from concurrent import futures

import requests
from lxml import etree


def get_url(url):
    """Download every image referenced by one listing page.

    The page at ``url`` is fetched with browser-like headers, the
    ``data-original`` attribute of each matching ``<img>`` element is
    extracted, and every image URL is handed to ``download_url`` on a
    thread pool. Each submitted URL is printed. The call returns only
    after all downloads started for this page have finished.

    A failed image download is reported on stdout and does not abort the
    remaining downloads.

    Args:
        url: Address of the listing page to scan.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched (connection error or timeout).
    """
    headers = {
        'Host': 'www.doutula.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Referer': 'http://www.doutula.com'
    }
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    tree = etree.HTML(response.text)
    # NOTE(review): lxml may hand back None for an empty document -- treat
    # that as "no images on this page" instead of failing on ``.xpath``.
    if tree is None:
        return
    image_urls = tree.xpath('//img[@class="lazy image_dtb img-responsive"]/@data-original')

    # The ``with`` block joins the worker threads and releases the pool
    # before returning, so repeated calls do not accumulate idle threads.
    with futures.ThreadPoolExecutor(max_workers=40) as ex:
        pending = {}
        for image_url in image_urls:
            pending[ex.submit(download_url, image_url)] = image_url
            print(image_url)
        # Surface worker errors; a discarded future hides them completely.
        for finished in futures.as_completed(pending):
            error = finished.exception()
            if error is not None:
                print('download failed: {}: {}'.format(pending[finished], error))


def download_url(re):
    """Download one image and store it under the ``imgs`` directory.

    The file is named after the last ``/``-separated segment of the URL.
    The ``imgs`` directory is created when it does not exist yet.

    Args:
        re: URL of the image to download. (The name shadows the stdlib
            ``re`` module inside this function; it is kept so existing
            callers are unaffected.)

    Raises:
        ValueError: If the URL ends with ``/`` and therefore yields no
            file name.
        requests.RequestException: If the request fails, times out, or
            the server answers with an HTTP error status.
    """
    name = re.rsplit('/', 1)[-1]
    # Check before touching the network: an empty name would otherwise
    # try to open the ``imgs`` directory itself for writing.
    if not name:
        raise ValueError('cannot derive a file name from URL: {!r}'.format(re))

    page = requests.get(re, timeout=10)
    # Do not save an HTML error page under an image file name.
    page.raise_for_status()

    # exist_ok keeps this safe when many worker threads get here at once.
    os.makedirs('imgs', exist_ok=True)
    with open(os.path.join('imgs', name), 'wb') as f:
        f.write(page.content)


if __name__ == '__main__':
    # Scan listing pages 1 through 9, one page at a time.
    page_template = 'http://www.doutula.com/article/list/?page={}'
    for page_number in range(1, 10):
        get_url(page_template.format(page_number))