1. 程式人生 > >scrapy中自定義下載中介軟體設定動態User-Agent和代理ip

scrapy中自定義下載中介軟體設定動態User-Agent和代理ip

'''
scrapy 自定義下載中介軟體
動態設定User-Agent
'''

import random

class RandomUserAgent:
    """Downloader middleware that puts a randomly chosen User-Agent on requests.

    The pool of candidate strings is taken from the ``USER_AGENTS`` setting
    when the middleware is built through ``from_crawler``.
    """

    def __init__(self, agents):
        """Store the pool of User-Agent strings.

        Args:
            agents: Sequence of User-Agent strings to pick from. May be
                empty, in which case requests are left untouched.
        """
        self.agents = agents

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware from the crawler's ``USER_AGENTS`` setting.

        Args:
            crawler: Object exposing ``settings.getlist``.

        Returns:
            A new instance holding the configured list.
        """
        return cls(crawler.settings.getlist('USER_AGENTS'))

    def process_request(self, request, spider):
        """Set a random ``User-Agent`` header if the request has none yet.

        Args:
            request: Request whose ``headers`` mapping is updated in place.
            spider: Unused.

        Returns:
            None in every case.
        """
        if not self.agents:
            # random.choice raises IndexError on an empty sequence; with no
            # configured agents, leave the request as it is instead of
            # failing every request.
            return None
        # setdefault keeps a User-Agent that is already present on the request.
        request.headers.setdefault('User-Agent', random.choice(self.agents))
        return None


'''
動態設定代理ip
'''
class RandomProxy:
    """Downloader middleware that assigns a randomly chosen proxy to requests.

    The list of proxy addresses is taken from the ``IPLIST`` setting when the
    middleware is built through ``from_crawler``.
    """

    def __init__(self, iplist):
        """Store the list of proxy addresses.

        Args:
            iplist: Sequence of proxy addresses to pick from. May be empty,
                in which case requests are left untouched.
        """
        self.iplist = iplist

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware from the crawler's ``IPLIST`` setting.

        Args:
            crawler: Object exposing ``settings.getlist``.

        Returns:
            A new instance holding the configured list.
        """
        return cls(crawler.settings.getlist('IPLIST'))

    def process_request(self, request, spider):
        """Write a randomly chosen proxy into ``request.meta['proxy']``.

        Any proxy already stored under that key is overwritten.

        Args:
            request: Request whose ``meta`` mapping is updated in place.
            spider: Unused.

        Returns:
            None in every case.
        """
        if not self.iplist:
            # random.choice raises IndexError on an empty sequence; with no
            # configured proxies, send the request without one instead of
            # failing every request.
            return None
        request.meta['proxy'] = random.choice(self.iplist)
        return None

# 在settings.py中設定USER_AGENTS和IPLIST(皆為列表),並在DOWNLOADER_MIDDLEWARES中啟用這兩個中介軟體。