1. 程式人生 > >scrapy 自定義擴充套件的功能

scrapy 自定義擴充套件的功能

scrapy.telnet(新版 Scrapy 中已改為 scrapy.extensions.telnet)裡有一個TelnetConsole類:

class TelnetConsole(protocol.ServerFactory):
    """Server factory that opens a telnet console while the engine runs.

    The instance hooks itself onto the crawler's ``engine_started`` and
    ``engine_stopped`` signals so that the listening port is opened and
    closed together with the engine.
    """

    def __init__(self, crawler):
        """Read the telnet settings and subscribe to the engine signals.

        Raises:
            NotConfigured: if ``TELNETCONSOLE_ENABLED`` is false, or if
                ``TWISTED_CONCH_AVAILABLE`` is false.
        """
        settings = crawler.settings
        enabled = settings.getbool('TELNETCONSOLE_ENABLED')
        # Same short-circuit order as two separate guards: the setting is
        # consulted first, the Conch availability flag second.
        if not (enabled and TWISTED_CONCH_AVAILABLE):
            raise NotConfigured

        self.crawler = crawler
        self.noisy = False
        self.portrange = list(map(int, settings.getlist('TELNETCONSOLE_PORT')))
        self.host = settings['TELNETCONSOLE_HOST']

        subscribe = self.crawler.signals.connect
        subscribe(self.start_listening, signals.engine_started)
        subscribe(self.stop_listening, signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the console from a crawler."""
        return cls(crawler)

    def start_listening(self):
        """Open the listening port and log the address that was bound."""
        self.port = listen_tcp(self.portrange, self.host, self)
        address = self.port.getHost()
        log_args = {'host': address.host, 'port': address.port}
        logger.debug("Telnet console listening on %(host)s:%(port)d",
                     log_args,
                     extra={'crawler': self.crawler})

    def stop_listening(self):
        """Close the port opened by ``start_listening``."""
        self.port.stopListening()

    def protocol(self):
        """Build the telnet transport used for a connection.

        The variables passed to the manhole come from
        ``self._get_telnet_vars()``, which is defined outside this excerpt.
        """
        return telnet.TelnetTransport(
            telnet.TelnetBootstrapProtocol,
            insults.ServerProtocol,
            manhole.Manhole,
            self._get_telnet_vars(),
        )

我們可以仿照這個類自己編寫擴充套件,指定爬蟲在出現某個訊號時執行什麼行為。

scrapy.signals 裡面有以下訊號:

# Signal identifiers. Each name is bound to its own distinct ``object()``
# instance, so every signal is a unique sentinel.
engine_started = object()
engine_stopped = object()
spider_opened = object()
spider_idle = object()
spider_closed = object()
spider_error = object()
request_scheduled = object()  # a request was placed into the scheduler
request_dropped = object()  # a request was dropped
response_received = object()  # a response was received
response_downloaded = object()  # a response was downloaded
item_scraped = object()  # an item was obtained
item_dropped = object()  # an item was dropped

例如我在專案根目錄下建立一個extend.py檔案,裡面建立一個類MyExtend,然後在settings中透過EXTENSIONS(而不是ITEM_PIPELINES)註冊它,例如 EXTENSIONS = { 'shan.extend.MyExtend': 300, }(模組路徑需依extend.py實際所在的位置調整):

from scrapy import signals


class MyExtend:
    """Extension that prints a line when the engine starts and stops."""

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the extension from a crawler."""
        return cls(crawler)

    def __init__(self, crawler):
        """Keep the crawler and subscribe both handlers to its signals."""
        self.crawler = crawler
        bus = crawler.signals
        bus.connect(self.start, signals.engine_started)
        bus.connect(self.stop, signals.engine_stopped)

    def start(self):
        """Handler connected to ``signals.engine_started``."""
        print("signals.engine_started")

    def stop(self):
        """Handler connected to ``signals.engine_stopped``."""
        print("signals.engine_stop")

我規定在引擎開啟時列印signals.engine_started,引擎關閉時列印signals.engine_stop。

(venv) D:\shan>scrapy crawl chouti --nolog
D:\shan\shan\spiders\chouti.py:9: ScrapyDeprecationWarning: Module `scrapy.dupefilter` is deprecated, use `scrapy.dupefilters` instead
  from scrapy.dupefilter import RFPDupeFilter
D:\shan\shan\spiders\chouti.py:11: ScrapyDeprecationWarning: Module `scrapy.telnet` is deprecated, use `scrapy.extensions.telnet` instead
  from scrapy.telnet import TelnetConsole
signals.engine_started
{"result":{"code":"9999", "message":"", "data":{"complateReg":"0","destJid":"cdu_53923279913"}}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671818766000","lvCount":"13","nick":"Danbro","uvCount":"30","voteTime":"小於1分鐘前"}
}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819521000","lvCount":"7","nick":"Danbro","uvCount":"31","voteTime":"小於1分鐘前"}}
}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819621000","lvCount":"9","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"}}
}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819634000","lvCount":"20","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"}
}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819614000","lvCount":"124","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"
}}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819663000","lvCount":"32","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"}
}}
signals.engine_stop

執行爬蟲時,可以在輸出的開頭和結尾分別看見這兩句話。