scrapy 自定義擴充套件的功能
阿新 • • 發佈:2018-11-05
在scrapy.telnet(新版Scrapy已改為scrapy.extensions.telnet)裡有一個TelnetConsole類
class TelnetConsole(protocol.ServerFactory):
    """Server factory that opens a telnet console while the engine runs.

    Listening is driven by crawler signals: it starts on ``engine_started``
    and stops on ``engine_stopped``.
    """

    def __init__(self, crawler):
        """Read the telnet settings from *crawler* and hook the engine signals.

        Raises:
            NotConfigured: if ``TELNETCONSOLE_ENABLED`` is false or
                ``TWISTED_CONCH_AVAILABLE`` is false.
        """
        if not crawler.settings.getbool('TELNETCONSOLE_ENABLED'):
            raise NotConfigured
        if not TWISTED_CONCH_AVAILABLE:
            raise NotConfigured
        self.crawler = crawler
        self.noisy = False
        # TELNETCONSOLE_PORT is read as a list and every entry converted to int.
        self.portrange = [int(x) for x in crawler.settings.getlist('TELNETCONSOLE_PORT')]
        self.host = crawler.settings['TELNETCONSOLE_HOST']
        # Tie the console's lifetime to the engine's lifetime.
        self.crawler.signals.connect(self.start_listening, signals.engine_started)
        self.crawler.signals.connect(self.stop_listening, signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the console from a crawler."""
        return cls(crawler)

    def start_listening(self):
        """Open the listening port and log the address actually bound."""
        self.port = listen_tcp(self.portrange, self.host, self)
        h = self.port.getHost()
        logger.debug("Telnet console listening on %(host)s:%(port)d",
                     {'host': h.host, 'port': h.port},
                     extra={'crawler': self.crawler})

    def stop_listening(self):
        """Close the port opened by ``start_listening``."""
        self.port.stopListening()

    def protocol(self):
        """Build the telnet transport for a new connection."""
        # NOTE(review): _get_telnet_vars is not part of this excerpt; it must
        # be defined on the class elsewhere.
        telnet_vars = self._get_telnet_vars()
        return telnet.TelnetTransport(telnet.TelnetBootstrapProtocol,
                                      insults.ServerProtocol,
                                      manhole.Manhole,
                                      telnet_vars)
參考這個類的寫法,我們可以自己編寫擴充套件,指定爬蟲在收到哪個訊號時做出什麼行為。
scrapy.signals裡面有以下訊號:
# Signal identifiers: each one is a distinct object() sentinel.
engine_started = object()
engine_stopped = object()
spider_opened = object()
spider_idle = object()
spider_closed = object()
spider_error = object()
request_scheduled = object()  # request placed into the scheduler
request_dropped = object()  # request dropped
response_received = object()  # response received
response_downloaded = object()  # response downloaded
item_scraped = object()  # item obtained
item_dropped = object()  # item dropped
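例如我在根目錄下建立一個extend.py檔案,裡面建立一個類MyExtend,並在settings中用EXTENSIONS註冊它(擴充套件要透過EXTENSIONS啟用,而不是ITEM_PIPELINES),例如EXTENSIONS = { 'shan.extend.MyExtend': 300, }(模組路徑依extend.py的實際位置而定);ITEM_PIPELINES = { 'shan.pipelines.ShanPipeline': 300, }則是用來啟用pipeline的設定。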
from scrapy import signals


class MyExtend:
    """Extension that prints a message when the engine starts and stops."""

    def __init__(self, crawler):
        """Keep the crawler and connect ``start``/``stop`` to engine signals."""
        self.crawler = crawler
        self.crawler.signals.connect(self.start, signals.engine_started)
        self.crawler.signals.connect(self.stop, signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the extension from a crawler."""
        return cls(crawler)

    def start(self):
        """Handler connected to ``signals.engine_started``."""
        print("signals.engine_started")

    def stop(self):
        """Handler connected to ``signals.engine_stopped``."""
        print("signals.engine_stop")
我規定在引擎開啟時列印signals.engine_started,引擎關閉時列印signals.engine_stop。
(venv) D:\shan>scrapy crawl chouti --nolog
D:\shan\shan\spiders\chouti.py:9: ScrapyDeprecationWarning: Module `scrapy.dupefilter` is deprecated, use `scrapy.dupefilters` instead
from scrapy.dupefilter import RFPDupeFilter
D:\shan\shan\spiders\chouti.py:11: ScrapyDeprecationWarning: Module `scrapy.telnet` is deprecated, use `scrapy.extensions.telnet` instead
from scrapy.telnet import TelnetConsole
signals.engine_started
{"result":{"code":"9999", "message":"", "data":{"complateReg":"0","destJid":"cdu_53923279913"}}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671818766000","lvCount":"13","nick":"Danbro","uvCount":"30","voteTime":"小於1分鐘前"}
}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819521000","lvCount":"7","nick":"Danbro","uvCount":"31","voteTime":"小於1分鐘前"}}
}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"30010", "message":"你已經推薦過了", "data":""}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819621000","lvCount":"9","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"}}
}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819634000","lvCount":"20","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"}
}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819614000","lvCount":"124","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"
}}}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819663000","lvCount":"32","nick":"Danbro","uvCount":"35","voteTime":"小於1分鐘前"}
}}
signals.engine_stop
開啟爬蟲時可以看見這兩句話。