1. 程式人生 > scrapy-redis 自定義去重規則

scrapy-redis 自定義去重規則

pdu mes efi import ngs server xxx iter 編碼

############### xxx.py  ######

from scrapy_redis import defaults
from scrapy_redis.connection import get_redis_from_settings
from scrapy_redis.dupefilter import RFPDupeFilter


class RedisDupeFilter(RFPDupeFilter):
    """Duplicate-request filter that uses a constant Redis key.

    Only ``from_settings`` is overridden; everything else is inherited
    from ``RFPDupeFilter``.
    """

    @classmethod
    def from_settings(cls, settings):
        """Create the filter from the crawler settings.

        Args:
            settings: Scrapy settings object; used to open the Redis
                connection and to read the ``DUPEFILTER_DEBUG`` flag.

        Returns:
            An instance of ``cls`` bound to the Redis connection.
        """
        server = get_redis_from_settings(settings)
        # Fill the ``timestamp`` placeholder with the constant "myScrapy"
        # so the key is the same on every run.
        # NOTE(review): presumably the stock filter substitutes the current
        # time here, giving a new key per run — confirm against scrapy-redis.
        key = defaults.DUPEFILTER_KEY % {"timestamp": "myScrapy"}
        debug = settings.getbool("DUPEFILTER_DEBUG")
        return cls(server, key=key, debug=debug)


# Backward-compatible alias for the original (misspelled) class name.
RdisDupeFilter = RedisDupeFilter

接著到 settings.py 中加入以下配置:

# ######################### scrapy-redis connection ##############
# NOTE(review): host and password are hard-coded here; consider loading
# them from the environment instead of committing them.
REDIS_HOST = "129.28.96.43"  # Redis host name
REDIS_PORT = 6379  # Redis port
REDIS_PARAMS = {"password": "beta"}  # extra connection parameters
REDIS_ENCODING = "utf-8"  # Redis encoding (was misspelled REDIS_ENCODEING)
# Connection URL; takes precedence over the host/port settings above.
# REDIS_URL = "redis://user:pwd@hostname:9001"

DUPEFILTER_KEY = "dupefilter:%(timestamp)s"

# The setting name Scrapy reads is DUPEFILTER_CLASS; the original
# spelling "DUPEFLITER_CLASS" would be silently ignored.
# DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
DUPEFILTER_CLASS = "myscrapy.xxx.RedisDupeFilter"

scrapy-redis 自定義去重規則