scrapy MysqlPipeline: synchronous and asynchronous writes
阿新 · Published: 2017-05-14
python scrapy
import MySQLdb
import MySQLdb.cursors
from twisted.enterprise import adbapi


class MysqlPipeline(object):
    # Write to MySQL synchronously
    def __init__(self):
        self.conn = MySQLdb.connect('192.168.0.106', 'root', 'root', 'article_spider',
                                    charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        insert_sql = """
            insert into jobbole_article(title, url, create_date, fav_nums)
            VALUES (%s, %s, %s, %s)
        """
        self.cursor.execute(insert_sql, (item["title"], item["url"],
                                         item["create_date"], item["fav_nums"]))
        self.conn.commit()
        return item


class MysqlTwistedPipline(object):
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbparms = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWORD"],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool("MySQLdb", **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        # Use twisted to turn the MySQL insert into an asynchronous operation
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error, item, spider)  # handle exceptions
        return item

    def handle_error(self, failure, item, spider):
        # Handle exceptions raised by the asynchronous insert
        print(failure)

    def do_insert(self, cursor, item):
        # Perform the actual insert: each item builds its own SQL statement,
        # so a single pipeline can handle different item types
        insert_sql, params = item.get_insert_sql()
        print(insert_sql, params)
        cursor.execute(insert_sql, params)
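The asynchronous pipeline's do_insert relies on each item exposing a get_insert_sql() method; that is not part of Scrapy's Item API but a convention this pipeline assumes. Below is a minimal sketch of what such an item could look like, with field names taken from the jobbole_article columns used above; the class name JobBoleArticleItem is an assumption.

import scrapy


class JobBoleArticleItem(scrapy.Item):
    # Hypothetical item matching the jobbole_article table used above
    title = scrapy.Field()
    url = scrapy.Field()
    create_date = scrapy.Field()
    fav_nums = scrapy.Field()

    def get_insert_sql(self):
        # Return the SQL statement and its parameter tuple so the
        # pipeline stays item-agnostic
        insert_sql = """
            insert into jobbole_article(title, url, create_date, fav_nums)
            VALUES (%s, %s, %s, %s)
        """
        params = (self["title"], self["url"], self["create_date"], self["fav_nums"])
        return insert_sql, params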
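from_settings reads the connection parameters from the project settings, and the pipeline itself has to be enabled in ITEM_PIPELINES before Scrapy will call it. A minimal sketch of the settings.py entries this code expects follows; the project path ArticleSpider.pipelines, the priority 300, and the concrete connection values are placeholders, not part of the original.

# settings.py (sketch; values are placeholders)
ITEM_PIPELINES = {
    'ArticleSpider.pipelines.MysqlTwistedPipline': 300,
}

MYSQL_HOST = "192.168.0.106"
MYSQL_DBNAME = "article_spider"
MYSQL_USER = "root"
MYSQL_PASSWORD = "root"

With this in place, adbapi.ConnectionPool runs each do_insert call on a thread pool managed by Twisted, so the blocking MySQLdb insert no longer stalls the crawl the way the synchronous pipeline's execute/commit does.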