1. 程式人生 > >Scrapy學習-7-數據存儲至數據庫

Scrapy學習-7-數據存儲至數據庫

dev install root nbsp cti titles inter object PE

使用MySQL數據庫存儲

安裝mysql模塊包(下文示例代碼 import 的是 pymysql,需一併安裝)
pip install mysqlclient
pip install pymysql

相關庫文件
sudo apt-get install libmysqlclient-dev  # Debian/Ubuntu

sudo yum install python-devel mysql-devel  # CentOS/RHEL

阻塞型的數據寫入操作
class MysqlPipeline(object):
    """Item pipeline that writes each item to MySQL synchronously.

    Every ``process_item`` call executes one INSERT on a single shared
    connection and commits it before returning, so the caller blocks until
    the database has answered.
    """

    def __init__(self):
        """Open the MySQL connection and create the cursor used for inserts."""
        # Imported locally so this snippet is self-contained when copied
        # into a pipelines module that does not import pymysql yet.
        import pymysql

        # Keyword arguments: recent PyMySQL releases no longer accept the
        # host/user/password/database values positionally.
        self.conn = pymysql.connect(
            host="192.168.1.1",
            user="root",
            password="123456",
            database="titlespider",
            # MySQL spells the charset "utf8"; "utf-8" is not a valid name.
            charset="utf8",
            use_unicode=True,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert ``item`` into the ``article`` table and commit.

        Args:
            item: Mapping providing the ``title``, ``cteate_time``, ``url``
                and ``content`` fields.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.

        Raises:
            Exception: Whatever the driver raises; the transaction is rolled
                back before the error is re-raised.
        """
        # NOTE(review): "cteate_time" looks like a typo of "create_time"; it
        # is kept because it must match the item field and the table column.
        # Confirm against the real schema before renaming.
        insert_sql = """
            insert into article(title, cteate_time, url, content)
            VALUES (%s, %s, %s, %s)
        """
        params = (
            item["title"],
            item["cteate_time"],
            item["url"],
            item["content"],
        )
        try:
            self.cursor.execute(insert_sql, params)
            self.conn.commit()
        except Exception:
            # Leave the shared connection in a clean state for the next item.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()

使用twisted提供的數據庫連接池,異步化寫入,緩解寫數據操作堵塞
# First, define the database connection variables in settings.
# All four values must be quoted string literals.

MYSQL_HOST = "192.168.1.1"
MYSQL_USER = "root"
MYSQL_PASSWD = "123456"
MYSQL_DB = "articlespider"

# Then write the Pipeline class
from twisted.enterprise import adbapi

import pymysql
import pymysql.cursors


class MysqlTwistedPipeline(object):
    """Item pipeline that inserts items through a Twisted adbapi pool.

    ``process_item`` only schedules the INSERT on the connection pool and
    returns immediately; the database work runs via ``runInteraction`` and
    failures are reported through ``handle_error``.
    """

    def __init__(self, dbpool):
        """Store the ``adbapi.ConnectionPool`` used for all inserts."""
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``MYSQL_*`` settings values.

        Args:
            settings: Mapping providing ``MYSQL_HOST``, ``MYSQL_USER``,
                ``MYSQL_PASSWD`` and ``MYSQL_DB``.

        Returns:
            A new pipeline instance wrapping a freshly created pool.
        """
        conn_dict = dict(
            host=settings["MYSQL_HOST"],
            user=settings["MYSQL_USER"],
            password=settings["MYSQL_PASSWD"],
            database=settings["MYSQL_DB"],
            # MySQL spells the charset "utf8"; "utf-8" is not a valid name.
            charset="utf8",
            cursorclass=pymysql.cursors.DictCursor,
            use_unicode=True,
        )
        # The first argument is the importable DB-API module *name*; the
        # remaining keywords are forwarded to that module's connect().
        dbpool = adbapi.ConnectionPool("pymysql", **conn_dict)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Schedule the insert of ``item`` on the pool and return ``item``."""
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrorback(self.handle_error)
        return item

    def do_insert(self, cursor, item):
        """Run the INSERT for ``item`` inside a pool transaction.

        Args:
            cursor: Cursor-like transaction object that ``runInteraction``
                passes as the first argument.
            item: Mapping providing the ``title``, ``cteate_time``, ``url``
                and ``content`` fields.
        """
        # NOTE(review): "cteate_time" looks like a typo of "create_time"; it
        # is kept because it must match the item field and the table column.
        insert_sql = """
            insert into article(title, cteate_time, url, content)
            VALUES (%s, %s, %s, %s)
        """
        # No explicit commit: runInteraction commits when this returns and
        # rolls back if it raises.
        cursor.execute(
            insert_sql,
            (item["title"], item["cteate_time"], item["url"], item["content"]),
        )

    def handle_error(self, failure):
        """Print the failure reported for an asynchronous insert."""
        print(failure)

使用類似django-model的方式寫入數據庫
https://github.com/scrapy-plugins/scrapy-djangoitem

Scrapy學習-7-數據存儲至數據庫