
How to store data in MySQL after scraping with Scrapy

Create a new MySQLPipeline class in pipelines.py:

# Imports
from scrapy.utils.project import get_project_settings
import pymysql

# Pipeline that writes items to MySQL
class MySQLPipeline(object):
    def connect_db(self):
        # Load the database connection info from settings.py
        settings = get_project_settings()
        self.host = settings['DB_HOST']
        self.port = settings['DB_PORT']
        self.user = settings['DB_USER']
        self.password = settings['DB_PASSWORD']
        self.name = settings['DB_NAME']
        self.charset = settings['DB_CHARSET']
        # Connect to the database
        self.conn = pymysql.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            db=self.name,  # database name
            charset=self.charset,
        )
        # Cursor object used to execute SQL statements
        self.cursor = self.conn.cursor()

    # Open the database connection when the spider starts
    def open_spider(self, spider):
        self.connect_db()

    # Close the database connection when the spider finishes
    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()

    # Write each item to the database
    def process_item(self, item, spider):
        # Adjust the fields and values here to suit your own table;
        # a parameterized query lets pymysql handle quoting and escaping
        sql = 'insert into book (title, img_url) values (%s, %s)'
        self.cursor.execute(sql, (item['title'], item['img_url']))
        # Commit explicitly, otherwise the transaction is rolled back
        # and the table stays empty
        self.conn.commit()
        return item
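The pipeline reads item['title'] and item['img_url'], so the spider's Item class must define those two fields. A minimal sketch of the matching items.py, assuming the class name (it is not shown in the original):

# items.py -- minimal sketch; the class name is an assumption
import scrapy

class DushuprojectItem(scrapy.Item):
    title = scrapy.Field()    # book title
    img_url = scrapy.Field()  # cover image URL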

In settings.py, enable the pipeline under ITEM_PIPELINES and add the database connection settings:

ITEM_PIPELINES = {
   'dushuProject.pipelines.DushuprojectPipeline': 300,
   'dushuProject.pipelines.MySQLPipeline': 200,  # lower number = runs earlier, so this writes first
}
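Scrapy runs pipelines in ascending order of the number, so MySQLPipeline (200) receives each item before DushuprojectPipeline (300). Every pipeline must return the item, or later pipelines never see it. A sketch of that contract (the real DushuprojectPipeline body is project-specific and assumed here):

# pipelines.py -- sketch only; the actual per-item logic is assumed
class DushuprojectPipeline(object):
    def process_item(self, item, spider):
        # ... any per-item processing ...
        return item  # hand the item on to the next pipeline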

DB_HOST = 'localhost'
DB_PORT = 3306
DB_USER = 'your database user'
DB_PASSWORD = 'your database password'
DB_NAME = 'your database name'
DB_CHARSET = 'utf8'
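The pipeline assumes a book table with title and img_url columns already exists in the database named above. A one-off setup sketch using pymysql with the same connection values (the column types are assumptions; adjust them to your data):

# create_table.py -- run once before the spider; column types are assumptions
import pymysql

conn = pymysql.connect(host='localhost', port=3306,
                       user='your database user',
                       password='your database password',
                       db='your database name', charset='utf8')
with conn.cursor() as cursor:
    cursor.execute(
        'create table if not exists book ('
        ' id int primary key auto_increment,'
        ' title varchar(255),'
        ' img_url varchar(255))'
    )
conn.commit()
conn.close()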