scrapy爬取資料之後,如何存入mysql
阿新 • 發佈:2019-01-24
pipelines.py檔案中新建MySQLPipeline類:
# 匯入庫
from scrapy.utils.project import get_project_settings
import pymysql
# 寫入資料庫
class MySQLPipeline(object):
    """Scrapy item pipeline that stores scraped items in a MySQL table.

    Connection parameters are read from the project's settings.py:
    DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME, DB_CHARSET.
    """

    def connect_db(self):
        """Read the connection settings and open the MySQL connection/cursor."""
        # Pull database connection info from settings.py
        settings = get_project_settings()
        self.host = settings['DB_HOST']
        self.port = settings['DB_PORT']
        self.user = settings['DB_USER']
        self.password = settings['DB_PASSWORD']
        self.name = settings['DB_NAME']
        self.charset = settings['DB_CHARSET']
        # Connect to the database
        self.conn = pymysql.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            db=self.name,  # database name
            charset=self.charset,
        )
        # Cursor object used to run statements
        self.cursor = self.conn.cursor()

    def open_spider(self, spider):
        # Open the connection once, when the spider starts.
        self.connect_db()

    def close_spider(self, spider):
        # Release the cursor and the connection when the spider finishes.
        self.cursor.close()
        self.conn.close()

    def process_item(self, item, spider):
        """Insert one scraped item into the `book` table and return it.

        Uses a parameterized query instead of '%'-interpolating values
        into the SQL text: the original hand-quoted '"%s"' form broke on
        quotes inside scraped data and was vulnerable to SQL injection.
        Adjust the table/columns here to match your own item fields.
        """
        sql = 'insert into book (title, img_url) values (%s, %s)'
        try:
            # pymysql escapes the parameters itself.
            self.cursor.execute(sql, (item['title'], item['img_url']))
            # Commit explicitly; pymysql does not autocommit by default,
            # so without this the data would be rolled back on close.
            self.conn.commit()
        except Exception:
            # Undo the failed statement so the connection stays usable,
            # then re-raise so Scrapy reports the error.
            self.conn.rollback()
            raise
        return item
設定settings.py檔案,開啟ITEM_PIPELINE,並設定資料庫相關資訊:
# Register the pipelines. Lower number = higher priority, so the MySQL
# pipeline (200) sees each item before DushuprojectPipeline (300).
ITEM_PIPELINES = {
    'dushuProject.pipelines.MySQLPipeline': 200,
    'dushuProject.pipelines.DushuprojectPipeline': 300,
}

# MySQL connection settings consumed by MySQLPipeline.connect_db().
DB_HOST = 'localhost'
DB_PORT = 3306
DB_CHARSET = 'utf8'
DB_NAME = '資料庫名'
DB_USER = '資料庫使用者名稱'
DB_PASSWORD = '資料庫密碼'