1. 程式人生 > >Python3.6實現scrapy框架爬取資料並將資料插入MySQL與存入文件中

Python3.6實現scrapy框架爬取資料並將資料插入MySQL與存入文件中

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql

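# NOTE: the settings import below is required, because the database connection
# parameters are defined in the project's settings file.
# The (commented-out) GamerankPipeline class further down writes each item to a
# text file, appending a comma after every field so the data is easy to read
# and can later be bulk-imported into MySQL.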
from scrapy.conf import settings

# class GamerankPipeline(object):
#     def process_item(self, item, spider):
#         with open('games_list.txt','a',encoding='utf-8') as f:
#         	f.write(item['rank']+',')
#         	f.write(item['game']+',')
#         	f.write(item['type']+',')
#         	f.write(item['status']+',')
#         	f.write(item['hot'])
#         	f.write('\n')


#下面是將爬取到的資訊插入到MySQL資料庫中
class Gamerank_Pipeline(object):
    """Scrapy item pipeline that inserts each scraped item into MySQL.

    Connection parameters are read from the project settings keys
    ``MYSQL_HOSTS``, ``MYSQL_USER``, ``MYSQL_PASSWORD``, ``MYSQL_DB``,
    ``CHARSET`` and ``MYSQL_PORT``. Rows are written to the ``gamerank``
    table.
    """

    # Column names are back-quoted because ``rank`` is a reserved word in
    # MySQL 8.0+; unquoted it makes the statement a syntax error there.
    _INSERT_SQL = (
        "insert into gamerank (`rank`,`g_name`,`g_type`,`g_status`,`g_hot`) "
        "values(%s,%s,%s,%s,%s)"
    )

    def process_item(self, item, spider):
        """Insert one item into the ``gamerank`` table and return it.

        Args:
            item: Mapping with the keys ``rank``, ``game``, ``type``,
                ``status`` and ``hot``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines can process it.

        A failed insert is reported on stdout and rolled back; it does not
        stop the crawl. Errors raised while connecting propagate to the
        caller.
        """
        # NOTE: a new connection is opened for every item.
        con = pymysql.connect(
            host=settings['MYSQL_HOSTS'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWORD'],
            db=settings['MYSQL_DB'],
            charset=settings['CHARSET'],
            port=settings['MYSQL_PORT'],
        )
        try:
            cue = con.cursor()
            # Debug output showing that the connection step was reached.
            print("mysql connect succes")
            try:
                # Values are passed separately so the driver escapes them.
                cue.execute(
                    self._INSERT_SQL,
                    [
                        item['rank'],
                        item['game'],
                        item['type'],
                        item['status'],
                        item['hot'],
                    ],
                )
                print("insert success")  # debug output
            except Exception as e:
                print('Insert error:', e)
                con.rollback()
            else:
                con.commit()
            finally:
                cue.close()
        finally:
            # Always release the connection, even if cursor creation,
            # commit or rollback raised.
            con.close()
        return item