Python3.6實現scrapy框架爬取資料並將資料插入MySQL與存入文件中
阿新 • • 發佈:2019-01-01
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql

# NOTE: the settings object must be imported, because the database connection
# details are stored in the project's settings file.
from scrapy.conf import settings


# Disabled alternative pipeline: appends each item to a text file. A comma is
# written after every field but the last, which keeps the data easy to read
# and makes it convenient to load the file into MySQL later.
#
# class GamerankPipeline(object):
#     def process_item(self, item, spider):
#         with open('games_list.txt','a',encoding='utf-8') as f:
#             f.write(item['rank']+',')
#             f.write(item['game']+',')
#             f.write(item['type']+',')
#             f.write(item['status']+',')
#             f.write(item['hot'])
#             f.write('\n')


class Gamerank_Pipeline(object):
    """Item pipeline that inserts each scraped item into the MySQL table
    ``gamerank``."""

    # `rank` is backtick-quoted because RANK is a reserved word in MySQL 8.0+;
    # unquoted, the statement is rejected as a syntax error there. The values
    # stay as %s placeholders so the driver escapes them, instead of being
    # %-formatted into the SQL text.
    _INSERT_SQL = (
        "insert into gamerank (`rank`,g_name,g_type,g_status,g_hot) "
        "values(%s,%s,%s,%s,%s)"
    )

    def process_item(self, item, spider):
        """Insert one item as a row of ``gamerank`` and return the item.

        Connection parameters are read from the settings keys ``MYSQL_HOSTS``,
        ``MYSQL_USER``, ``MYSQL_PASSWORD``, ``MYSQL_DB``, ``CHARSET`` and
        ``MYSQL_PORT``. The item fields used are ``rank``, ``game``, ``type``,
        ``status`` and ``hot``.

        A failure while executing the INSERT is printed and rolled back rather
        than re-raised, so the item is still returned. Errors raised while
        connecting, committing or rolling back propagate to the caller.

        :param item: mapping-like scraped item providing the fields above.
        :param spider: the spider that produced the item (unused).
        :return: the same ``item`` object, unchanged.
        """
        # Database connection (a new one is opened for every item).
        con = pymysql.connect(
            host=settings['MYSQL_HOSTS'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWORD'],
            db=settings['MYSQL_DB'],
            charset=settings['CHARSET'],
            port=settings['MYSQL_PORT'],
        )
        try:
            # Database cursor; the context manager closes it on exit.
            with con.cursor() as cue:
                # Debug output: confirms that execution reached this point.
                print("mysql connect succes")
                try:
                    cue.execute(
                        self._INSERT_SQL,
                        [
                            item['rank'],
                            item['game'],
                            item['type'],
                            item['status'],
                            item['hot'],
                        ],
                    )
                    print("insert success")  # debug output
                except Exception as e:
                    print('Insert error:', e)
                    con.rollback()
                else:
                    con.commit()
        finally:
            # Always release the connection, even if commit()/rollback()
            # raised; otherwise it would leak.
            con.close()
        return item