1. 程式人生 > >pandas中的to_sql()函式將csv檔案寫入到MySQL資料庫

pandas中的to_sql()函式將csv檔案寫入到MySQL資料庫

掌握了利用pandas中的to_sql函式將csv檔案儲存到MySQL資料庫中.具體方法如下:

首先,在資料庫中建立表格。這一步使用的是 SQL(資料庫操作語言),並非 Python。

-- Table that receives the rows appended from the CSV file.
-- NOTE(review): the column names presumably mirror the header pandas derives
-- from cars.csv -- confirm against the actual file.
-- Names made only of digits, or containing a dot, must be quoted with
-- backticks; unquoted they are a syntax error in MySQL.
CREATE TABLE cars (
  `1` bigint(20) DEFAULT NULL,
  `0` bigint(20) DEFAULT NULL,
  `寶馬` text,
  `1.1` bigint(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

其中,‘cars’是所要建立的表的名字。上面的程式碼是在pycharm中登入資料庫後,雙擊某個具體的資料庫開啟的編輯介面中輸入的。

然後,利用python中pandas庫中的to_sql函式。

def create_table(self, table_name, csv_filename):
    """Append the rows of a CSV file to a MySQL table.

    Reads ``csv_filename`` (comma separated, UTF-8) into a DataFrame and
    appends it to ``table_name`` through a SQLAlchemy engine built from
    ``self.user``, ``self.password``, ``self.host`` and ``self.db``.

    Args:
        table_name: Name of the destination table.
        csv_filename: Path of the CSV file to load.

    Any exception raised while reading or writing is printed, not re-raised.
    """
    engine = create_engine(
        'mysql+mysqldb://%s:%s@%s/%s?charset=utf8'
        % (self.user, self.password, self.host, self.db)
    )
    try:
        data = pd.read_csv(csv_filename, sep=',', encoding='utf-8')  # , sep='\t'
        # 'append' keeps the table definition that was created beforehand.
        data.to_sql(table_name, con=engine, if_exists='append', index=False)
    except Exception as e:
        print(e)

對於資料庫的操作來說,整體寫為一個類(宇軒教的):

# -*- coding: UTF-8 -*-

import pandas as pd
from sqlalchemy import create_engine
import pymysql
from pymysql import IntegrityError
import settings
import uuid
from sqlalchemy import create_engine

class Data_Mysql():
    """Small helper around a pymysql connection plus a CSV-to-table import.

    Connection parameters passed to the constructor take precedence; any
    parameter left empty falls back to the same-named attribute of the
    ``settings`` module. The connection is opened lazily by ``read`` and
    ``write`` and can also be managed with a ``with`` statement.
    """

    def __init__(self, MYSQL_DB=None, MYSQL_HOST=None,
                 MYSQL_USER=None, MYSQL_PASSWORD=None, mult=False, port=3306, connect_timeout=10):
        """Store the connection parameters; no connection is opened here.

        Args:
            MYSQL_DB: Database name (falls back to ``settings.MYSQL_DB``).
            MYSQL_HOST: Server host (falls back to ``settings.MYSQL_HOST``).
            MYSQL_USER: User name (falls back to ``settings.MYSQL_USER``).
            MYSQL_PASSWORD: Password (falls back to ``settings.MYSQL_PASSWORD``).
            mult: When true, no default database is selected so queries can
                span several databases.
            port: Server TCP port.
            connect_timeout: Timeout passed to ``pymysql.connect``.
        """
        # NOTE (original author): pandas' to_sql(..., if_exists='append')
        # sometimes raised on the first run and succeeded on the second;
        # the cause was never identified.
        self.host = self._resolve(MYSQL_HOST, "MYSQL_HOST")
        self.user = self._resolve(MYSQL_USER, "MYSQL_USER")
        self.password = self._resolve(MYSQL_PASSWORD, "MYSQL_PASSWORD")
        self.db = self._resolve(MYSQL_DB, "MYSQL_DB")
        self.port = port
        self.connect_timeout = connect_timeout

        if mult:
            # Multi-database mode: do not bind the connection to one database.
            self.db = None

        self.conn = None
        self.cursor = None

    @staticmethod
    def _resolve(explicit, setting_name):
        """Return ``explicit`` if truthy, else the value from ``settings``."""
        if explicit:
            return explicit
        # Keep the explicit (falsy) value when the setting is empty as well.
        return getattr(settings, setting_name) or explicit

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, trace):
        # Best-effort cleanup: a failing close must not mask the exception
        # (if any) that is leaving the ``with`` block.
        try:
            self.close()
        except Exception as e:
            print(e)

    def read(self, table, column='*', LIMIT=' '):
        """Run ``select <column> from <table> <LIMIT>`` and return a DataFrame.

        The connection is opened if needed and always closed afterwards.

        NOTE(review): the statement is assembled by string concatenation, so
        ``table``, ``column`` and ``LIMIT`` must come from trusted code only.
        """
        if not self.conn:
            self.connect()
        sql = "select " + str(column) + " from " + str(table) + ' ' + LIMIT
        print('sql = ', sql)
        try:
            df = pd.read_sql(sql, self.conn)
        finally:
            # Release the connection even when the query fails.
            self.close()
        return df

    def write(self, sql, values):
        """Execute a parameterised statement, commit, and return its status.

        Args:
            sql: Statement with placeholders.
            values: Parameters bound to the placeholders.

        Returns:
            Whatever ``cursor.execute`` returns.
        """
        if not self.conn:
            self.connect()
        print(sql)
        print(values)
        sta = self.cursor.execute(sql, values)
        self.conn.commit()
        return sta

    def connect(self):
        """Open the pymysql connection and create a cursor on it."""
        self.conn = pymysql.connect(host=self.host, user=self.user,
                                    password=self.password, db=self.db,
                                    charset='utf8', use_unicode=True, port=self.port,
                                    connect_timeout=self.connect_timeout)
        self.cursor = self.conn.cursor()

    def _engine_url(self):
        """Build the SQLAlchemy URL for this instance's connection settings."""
        from urllib.parse import quote_plus

        # User and password are percent-encoded so that characters such as
        # '@', ':' or '/' cannot be mistaken for URL delimiters.
        return "mysql+mysqldb://%s:%s@%s:%s/%s?charset=utf8" % (
            quote_plus(str(self.user)),
            quote_plus(str(self.password)),
            self.host,
            self.port,
            self.db or "",
        )

    def create_table(self, table_name, csv_filename):
        """Fill a table of the configured database from a CSV file.

        Reads ``csv_filename`` (comma separated, UTF-8) and appends its rows
        to ``table_name``. Errors while reading or writing are printed, not
        re-raised.

        Args:
            table_name: Name of the destination table.
            csv_filename: Path of the CSV file to import.
        """
        engine = create_engine(self._engine_url())
        try:
            data = pd.read_csv(csv_filename, sep=',', encoding='utf-8')  # , sep='\t'
            # 'append' keeps the table definition that was created beforehand.
            data.to_sql(table_name, con=engine, if_exists='append', index=False)
        except Exception as e:
            print(e)
        finally:
            # Return the engine's pooled connections to the server.
            engine.dispose()

    def close(self):
        """Close the connection, if one is open, and forget it.

        Resetting ``conn``/``cursor`` lets the next ``read``/``write``
        reconnect instead of reusing a closed connection.
        """
        conn, self.conn, self.cursor = self.conn, None, None
        if conn is not None:
            conn.close()

if __name__ == "__main__":
    # Example run: append the rows of cars.csv to the existing `cars` table.
    # NOTE(review): the password literal looks like an e-mail-obfuscation
    # placeholder rather than a real value -- replace it (ideally with a
    # value loaded from configuration) before running.
    readMysql = Data_Mysql(MYSQL_DB='KnowledgeGraph', MYSQL_HOST='192.168.100.244',
                           MYSQL_PASSWORD='[email protected]', MYSQL_USER='pcm')
    readMysql.create_table('cars', 'cars.csv')

需要注意的是 data.to_sql 的引數 if_exists='append'。因為第一步已經建立了表,所以這裡要用 append。如果換成 replace,class 中的建表函式(to_sql)會自行在資料庫中重新建表,但表的格式是資料庫的預設格式,不能解析中文字元。所以,這裡採用先建表、再 append 的方式。

本文是為了個人工作記錄使用,所以格式上沒有好好規範。