pandas中的to_sql()函式將csv檔案寫入到MySQL資料庫
本文記錄如何利用 pandas 中的 to_sql 函式將 csv 檔案儲存到 MySQL 資料庫中。具體方法如下:
首先,在資料庫中建立表格。這裡使用的是資料庫操作語言(SQL),並非 Python。
-- Target table for the CSV import. The column names are the header values
-- pandas reads from the CSV; because they are numeric / non-ASCII they must
-- be quoted with backticks to be valid MySQL identifiers.
CREATE TABLE cars
(
    `1`    bigint(20) DEFAULT NULL,
    `0`    bigint(20) DEFAULT NULL,
    `寶馬` text,
    `1.1`  bigint(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
其中,‘cars’是所要建立的表的名字。上面的程式碼是在pycharm中登入資料庫後,雙擊某個具體的資料庫開啟的編輯介面中輸入的。
然後,利用python中pandas庫中的to_sql函式。
def create_table(self, table_name, csv_filename):
    """Append the rows of a CSV file to an existing MySQL table.

    Args:
        table_name: Name of the destination table.
        csv_filename: Path of the comma-separated, UTF-8 encoded CSV file.

    Any exception raised while reading the CSV or writing to the database is
    printed rather than propagated.
    """
    engine = create_engine(
        'mysql+mysqldb://%s:%s@%s/%s?charset=utf8'
        % (self.user, self.password, self.host, self.db))
    try:
        data = pd.read_csv(csv_filename, sep=',', encoding='utf-8')  # , sep='\t'
        # 'append' keeps the table definition that was created beforehand.
        data.to_sql(table_name, con=engine, if_exists='append', index=False)
    except Exception as e:
        print(e)
對於資料庫的操作來說,整體寫為一個類(宇軒教的):
# -*- coding: UTF-8 -*-
import pandas as pd
from sqlalchemy import create_engine
import pymysql
from pymysql import IntegrityError
import settings
import uuid
from sqlalchemy import create_engine
class Data_Mysql():
    """Small helper around a MySQL database: reads, writes and CSV import.

    Connection parameters given to the constructor take precedence; any that
    are omitted (or falsy) fall back to the same-named attribute of the
    ``settings`` module. The pymysql connection is opened lazily by
    ``read``/``write``. The instance can be used as a context manager, in
    which case the connection is closed on exit.
    """

    def __init__(self, MYSQL_DB=None, MYSQL_HOST=None,
                 MYSQL_USER=None, MYSQL_PASSWORD=None, mult=False, port=3306, connect_timeout=10):
        """Store the connection parameters; no connection is opened here.

        Args:
            MYSQL_DB: Database name; defaults to ``settings.MYSQL_DB``.
            MYSQL_HOST: Server host; defaults to ``settings.MYSQL_HOST``.
            MYSQL_USER: User name; defaults to ``settings.MYSQL_USER``.
            MYSQL_PASSWORD: Password; defaults to ``settings.MYSQL_PASSWORD``.
            mult: When true, no database is selected so that queries may
                span several databases.
            port: TCP port of the server.
            connect_timeout: Connection timeout passed to pymysql.
        """
        # ``settings`` is only consulted for values the caller did not supply.
        self.host = MYSQL_HOST or settings.MYSQL_HOST
        self.user = MYSQL_USER or settings.MYSQL_USER
        self.password = MYSQL_PASSWORD or settings.MYSQL_PASSWORD
        # Multi-database mode: connect without selecting a default database.
        self.db = None if mult else (MYSQL_DB or settings.MYSQL_DB)
        self.port = port
        self.connect_timeout = connect_timeout
        self.conn = None
        self.cursor = None

    def __enter__(self):
        """Return the helper itself; the connection is still opened lazily."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block (best effort)."""
        try:
            self.close()
        except Exception:
            # Closing is best-effort: a failure here must not mask an
            # exception raised inside the ``with`` body.
            pass

    def read(self, table, column='*', LIMIT=' '):
        """Run ``select <column> from <table> <LIMIT>`` and return a DataFrame.

        The connection is opened if necessary and always closed afterwards.

        Args:
            table: Table name (or any FROM expression).
            column: Column list to select.
            LIMIT: Raw SQL appended after the table name, e.g. ``'limit 10'``.
        """
        if self.conn is None:
            self.connect()
        # WARNING: the statement is assembled by string concatenation, so the
        # arguments must come from trusted code, never from user input.
        sql = "select " + str(column) + " from " + str(table) + ' ' + LIMIT
        print('sql = ', sql)
        try:
            return pd.read_sql(sql, self.conn)
        finally:
            # Close even when the query fails, and reset the handles so the
            # next call reconnects instead of reusing a closed connection.
            self.close()

    def write(self, sql, values):
        """Execute a parameterized statement and commit it.

        Args:
            sql: SQL text with placeholders.
            values: Parameters bound to the placeholders.

        Returns:
            The value returned by ``cursor.execute``.
        """
        if self.conn is None:
            self.connect()
        print(sql)
        print(values)
        sta = self.cursor.execute(sql, values)
        self.conn.commit()
        return sta

    def connect(self):
        """Open the pymysql connection and create a cursor on it."""
        self.conn = pymysql.connect(host=self.host, user=self.user,
                                    password=self.password, db=self.db,
                                    charset='utf8', use_unicode=True, port=self.port,
                                    connect_timeout=self.connect_timeout)
        self.cursor = self.conn.cursor()

    def _engine_url(self):
        """Build the SQLAlchemy URL used by ``create_table``."""
        from urllib.parse import quote

        # Percent-encode the credentials so characters such as '@', ':' or
        # '/' inside them cannot be mistaken for URL delimiters.
        user = quote(str(self.user), safe='')
        password = quote(str(self.password), safe='')
        # In multi-database mode no database is selected.
        database = '' if self.db is None else self.db
        # NOTE(review): 'mysql+mysqldb' selects the MySQLdb driver, while the
        # rest of this class uses pymysql; consider 'mysql+pymysql' if MySQLdb
        # is not installed.
        return 'mysql+mysqldb://%s:%s@%s:%s/%s?charset=utf8' % (
            user, password, self.host, self.port, database)

    def create_table(self, table_name, csv_filename):
        """Import a CSV file into a table of the configured database.

        The rows are appended, so a table created beforehand keeps its
        definition. Errors while reading the CSV or writing to the database
        are printed rather than propagated.

        Args:
            table_name: Name of the destination table.
            csv_filename: Path of the comma-separated, UTF-8 encoded CSV file.
        """
        engine = create_engine(self._engine_url())
        try:
            data = pd.read_csv(csv_filename, sep=',', encoding='utf-8')  # , sep='\t'
            # NOTE: the original author observed that to_sql sometimes fails
            # on the first run and succeeds on the second; cause unknown.
            data.to_sql(table_name, con=engine, if_exists='append', index=False)
        except Exception as e:
            print(e)
        finally:
            # Release the engine's pooled connections.
            engine.dispose()

    def close(self):
        """Close the connection if one is open; safe to call repeatedly."""
        conn, self.conn, self.cursor = self.conn, None, None
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    # Example run: import cars.csv into the existing `cars` table.
    # NOTE(review): the password literal looks like it was mangled by e-mail
    # obfuscation when the article was published - TODO replace it with the
    # real value, preferably loaded from configuration.
    readMysql = Data_Mysql(MYSQL_DB='KnowledgeGraph', MYSQL_HOST='192.168.100.244',MYSQL_PASSWORD='[email protected]',MYSQL_USER='pcm')
    readMysql.create_table('cars', 'cars.csv')
需要注意的是:data.to_sql 的引數中使用了 if_exists='append' 選項。因為第一步中已經建立了表,所以在這裡要用 append。如果換成 replace,class 中的建表函式(透過 to_sql)會自己在資料庫中建表,但是這樣建出來的表採用資料庫預設的格式(例如預設的字元集),不能解析中文字元。所以,這裡採用先建表、再 append 的方式。
本文是為了個人工作記錄使用,所以格式上沒有好好規範。