利用Python資料分析:資料載入、儲存與檔案格式(一)
阿新 • • 發佈:2019-01-11
# Notebook-style walkthrough of pandas text I/O: reading delimited files with
# various options, reading a file in chunks, and writing data back out.
# Bare expressions (e.g. ``df``) are kept so the result is displayed when the
# code is run cell by cell in an interactive session.
import sys

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# --- Reading delimited text --------------------------------------------------
# Raw strings keep the Windows backslashes literal instead of relying on
# Python leaving unknown escape sequences untouched.
df = pd.read_csv(r'E:\Python for Data Analysis\pydata-book-master\ch06\ex1.csv')
df

# read_table needs the separator spelled out for a comma-delimited file.
pd.read_table(r'E:\Python for Data Analysis\pydata-book-master\ch06\ex1.csv', sep=',')

# Read a file that has no header row.
pd.read_csv(r'E:\Python for Data Analysis\pydata-book-master\ch06\ex2.csv', header=None)

# Supply the column names explicitly.
pd.read_csv(
    r'E:\Python for Data Analysis\pydata-book-master\ch06\ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
)

# Use the 'message' column as the index.
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv(
    r'E:\Python for Data Analysis\pydata-book-master\ch06\ex2.csv',
    names=names,
    index_col='message',
)

# Hierarchical index built from several columns.
parsed = pd.read_csv('pydata-book-master/ch06/csv_mindex.csv', index_col=['key1', 'key2'])
parsed

# Use a regular expression as the read_table separator.
# NOTE(review): ex6.csv is read with the default comma separator further
# down; confirm that a whitespace-delimited sample file was not intended here.
with open('pydata-book-master/ch06/ex6.csv') as f:
    lines = list(f)
lines
result = pd.read_table('pydata-book-master/ch06/ex6.csv', sep=r'\s+')
result

# Skip the listed row numbers while reading.
pd.read_csv('pydata-book-master/ch06/ex4.csv', skiprows=[0, 2, 3])

# --- Missing-value handling --------------------------------------------------
result = pd.read_csv('pydata-book-master/ch06/ex5.csv')
result

result = pd.read_csv('pydata-book-master/ch06/ex5.csv', na_values=['NULL'])
result

# A dict assigns different NA sentinel values to individual columns.
# The key was previously misspelled 'someting', so its sentinel never matched
# a column.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('pydata-book-master/ch06/ex5.csv', na_values=sentinels)

# --- Reading a text file piece by piece --------------------------------------
result = pd.read_csv('pydata-book-master/ch06/ex6.csv')
result

# Read only the first five rows.
pd.read_csv('pydata-book-master/ch06/ex5.csv', nrows=5)

# chunksize=1000 yields an iterator of pieces of up to 1000 rows each.
# NOTE(review): this now reads ex6.csv (the file loaded in full above) instead
# of ex5.csv, which is presumably the small NA-demo file without a 'key'
# column -- confirm against the sample data.
chunker = pd.read_csv('pydata-book-master/ch06/ex6.csv', chunksize=1000)
chunker

# Accumulate the per-chunk value counts of the 'key' column.
tot = Series([], dtype='int64')
for piece in chunker:
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
# Series.order() no longer exists in pandas; sort_values() is its replacement.
tot = tot.sort_values(ascending=False)
tot[:10]

# --- Writing data out as text ------------------------------------------------
data = pd.read_csv('pydata-book-master/ch06/ex5.csv')
data
data.to_csv('pydata-book-master/ch06/out.csv')

# Write to stdout using a custom delimiter.
data.to_csv(sys.stdout, sep='|')

# Missing values are written as empty strings by default; na_rep substitutes
# a marker of your choice.
data.to_csv(sys.stdout, na_rep='NULL')

# Suppress both the row labels and the column header.
data.to_csv(sys.stdout, index=False, header=False)

# Write only a subset of the columns, in the given order
# (the keyword is `columns`; the old `cols` spelling is no longer accepted).
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])

# Series also has a to_csv method.
dates = pd.date_range('1/1/2000', periods=7)
ts = Series(np.arange(7), index=dates)
ts.to_csv('pydata-book-master/ch06/tseries.csv')