程式人生 > 利用Python資料分析:資料載入、儲存與檔案格式(一)

利用Python資料分析:資料載入、儲存與檔案格式(一)

import pandas as pd
from pandas import DataFrame,Series
import sys
import numpy as np
# Basic CSV loading with read_csv / read_table.
#
# The absolute Windows paths are written as raw strings so their backslashes
# are never parsed as escape sequences; the runtime path values are unchanged.
ex1_path = r'E:\Python for Data Analysis\pydata-book-master\ch06\ex1.csv'
ex2_path = r'E:\Python for Data Analysis\pydata-book-master\ch06\ex2.csv'

df = pd.read_csv(ex1_path)
df

# read_table needs the separator to be specified for comma-delimited data.
pd.read_table(ex1_path, sep=',')

# Read a file that has no header row.
pd.read_csv(ex2_path, header=None)

# Supply the column names explicitly.
pd.read_csv(ex2_path, names=['a', 'b', 'c', 'd', 'message'])

# Same names again, this time using the "message" column as the index.
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv(ex2_path, names=names, index_col='message')

# Hierarchical index built from the two key columns.
parsed = pd.read_csv('pydata-book-master/ch06/csv_mindex.csv',
                     index_col=['key1', 'key2'])
parsed
# Use a regular expression as the read_table separator.
# NOTE(review): this script also loads ex6.csv with plain read_csv, which
# suggests it is comma-delimited; a whitespace regex may then not split it
# into columns -- confirm this is the intended input file.
with open('pydata-book-master/ch06/ex6.csv') as raw_file:
    # The context manager closes the handle once the lines have been read.
    raw_lines = list(raw_file)
raw_lines

# Raw string so that "\s" reaches the regex engine untouched.
result = pd.read_table('pydata-book-master/ch06/ex6.csv', sep=r'\s+')
result

# Skip the listed row numbers while parsing.
pd.read_csv('pydata-book-master/ch06/ex4.csv', skiprows=[0, 2, 3])

result = pd.read_csv('pydata-book-master/ch06/ex5.csv')
result

# Additionally treat the string 'NULL' as a missing value.
result = pd.read_csv('pydata-book-master/ch06/ex5.csv', na_values=['NULL'])
result

# Use a dict to give each column its own set of NA markers.
# The second key was misspelled 'someting', which matches no column and is
# silently ignored; assumes the ex5.csv column is named 'something' -- confirm.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('pydata-book-master/ch06/ex5.csv', na_values=sentinels)
# Read a text file piece by piece.
result = pd.read_csv('pydata-book-master/ch06/ex6.csv')
result

# Read only the first five rows.
pd.read_csv('pydata-book-master/ch06/ex5.csv', nrows=5)

# chunksize=1000 gives an iterator whose pieces hold up to 1000 rows each
# (it is the number of rows per chunk, not the number of chunks).
# The chunker reads ex6.csv rather than ex5.csv because the loop below indexes
# a 'key' column, while the rest of this script treats ex5.csv as having
# a/b/c/message columns.
# NOTE(review): assumes ex6.csv has a 'key' column -- confirm.
chunker = pd.read_csv('pydata-book-master/ch06/ex6.csv', chunksize=1000)
chunker

# Running total of how often each 'key' value occurs across all chunks.
# An explicit dtype avoids the ambiguous dtype of an empty Series.
tot = Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 lets keys that appear in only one operand still be summed.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)

# Series.order() was removed from pandas; sort_values() is its replacement.
tot = tot.sort_values(ascending=False)
tot[:10]
# Write data out in text format.
data = pd.read_csv('pydata-book-master/ch06/ex5.csv')
data

data.to_csv('pydata-book-master/ch06/out.csv')

# Write to stdout using a custom delimiter.
data.to_csv(sys.stdout, sep='|')

# Missing values are written as empty strings by default; na_rep substitutes
# a marker of your choice.
data.to_csv(sys.stdout, na_rep='NULL')

# Suppress both the row labels and the column header.
# (This call had been fused onto the end of its comment line and never ran.)
data.to_csv(sys.stdout, index=False, header=False)

# Write only a subset of the columns, in the given order.
# The keyword is `columns`; the old `cols` spelling is no longer accepted.
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])

# Series also has a to_csv method.
dates = pd.date_range('1/1/2000', periods=7)
ts = Series(np.arange(7), index=dates)
ts.to_csv('pydata-book-master/ch06/tseries.csv')