1. 程式人生 > >Python中Pandas的相關使用介紹(三)

Python中Pandas的相關使用介紹(三)

本篇介紹以下幾個知識點:
(1)利用pandas讀取檔案
(2)利用concat合併檔案
(3)利用append合併檔案
(4)利用append合併DataFrame與Series
(5)利用merge合併DataFrame
(6)merge處理重疊區域,handle overlapping

(1)利用pandas讀取檔案
1.1 沒有指定檔案的路徑

import pandas as pd
import numpy as np

# read_csv parses delimited text: plain .txt files as well as .csv files
# exported from Excel. No directory is given here, only the bare file name.
data = pd.read_csv('zuobiao.csv')
# The call must be written as a single expression; a bare `print` followed by
# `(data)` on the next line prints nothing in Python 3.
print(data)

1.2 指定檔案的具體路徑

import pandas as pd
import numpy as np

# The file location is spelled out in full; note the forward slashes used as
# path separators.
poem_path = 'F:/Python/poem.txt'
data = pd.read_csv(poem_path)
print(data)

(2)利用pandas的concat合併檔案
2.1 concat的ignore_index參數

import numpy as np
import pandas as pd

# Three 3x4 frames filled with 0, 1 and 2 that share the same column labels.
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['a','b','c','d'])
df3 = pd.DataFrame(np.ones((3,4))*2,columns = ['a','b','c','d'])
print(df1,"\n",df2,'\n',df3)

# Concatenate the frames.
# axis = 0 stacks them on top of each other; axis = 1 would place them side by side.
res1 = pd.concat([df1,df2,df3],axis = 0)
print(res1)

# ignore_index = True discards the original row labels and numbers the
# result rows sequentially from 0.
res2 = pd.concat([df1,df2,df3],axis = 0,ignore_index = True)
print(res2)

2.2 concat的join參數

import numpy as np
import pandas as pd

# Two 3x4 frames whose column labels and row labels only partly overlap.
df1 = pd.DataFrame(
    np.zeros((3, 4)),
    columns=['a', 'b', 'c', 'd'],
    index=[1, 2, 3],
)
df2 = pd.DataFrame(
    np.ones((3, 4)),
    columns=['b', 'c', 'd', 'e'],
    index=[2, 3, 4],
)

# join = 'inner' keeps only the columns present in both frames
# ('outer' would keep every column and fill the gaps with NaN);
# ignore_index = True renumbers the stacked rows from 0.
res = pd.concat([df1, df2], join='inner', ignore_index=True)

print(df1)
print("\n"*2)
print(df2)
print(res)

2.3 concat的join_axes參數(注意:join_axes 已在 pandas 1.0 移除,新版請改用 reindex 對齊索引)

import numpy as np
import pandas as pd

# Two 3x4 frames whose row labels only partly overlap (1,2,3 vs 2,3,4).
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])

# The `join_axes` keyword was removed from pd.concat in pandas 1.0.
# Aligning df2 to df1's row labels first gives the same result as
# join_axes = [df1.index]: only df1's index is kept and rows that df2
# lacks are filled with NaN.
# Without the reindex, a plain axis = 1 concat keeps the union of both indexes.
res = pd.concat([df1,df2.reindex(df1.index)],axis = 1)

print(df1)
print(df2)
print("\n"*2)
print(res)

(3)利用pandas的append合併檔案

import numpy as np
import pandas as pd

# Two 3x4 frames with partly different columns (a-d vs b-e).
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# The rows of df2 are placed after those of df1, columns missing on either
# side become NaN, and ignore_index = True renumbers the rows from 0.
res = pd.concat([df1,df2],ignore_index = True)

print(df1)
print(df2)
print("\n"*2)
print(res)

append 會直接把第二個 DataFrame 附加在第一個的結尾(注意:DataFrame.append 已在 pandas 2.0 移除,新版請改用 pd.concat)。

(4)利用append合併DataFrame與Series

import numpy as np
import pandas as pd

# A 3x4 frame of zeros and a Series whose index matches the frame's columns.
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'])
s1 = pd.Series([1,2,3,4],index = ['a','b','c','d'])

# DataFrame.append was removed in pandas 2.0. To add the Series as a new row,
# turn it into a one-row frame (to_frame() gives one column, .T flips it into
# one row labelled by a-d) and concatenate; ignore_index = True renumbers the
# rows from 0.
res = pd.concat([df1,s1.to_frame().T],ignore_index = True)

print(df1)
print(s1)
print("\n"*2)
print(res)

(5)利用merge合併DataFrame

# Two frames that share a 'key' column with identical values.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# Match rows of both frames on the values in 'key'.
res = left.merge(right, on='key')

print(left)
print(right)
print("\n"*2)
print(res)
import numpy as np
import pandas as pd

# Left and right tables, both carrying the same four values in 'key'.
left = pd.DataFrame(dict(
    key=['K0', 'K1', 'K2', 'K3'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))
right = pd.DataFrame(dict(
    key=['K0', 'K1', 'K2', 'K3'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))

# Single-key merge: rows are paired where 'key' is equal.
res = pd.merge(left, right, on='key')

# Disabled variants for frames that have two key columns, 'key1' and 'key2'
# (the frames above only have 'key', so these cannot run as-is):
##res = pd.merge(left,right,on = ['key1','key2'])  # keep only key pairs found in both frames (intersection)
##print(res)
##res = pd.merge(left,right,on = ['key1','key2'],how = 'outer')  # keep key pairs from either frame (union); gaps become NaN
##print(res)
##res = pd.merge(left,right,on = ['key1','key2'],how = 'right')  # keep the key pairs of `right`
##print(res)

print(left, right, sep="\n")
print("\n"*2)
print(res)

merge的indicator參數

import numpy as np
import pandas as pd

# Two small frames that overlap only on col1 == 1.
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})

# Outer merge on 'col1'. Passing a string as `indicator` adds a column with
# that name telling where each row came from; indicator = True would add the
# same column under the default name.
res = df1.merge(df2, on='col1', how='outer', indicator='df')

print(df1)
print(df2)
print("\n"*2)
print(res)

merged by index

import numpy as np
import pandas as pd

# Both frames are labelled by the same row index instead of a key column.
row_labels = ['K0', 'K1', 'K2']
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=row_labels,
)
right = pd.DataFrame(
    {'C': ['C0', 'C1', 'C2'], 'D': ['D0', 'D1', 'D2']},
    index=row_labels,
)

# Use the row index of both sides as the merge key. how = 'outer' keeps
# labels from either frame; how = 'inner' would keep only shared labels.
res = left.merge(right, left_index=True, right_index=True, how='outer')

print(left)
print(right)
print("\n"*2)
print(res)

(6)處理重疊區域,handle overlapping

import numpy as np
import pandas as pd

# Handling overlapping column names: both frames have an 'age' column.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})

# Both frames contain 'k', so the result has a single shared 'k' column.
# The clashing 'age' columns are told apart by the given suffixes.
# how = 'inner' is used here; 'outer' is the alternative.
res = boys.merge(girls, on='k', suffixes=('_boy', '_girl'), how='inner')

print(boys)
print(girls)
print("\n"*2)
print(res)

附加本次學習的所有原始程式碼:

'''
import pandas as pd
import numpy as np

data = pd.read_csv('zuobiao.csv')
print(data)

data.to_pickle('student.pickle')


#合併DataFrame,concatenaing
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['a','b','c','d'])
df3 = pd.DataFrame(np.ones((3,4))*2,columns = ['a','b','c','d'])

#print(df1,"\n",df2,'\n',df3)

#上下合併
res = pd.concat([df1,df2,df3],axis = 0) #1是橫向
##print(res)
##
##res2 = pd.concat([df1,df2,df3],axis = 0,ignore_index = True) #axis = 0是縱向(上下)合併,1才是橫向
##print(res2)


# join,['inner','outer']

##df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
##df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])
##res = pd.concat([df1,df2],join = 'inner',ignore_index = True) #outer
##print(res)


## join_axes
##df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
##df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])
##res = pd.concat([df1,df2],axis = 1,join_axes = [df1.index])##不要join_axes
##print(res)


##append
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])
res = df1.append(df2,ignore_index = True)
print(res)


##DataFrame加Series
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'])
s1 = pd.Series([1,2,3,4],index = ['a','b','c','d'])
res = df1.append(s1,ignore_index = True)
print(res)

'''





# 合併merge
import pandas as pd
import numpy as np

'''
left = pd.DataFrame({'key':['K0','K1','K2','K3'],
                     'A':['A0','A1','A2','A3'],
                     'B':['B0','B1','B2','B3']})

right = pd.DataFrame({'key':['K0','K1','K2','K3'],
                     'C':['C0','C1','C2','C3'],
                     'D':['D0','D1','D2','D3']})

print(left)
print(right)

print("\n"*3)

res = pd.merge(left,right,on = 'key')
print(res)
'''




##有兩個key
##left = pd.DataFrame({'key1':['K0','K1','K2','K3'],
##                     'key2':['K0','K1','K0','K1'],
##                     'A':['A0','A1','A2','A3'],
##                     'B':['B0','B1','B2','B3']})
##
##right = pd.DataFrame({'key1':['K0','K1','K2','K3'],
##                      'key2':['K0','K0','K0','K0'],
##                     'C':['C0','C1','C2','C3'],
##                     'D':['D0','D1','D2','D3']})
##
##print(left)
##print(right)
##print('\n'*3)

##res = pd.merge(left,right,on = ['key1','key2']) ##只考慮相同的值,交集
##print(res)

##res = pd.merge(left,right,on = ['key1','key2'],how = 'outer') ##,基於兩個的key,並集。不同的用nan
##print(res)

##res = pd.merge(left,right,on = ['key1','key2'],how = 'right') #基於right
##print(res)



###indicator引數
##df1 = pd.DataFrame({'col1':[0,1],'col_left':['a','b']})
##df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]})
##print(df1)
##print(df2)
##print('\n'*3)
##
##res = pd.merge(df1,df2,on = 'col1',how = 'outer',indicator = 'df')#indicator = True
##print(res )




#merged by index

##left = pd.DataFrame({'A':['A0','A1','A2'],
##                     'B':['B0','B1','B2']},
##                    index = ['K0','K1','K2'])
##right = pd.DataFrame({'C':['C0','C1','C2'],
##                     'D':['D0','D1','D2']},
##                    index = ['K0','K1','K2'])
##
##print(left)
##print(right)
##print("\n"*3)
##res = pd.merge(left,right,left_index = True,right_index = True,how = 'outer')
###res = pd.merge(left,right,left_index = True,right_index = True,how = 'inner')
##print(res)



# Handle overlapping column names: 'age' exists in both frames.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})

# Merge on 'k'; the two 'age' columns are renamed with the suffixes below.
# how = 'inner' is used here; 'outer' is the alternative.
res = boys.merge(
    girls,
    on='k',
    suffixes=('_boy', '_girl'),
    how='inner',
)

print(res)


#join功能

#plt模組畫圖