1. 程式人生 > >Python學習在pandas(五)合併資料merge、視覺化資料plt

Python學習在pandas(五)合併資料merge、視覺化資料plt

1、依據一組key合併

import pandas as pd
# Build two small frames that share a 'key' column, then display both.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left, right, sep='\n')

# Join the two frames row-by-row on matching values of the 'key' column.
res = left.merge(right, on='key')
print(res)

結果:

2、依據key1與key2兩個columns進行合併,並依序打印出how為'inner'、'outer'、'left'、'right'的四種結果

import pandas as pd
# Two frames that share the key columns 'key1' and 'key2'; display both.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left, right, sep='\n')

# Merge on both key columns once per join type and print each result.
# After the loop, res holds the result of the last ('right') merge.
for how in ('inner', 'outer', 'left', 'right'):
    res = left.merge(right, on=['key1', 'key2'], how=how)
    print(res)



結果:

  key1 key2   A   B
0   K0   K0  A0  B0
1   K0   K1  A1  B1
2   K1   K0  A2  B2
3   K2   K1  A3  B3
  key1 key2   C   D
0   K0   K0  C0  D0
1   K1   K0  C1  D1
2   K1   K0  C2  D2
3   K2   K0  C3  D3
#how='inner':只保留key1與key2在兩邊都相同的列(其後三個表依序為outer、left、right的結果)
  key1 key2   A   B   C   D
0   K0   K0  A0  B0  C0  D0
1   K1   K0  A2  B2  C1  D1
2   K1   K0  A2  B2  C2  D2
  key1 key2    A    B    C    D
0   K0   K0   A0   B0   C0   D0
1   K0   K1   A1   B1  NaN  NaN
2   K1   K0   A2   B2   C1   D1
3   K1   K0   A2   B2   C2   D2
4   K2   K1   A3   B3  NaN  NaN
5   K2   K0  NaN  NaN   C3   D3
  key1 key2   A   B    C    D
0   K0   K0  A0  B0   C0   D0
1   K0   K1  A1  B1  NaN  NaN
2   K1   K0  A2  B2   C1   D1
3   K1   K0  A2  B2   C2   D2
4   K2   K1  A3  B3  NaN  NaN
  key1 key2    A    B   C   D
0   K0   K0   A0   B0  C0  D0
1   K1   K0   A2   B2  C1  D1
2   K1   K0   A2   B2  C2  D2
3   K2   K0  NaN  NaN  C3  D3

3、關於merge我不太明白,具體在這裡有介紹:merge

4、視覺化

import pandas as pd
import numpy as np
import  matplotlib.pyplot as plt

# Draw a 1000 x 4 matrix of standard-normal samples and wrap it in a
# DataFrame with an integer index 0..999 and columns A, B, C, D.
samples = np.random.randn(1000, 4)
data = pd.DataFrame(samples, index=np.arange(1000), columns=list("ABCD"))
# (data.cumsum() is intentionally left out here; the raw samples are plotted.)
data.plot()  # DataFrame.plot draws one line per column, much like plt.plot
plt.show()

結果:

程式碼:

import pandas as pd
import numpy as np
import  matplotlib.pyplot as plt

# Generate 1000 rows of standard-normal samples in four columns A, B, C, D.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
)
# cumsum() returns a new DataFrame instead of modifying data in place, so the
# result has to be assigned back; otherwise the accumulation is thrown away.
data = data.cumsum()
# First scatter layer: column A against column B. Keep the returned Axes.
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class1')
# Passing ax=ax draws the second layer (A against C) onto the same Axes
# instead of opening a new figure.
data.plot.scatter(x='A', y='C', color='LightGreen', label='Class2', ax=ax)
plt.show()

結果: