1. 程式人生 > pandas中apply()方法的用法

pandas中apply()方法的用法

apply 的用法有點像 map:可以傳入一個函式,pandas 會把這個函式套用到 Series 的每個元素上(對 DataFrame 則是沿指定的 axis 逐行或逐列套用)。

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Load the demo CSV, then keep only the first rows (head() defaults to five)
# so the walkthrough output stays short.
df = pd.read_csv('apply_demo.csv')
df = df.head()

# .size is the number of rows for a Series, and rows * columns for a DataFrame.
print(df.size)
print(df)
# The bare string below is a no-op literal recording what print(df) displays.
''' 原始資料
         time                                data
0  1473411962   Symbol: APPL Seqno: 0 Price: 1623
1  1473411962   Symbol: APPL Seqno: 0 Price: 1623
2  1473411963   Symbol: APPL Seqno: 0 Price: 1623
3  1473411963   Symbol: APPL Seqno: 0 Price: 1623
4  1473411963   Symbol: APPL Seqno: 1 Price: 1649
'''
# Add a constant column 'A' holding the lowercase letter 'a'. The Series gets
# a default 0..4 integer index, which lines up with the five rows kept in df.
s1 = Series(['a'] * 5)
df['A'] = s1
print(df)
# Output of print(df) after adding the column:
'''
         time                                data  A
0  1473411962   Symbol: APPL Seqno: 0 Price: 1623  a
1  1473411962   Symbol: APPL Seqno: 0 Price: 1623  a
2  1473411963   Symbol: APPL Seqno: 0 Price: 1623  a
3  1473411963   Symbol: APPL Seqno: 0 Price: 1623  a
4  1473411963   Symbol: APPL Seqno: 1 Price: 1649  a
'''
# Series.apply calls the given function once per element; passing the unbound
# method str.upper upper-cases every value in column 'A'.
df['A'] = df['A'].apply(str.upper)
print(df)
# Output of print(df) after upper-casing column 'A':
'''
         time                                data  A
0  1473411962   Symbol: APPL Seqno: 0 Price: 1623  A
1  1473411962   Symbol: APPL Seqno: 0 Price: 1623  A
2  1473411963   Symbol: APPL Seqno: 0 Price: 1623  A
3  1473411963   Symbol: APPL Seqno: 0 Price: 1623  A
4  1473411963   Symbol: APPL Seqno: 1 Price: 1649  A
'''
# Inspect how a single 'data' cell breaks into tokens. split() with no
# argument ignores leading/trailing whitespace and collapses repeated spaces.
l = df['data'][0].split()
print(l)  # ['Symbol:', 'APPL', 'Seqno:', '0', 'Price:', '1623']


def foo(line):
    """Extract the Symbol, Seqno and Price values from one 'data' cell.

    Args:
        line: Text of the form ``'Symbol: APPL Seqno: 0 Price: 1623'``.

    Returns:
        A Series holding the tokens at positions 1, 3 and 5 (the three
        values), labelled 0, 1 and 2. The values are left as strings.

    Raises:
        IndexError: If the line contains fewer than six tokens.
    """
    # Whitespace-agnostic split: split(' ') would emit empty tokens for
    # repeated spaces and shift the fixed positions used below.
    items = line.split()
    return Series([items[1], items[3], items[5]])


# Because foo returns a Series, apply expands the results into a DataFrame
# with one column per Series entry (columns 0, 1, 2).
df_tmp = df['data'].apply(foo)
print(df_tmp)
'''
      0  1     2
0  APPL  0  1623
1  APPL  0  1623
2  APPL  0  1623
3  APPL  0  1623
4  APPL  1  1649
'''

# Give the positional columns meaningful names.
df_tmp = df_tmp.rename(columns={0: 'Symbol', 1: 'Seqno', 2: 'Price'})
print(df_tmp)
'''
  Symbol Seqno Price
0   APPL     0  1623
1   APPL     0  1623
2   APPL     0  1623
3   APPL     0  1623
4   APPL     1  1649
'''

# combine_first merges the two frames over the union of their columns; the
# raw 'data' text and the helper column 'A' are then dropped. Build the
# result once and reuse it for both printing and saving.
merged = df.combine_first(df_tmp).drop(['data', 'A'], axis=1)
print(merged)
'''
    Price  Seqno Symbol        time
0  1623.0    0.0   APPL  1473411962
1  1623.0    0.0   APPL  1473411962
2  1623.0    0.0   APPL  1473411963
3  1623.0    0.0   APPL  1473411963
4  1649.0    1.0   APPL  1473411963
'''
merged.to_csv('./demo_duplicate.csv', index=False)