1. 程式人生 > 005.python科學計算庫pandas(下)

005.python科學計算庫pandas(下)

series

import pandas as pd
from pandas import Series

# Load the Fandango score comparison data set into a DataFrame.
fandango = pd.read_csv('fandango_score_comparison.csv')

# Selecting a single column of a DataFrame yields a Series.
series_film = fandango['FILM']
print(series_film[0:5])
print("----------------------------------")
series_rt = fandango['RottenTomatoes']
print(series_rt[0:5])
print("----------------------------------")

# Pull out the raw underlying values of both columns.
film_names = series_film.values
rt_scores = series_rt.values
# Series: a one-dimensional ndarray with axis labels (including time series).
# Here the film names become the labels of the Rotten Tomatoes scores.
series_custom = Series(rt_scores, index=film_names)
# A list of labels selects exactly those entries.
print(series_custom[['Minions (2015)', 'Leviathan (2014)']])
print("----------------------------------")
# An integer slice on this string-labelled Series selects by position.
print(series_custom[4:6])

sort

import pandas as pd
from pandas import Series

fandango = pd.read_csv('fandango_score_comparison.csv')
series_film = fandango['FILM']
series_rt = fandango['RottenTomatoes']
film_names = series_film.values
rt_scores = series_rt.values
# Series: a one-dimensional ndarray with axis labels (including time series).
series_custom = Series(rt_scores, index=film_names)
original_index = series_custom.index.tolist()
# sorted() returns a new list holding every item of the iterable in
# ascending order.
sorted_index = sorted(original_index)
print(sorted_index)
print("----------------------------------")
# Reorder series_custom to follow sorted_index (sorted_index must contain
# the same elements as series_custom.index).
sorted_by_index = series_custom.reindex(sorted_index)
print(sorted_by_index)

import pandas as pd
from pandas import Series

# Build a Series of Rotten Tomatoes scores labelled by film name, then show
# it ordered by label and ordered by value.
fandango = pd.read_csv('fandango_score_comparison.csv')
series_film, series_rt = fandango['FILM'], fandango['RottenTomatoes']
film_names, rt_scores = series_film.values, series_rt.values
# Series: a one-dimensional ndarray with axis labels (including time series).
series_custom = Series(data=rt_scores, index=film_names)

# Ordered by index label (film name).
sc2 = series_custom.sort_index()
print(sc2.iloc[:4])
print("----------------------------------")
# Ordered by value (score).
sc3 = series_custom.sort_values()
print(sc3.iloc[:4])

series算術運算

import pandas as pd
from pandas import Series
import numpy as np

# Demonstrate NumPy functions applied directly to a pandas Series.
fandango = pd.read_csv('fandango_score_comparison.csv')
series_film, series_rt = fandango['FILM'], fandango['RottenTomatoes']
film_names, rt_scores = series_film.values, series_rt.values
# Series: a one-dimensional ndarray with axis labels (including time series).
series_custom = Series(data=rt_scores, index=film_names)
divider = "----------------------------------"

print(series_custom.iloc[:3])
print(divider)
# Add the Series to itself, element by element.
doubled = np.add(series_custom, series_custom)
print(doubled.iloc[:3])
print(divider)
# Apply the sine function to every value.
sines = np.sin(series_custom)
print(sines.iloc[:3])
print(divider)
# Highest value (a single scalar rather than a Series).
highest = np.max(series_custom)
print(highest)

import pandas as pd
from pandas import Series

# Filter a Series with boolean masks.
fandango = pd.read_csv('fandango_score_comparison.csv')
series_film, series_rt = fandango['FILM'], fandango['RottenTomatoes']
film_names, rt_scores = series_film.values, series_rt.values
# Series: a one-dimensional ndarray with axis labels (including time series).
series_custom = Series(data=rt_scores, index=film_names)

# Comparing the Series with a scalar yields a boolean Series, one flag per film.
above_50 = series_custom > 50
series_greater_than_50 = series_custom[above_50]
print(series_greater_than_50.iloc[:5])
print("-----------------------------------")
# Combine two masks with & to keep scores strictly between 50 and 75.
below_75 = series_custom < 75
both_criteria = series_custom[above_50 & below_75]
print(both_criteria.iloc[:5])

import pandas as pd
from pandas import Series

# Average the critic and user Rotten Tomatoes scores per film.
fandango = pd.read_csv('fandango_score_comparison.csv')
film_index = fandango['FILM']

# Both Series share the FILM column as their index, so they align label by label.
rt_critics = Series(data=fandango['RottenTomatoes'].values, index=film_index)
print(rt_critics.iloc[:3])
rt_users = Series(data=fandango['RottenTomatoes_User'].values, index=film_index)
print(rt_users.iloc[:3])

rt_total = rt_critics + rt_users
rt_mean = rt_total / 2
print(rt_mean.iloc[:3])

set_index

import pandas as pd
from pandas import Series

fandango = pd.read_csv('fandango_score_comparison.csv')
# set_index sets the DataFrame index (row labels) from one or more existing
# columns and, by default, returns a new object.
#       drop : boolean, default True -- delete the columns used as the new index.
# drop=False keeps FILM available as a regular column as well.
fandango_films = fandango.set_index(keys='FILM', drop=False)
first_rows = fandango_films.iloc[:3]
print(first_rows)

  • 當選擇多個行時,返回一個DataFrame, 但當選擇單個行時,返回的是一個Series物件
import pandas as pd

# Selecting several rows returns a DataFrame; selecting one row returns a Series.
fandango = pd.read_csv('fandango_score_comparison.csv')
fandango_films = fandango.set_index(keys='FILM', drop=False)

first_film = "Avengers: Age of Ultron (2015)"
last_film = "Ant-Man (2015)"
divider = "----------------------------------------------------"

# Slice rows by label, first with bracket notation ...
sub_films = fandango_films[first_film:last_film]
print(sub_films)
print(type(sub_films))
print(divider)
# ... and then with loc[].
sub_films = fandango_films.loc[first_film:last_film]
print(sub_films)
print(type(sub_films))
print(divider)

# Look up one specific movie.
film = fandango_films.loc['Kumiko, The Treasure Hunter (2015)']
print(type(film))
print(divider)

# Look up a list of specific movies.
movies = ['Kumiko, The Treasure Hunter (2015)', 'Do You Believe? (2015)']
selected_films = fandango_films.loc[movies]
print(selected_films)
print(type(selected_films))

import pandas as pd
import numpy as np

# Use DataFrame.apply() to compute standard deviations per column and per row.
fandango = pd.read_csv('fandango_score_comparison.csv')
fandango_films = fandango.set_index(keys='FILM', drop=False)
divider = "----------------------------------------------------"

# apply() lets us run our own Python logic: it takes a function and calls it
# on each Series of the DataFrame.
# dtypes gives the data type of every column, returned as a Series.
types = fandango_films.dtypes
print(type(types))
print(divider)

# Keep only the float64 entries; .index then yields just the column names.
is_float = types.values == 'float64'
float_columns = types[is_float].index
# Bracket notation with those names narrows the DataFrame to its float columns.
float_df = fandango_films[float_columns]
print(float_df.iloc[:3])
print(type(float_df))
print(divider)

# The lambda argument is the Series for one column.
# numpy.std computes the standard deviation along the specified axis.
deviations = float_df.apply(lambda column: np.std(column))
print(deviations.iloc[:3])
print(type(deviations))
print(divider)

# With axis=1 the lambda receives one row at a time instead.
rt_mt_user = float_df[['RT_user_norm', 'Metacritic_user_nom']]
deviations = rt_mt_user.apply(lambda row: np.std(row), axis=1)
print(deviations.iloc[:3])