1. 程式人生 > >資料科學和人工智慧技術筆記 二十、資料視覺化

資料科學和人工智慧技術筆記 二十、資料視覺化

二十、資料視覺化

作者:Chris Albon

譯者:飛龍

協議:CC BY-NC-SA 4.0

MatPlotLib 中的雙向條形圖

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Build the data frame: one row per student, one column per exam score
raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'pre_score': [4, 24, 31, 2, 3],
        'mid_score': [25, 94, 57, 62, 70],
        'post_score': [5, 43, 23, 23, 51]}
df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
df
first_name pre_score mid_score post_score
0 Jason 4 25 5
1 Molly 24 94 43
2 Tina 31 57 23
3 Jake 2 62 23
4 Amy 3 70 51
# Input data: the second and third rows, skipping the first column
# (first_name). Rows are selected by position with .iloc because the old
# .ix indexer is no longer available in pandas.
x1 = df.iloc[1, 1:]
x2 = df.iloc[2, 1:]

# Bar labels, one per score column
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# Create the figure
fig = plt.figure(figsize=(8,6))

# Set the y positions, one per bar, and label them
y_pos = list(np.arange(len(x1)))
plt.yticks(y_pos, bar_labels, fontsize=10)

# Horizontal bars at y_pos for x1, drawn to the right of zero
plt.barh(y_pos,
         # use data x1
         x1,
         # center alignment
         align='center',
         # transparency of 0.4
         alpha=0.4,
         # dark green
         color='#263F13')

# Horizontal bars at y_pos for x2, negated so they extend to the left
plt.barh(y_pos,
         # use data -x2
         -x2,
         # center alignment
         align='center',
         # transparency of 0.4
         alpha=0.4,
         # light green
         color='#77A61D')

# Annotations and labels
plt.xlabel('Tina\'s Score: Light Green. Molly\'s Score: Dark Green')
t = plt.title('Comparison of Molly and Tina\'s Score')

# Leave a margin around the bars on both axes
plt.ylim([-1,len(x1)+0.1])
plt.xlim([-max(x2)-10, max(x1)+10])
plt.grid()

plt.show()

png

MatPlotLib 中的條形圖

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Build the data frame of exam scores, one row per student
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'pre_score': [4, 24, 31, 2, 3],
    'mid_score': [25, 94, 57, 62, 70],
    'post_score': [5, 43, 23, 23, 51],
}
df = pd.DataFrame(
    data=raw_data,
    columns=[
        'first_name',
        'pre_score',
        'mid_score',
        'post_score',
    ],
)
df
first_name pre_score mid_score post_score
0 Jason 4 25 5
1 Molly 24 94 43
2 Tina 31 57 23
3 Jake 2 62 23
4 Amy 3 70 51
# Mean score for each exam, in the same order as bar_labels below
mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()]

# Error-bar lengths: 25% of each exam's own mean, so every bar gets an
# error proportional to its height
variance = [mean * 0.25 for mean in mean_values]

# Bar labels
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# x positions of the bars, one per label
x_pos = list(range(len(bar_labels)))

# Draw one bar per exam at the x positions
plt.bar(
    x_pos,
    mean_values,       # bar heights
    yerr=variance,     # length of the y error lines
    align='center',    # center each bar on its x position
    color='#FFC222',
    alpha=0.5,         # transparency of 0.5
)

# Add a grid
plt.grid()

# Set the y-axis height. max() compares the (mean, error) tuples
# element-wise, so it returns the pair with the largest mean.
max_y = max(zip(mean_values, variance))
tallest_mean, tallest_error = max_y
plt.ylim([0, (tallest_mean + tallest_error) * 1.1])

# Title, axis label and tick labels
plt.title('Mean Scores For Each Test')
plt.ylabel('Score')
plt.xticks(x_pos, bar_labels)

plt.show()

png

Seaborn 中的調色盤

import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Build the data frame: ten timestamped rows of deaths per regiment
data = {
    'date': [
        '2014-05-01 18:47:05.069722',
        '2014-05-01 18:47:05.119994',
        '2014-05-02 18:47:05.178768',
        '2014-05-02 18:47:05.230071',
        '2014-05-02 18:47:05.230071',
        '2014-05-02 18:47:05.280592',
        '2014-05-03 18:47:05.332662',
        '2014-05-03 18:47:05.385109',
        '2014-05-04 18:47:05.436523',
        '2014-05-04 18:47:05.486877',
    ],
    'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
    'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
    'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
    'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
    'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
    'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
    'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41],
}

# 'battle_deaths' has no entry in data, so that column comes out empty (NaN)
df = pd.DataFrame(
    data,
    columns=[
        'date',
        'battle_deaths',
        'deaths_regiment_1',
        'deaths_regiment_2',
        'deaths_regiment_3',
        'deaths_regiment_4',
        'deaths_regiment_5',
        'deaths_regiment_6',
        'deaths_regiment_7',
    ],
)

# Index the rows by the date values; the date column itself is kept
df = df.set_index(df['date'])

# Preview named palettes, requesting 10 colors from each; every call is
# followed by its rendered output (the "png" placeholder).
sns.palplot(sns.color_palette("deep", 10))

png

sns.palplot(sns.color_palette("muted", 10))

png

sns.palplot(sns.color_palette("bright", 10))

png

sns.palplot(sns.color_palette("dark", 10))

png

sns.palplot(sns.color_palette("colorblind", 10))

png

sns.palplot(sns.color_palette("Paired", 10))

png

sns.palplot(sns.color_palette("BuGn", 10))

png

sns.palplot(sns.color_palette("GnBu", 10))

png

sns.palplot(sns.color_palette("OrRd", 10))

png

sns.palplot(sns.color_palette("PuBu", 10))

png

sns.palplot(sns.color_palette("YlGn", 10))

png

sns.palplot(sns.color_palette("YlGnBu", 10))

png

sns.palplot(sns.color_palette("YlOrBr", 10))

png

sns.palplot(sns.color_palette("YlOrRd", 10))

png

sns.palplot(sns.color_palette("BrBG", 10))

png

sns.palplot(sns.color_palette("PiYG", 10))

png

sns.palplot(sns.color_palette("PRGn", 10))

png

sns.palplot(sns.color_palette("PuOr", 10))

png

sns.palplot(sns.color_palette("RdBu", 10))

png

sns.palplot(sns.color_palette("RdGy", 10))

png

sns.palplot(sns.color_palette("RdYlBu", 10))

png

sns.palplot(sns.color_palette("RdYlGn", 10))

png

sns.palplot(sns.color_palette("Spectral", 10))

png

# Create a custom palette from hex colors and set it as the current palette;
# color_palette() with no arguments then returns the current palette.
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sns.set_palette(flatui)
sns.palplot(sns.color_palette())

png

# Plot the regiments as one time series, drawn in a fixed color.
# sns.tsplot is not available in current seaborn releases; sns.lineplot on
# long-form data replaces it. Each regiment contributes one observation per
# time step, so the line is the mean across regiments with an error band.
# NOTE(review): the band's default width may differ from tsplot's - confirm
# against the installed seaborn version.
regiment_deaths = df[['deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3',
                      'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6',
                      'deaths_regiment_7']]

# Use the row position as the time axis (the date index contains duplicates),
# then reshape to one row per (timepoint, regiment) pair.
long_deaths = (regiment_deaths
               .reset_index(drop=True)
               .rename_axis('timepoint')
               .reset_index()
               .melt(id_vars='timepoint', var_name='regiment', value_name='deaths'))

sns.lineplot(data=long_deaths, x='timepoint', y='deaths', color="#34495e")

# <matplotlib.axes._subplots.AxesSubplot at 0x116f5db70> 

png

使用 Seaborn 和 pandas 建立時間序列繪圖

import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Ten timestamped rows of deaths per regiment
data = {
    'date': [
        '2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994',
        '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071',
        '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592',
        '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109',
        '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877',
    ],
    'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
    'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
    'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
    'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
    'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
    'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
    'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41],
}

# Column order for the frame; 'battle_deaths' is not a key of data, so it
# becomes an empty (NaN) column
df = pd.DataFrame(
    data,
    columns=['date', 'battle_deaths',
             'deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3',
             'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6',
             'deaths_regiment_7'],
)

# Use the date values as the row index while keeping the date column
df = df.set_index(df['date'])

# Time series of deaths across the seven regiments.
# sns.tsplot is not available in current seaborn releases; sns.lineplot on
# long-form data replaces it and draws the mean across regiments at each
# time step with an error band.
# NOTE(review): the band's default width may differ from tsplot's - confirm
# against the installed seaborn version.
regiment_deaths = df[['deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3',
                      'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6',
                      'deaths_regiment_7']]

# Row position serves as the time axis (the date index contains duplicates);
# melt produces one row per (timepoint, regiment) pair.
long_deaths = (regiment_deaths
               .reset_index(drop=True)
               .rename_axis('timepoint')
               .reset_index()
               .melt(id_vars='timepoint', var_name='regiment', value_name='deaths'))

sns.lineplot(data=long_deaths, x='timepoint', y='deaths', color="indianred")

# <matplotlib.axes._subplots.AxesSubplot at 0x1140be780> 

png

# Time series plot with confidence-interval bars but no connecting line.
# sns.tsplot is not available in current seaborn releases; sns.lineplot on
# long-form data replaces it. err_style="bars" draws the interval as bars,
# and an empty linestyle with a marker leaves only the per-timepoint means.
# NOTE(review): the default interval width may differ from tsplot's -
# confirm against the installed seaborn version.
regiment_deaths = df[['deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3',
                      'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6',
                      'deaths_regiment_7']]

# Row position serves as the time axis (the date index contains duplicates);
# melt produces one row per (timepoint, regiment) pair.
long_deaths = (regiment_deaths
               .reset_index(drop=True)
               .rename_axis('timepoint')
               .reset_index()
               .melt(id_vars='timepoint', var_name='regiment', value_name='deaths'))

sns.lineplot(data=long_deaths, x='timepoint', y='deaths',
             err_style="bars", marker="o", linestyle="")

# <matplotlib.axes._subplots.AxesSubplot at 0x116400668> 

png

使用 Seaborn 建立散點圖

import pandas as pd
%matplotlib inline
import random
import matplotlib.pyplot as plt
import seaborn as sns

# Assemble the frame in one step. The columns are evaluated top to bottom,
# so x is sampled before y.
df = pd.DataFrame({
    # five distinct random integers drawn from 1..999
    'x': random.sample(range(1, 1000), 5),
    'y': random.sample(range(1, 1000), 5),
    # a 0/1 flag and a category label per row
    'z': [1, 0, 0, 1, 0],
    'k': ['male', 'male', 'male', 'female', 'female'],
})

# Look at the first few rows
df.head()
x y z k
0 466 948 1 male
1 832 481 0 male
2 978 465 0 male
3 510 206 1 female
4 848 357 0 female
# 設定散點圖樣式
sns.set_context
            
           

相關推薦

no