常見圖形:(箱線圖,柱狀圖,散點圖,折線圖...)seaborn + matplotlib
阿新 • • 發佈:2019-02-10
# -*- coding: utf-8 -*-
# Gallery of common seaborn / matplotlib chart types, built on seaborn's
# example datasets ("tips", "anscombe", "iris", "flights", "gammas").
#
# NOTE(review): the calls below use an older seaborn API (`size=`,
# `factorplot`, `tsplot`, `distplot`, positional `kdeplot` data). Newer
# seaborn releases rename or remove some of these -- confirm against the
# installed version before running.
# NOTE(review): no figure is created between the axes-level calls, so this
# looks like it is meant to be run section by section in an interactive
# session rather than top to bottom -- confirm.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# ------------------------ CJK text rendering ------------------------ #
mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font able to draw Chinese glyphs
mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from rendering as a box

# ------------------------------ boxplot ------------------------------ #
# A box plot shows whether continuous data contains outliers and how the
# data is spread (quartiles). At least one data argument is required.
#   y:         the data series to inspect
#   x:         the categorical label column
#   hue:       grouping factor
#   palette:   colour palette, e.g. Set1, Set2, Set3
#   linewidth: line width (2.5 gives a bold line)
#   order:     category order, e.g. order = ['Dinner', 'Lunch']
#   orient:    'h' draws one horizontal box per numeric column of the frame
#   whis:      whisker reach; whis = np.inf means no point is drawn as an outlier
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
tips  # bare expression: only displays the frame in an interactive session
ax = sns.boxplot(x='time', y='total_bill', hue='smoker', data=tips,
                 order=['Dinner', 'Lunch'], linewidth=1.5, palette='Set3')
ax = sns.boxplot(data=tips, orient='h', palette='Set3')

# Box plot + swarm plot: running two plotting statements together simply
# overlays the second drawing on the first.
ax = sns.boxplot(x='day', y='total_bill', data=tips)
ax = sns.swarmplot(x='day', y='total_bill', data=tips, color='.25')

# ------------------------------ barplot ------------------------------ #
#   estimator: aggregation function, default np.mean; any callable works
#              (np.sum, np.count_nonzero, np.median, ...)
#   palette:   colour theme, e.g. palette = 'Blues_d'
ax = sns.barplot(x="day", y="total_bill", hue='sex', data=tips,
                 estimator=np.sum, ci=0, palette="Blues_d")

# ----------------------------- countplot ----------------------------- #
# Count of observations per category -- very useful for factor variables.
ax = sns.countplot(x='day', data=tips, hue='time')

# ----------------------- factorplot / FacetGrid ----------------------- #
#   hue:      grouping variable
#   kind:     plot type, bar / violin / count
#   col_wrap: number of facets per row (col_wrap = 2 -> two per row)
#   size:     size of each facet
# `col` splits the data into one facet per value.
g = sns.FacetGrid(tips, col="day", col_wrap=2, size=3)
g.map(plt.scatter, 'total_bill', "tip").add_legend()

# Line plot per facet.
g = sns.FacetGrid(tips, col="day", col_wrap=2, size=3)
g.map(plt.plot, 'total_bill')

# `hue` groups the data inside a single plot.
g = sns.FacetGrid(tips, hue="day", size=5)
g.map(plt.scatter, 'total_bill', "tip").add_legend()
g.set(ylabel="tip", xlabel="total_bill", title="day of total_bill")

# Grouped count bars, one facet per day.
g = sns.factorplot(x="sex", col="day", data=tips, kind="count",
                   col_wrap=2, size=3)

# Grouped box plots: distribution of total_bill per day, two facets per row.
g = sns.factorplot(y="total_bill", col="day", col_wrap=2, data=tips,
                   kind="box", size=3, aspect=1)

# --------------------------- lmplot (regression) --------------------------- #
#   markers:  e.g. ["o", "x"], one marker style per hue level
#   x_jitter: amount of jitter added to the scatter points
#   row:      can also be used to split the data into facets
# Grouped plot: each group gets its own marker.
g = sns.lmplot(x='total_bill', y='tip', hue='smoker', data=tips,
               markers=["o", "x"])

# Group AND draw each group in its own facet, controlled by `col`.
g = sns.lmplot(x='total_bill', y='tip', col='smoker', data=tips)

# col + hue together: grouped colours and separate facets, with jitter.
# (Fixed: the original call contained a stray ", ," which is a SyntaxError.)
g = sns.lmplot(x="size", y="total_bill", hue="day", col="day", data=tips,
               col_wrap=2, size=3, aspect=.4, x_jitter=.1)

# Facets can be split by `row` as well as by `col`.
# (Fixed: this note was bare text in the original, which is a SyntaxError.)
g = sns.lmplot(x="total_bill", y="tip", row="sex", col="time", data=tips,
               size=4)

# -------------------------- regplot (regression) -------------------------- #
#   ci: confidence level of the regression band
ax = sns.regplot(x="total_bill", y="tip", data=tips, color="g", marker="+",
                 ci=90)

# The fits above are first-order (order = 1); order = 2 fits a quadratic.
ans = sns.load_dataset("anscombe")
ax = sns.regplot(x="x", y="y", data=ans.loc[ans.dataset == "II"],
                 scatter_kws={"s": 80}, order=2, ci=None, truncate=True)

# --------------------------- value distribution --------------------------- #
# Density plot of a numeric variable; by default both the density curve and
# the histogram are drawn.
#   hist:     True/False, whether to draw the histogram
#   vertical: True/False, whether to draw vertically
ax = sns.distplot(tips["total_bill"], rug=True, hist=True, vertical=False)

# -------------------------------- kdeplot -------------------------------- #
# Bivariate KDE per group: two density plots on the same axes, which makes
# the density centres visible (similar to plotting cluster centres).
iris = sns.load_dataset("iris")
setosa = iris.loc[iris.species == 'setosa']  # group 1
virgnica = iris.loc[iris.species == 'virginica']  # group 2
ax = sns.kdeplot(setosa.sepal_width, setosa.sepal_length, cmap="Reds",
                 shade=True, shade_lowest=False)
ax = sns.kdeplot(virgnica.sepal_width, virgnica.sepal_length, cmap="Blues",
                 shade=True, shade_lowest=False)

# ------------------------ jointplot (two variables) ------------------------ #
#   kind = "reg": draw a regression line
#   kind = "kde": draw kernel density estimates
# Default: scatter plot of the two variables plus a histogram of each.
np.random.seed(0)
sns.set(style="white", color_codes=True)
g = sns.jointplot(x="total_bill", y="tip", data=tips)

# `kind` selects what is drawn besides the scatter: fitted line, KDE, ...
# x/y are passed by keyword here to match the first jointplot call.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="reg")

# Hexagonal bins instead of points.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="hex")

# Kernel density estimate.
g = sns.jointplot(x="sepal_width", y="petal_length", data=iris, kind="kde",
                  space=0, color="g")

# Control figure size and colour.
g = sns.jointplot(x="total_bill", y="tip", data=tips, size=5, ratio=3,
                  color="g")

# --------------------------- pairplot (grid) --------------------------- #
#   vars:      column names to use, e.g. ["sepal_width", "sepal_length"]
#   diag_kind: plot on the diagonal, histogram by default, "kde" for density
#   markers:   marker per hue level, e.g. ["o", "s", "D"]
# x-y scatter with a regression line (scatter = True, fit_reg = True).
g = sns.lmplot(x="total_bill", y="tip", data=tips, fit_reg=True,
               hue="smoker", scatter=True)

# Pairwise relationships by group, one marker per species, with kernel
# density estimates on the diagonal.
# (Fixed: the original passed kind="kde"; `diag_kind` is the parameter the
# note above describes, and it keeps the per-group scatter markers.)
g = sns.pairplot(iris, hue="species", markers=["o", "s", "D"],
                 diag_kind="kde")

# Plot only a subset of the DataFrame's columns.
g = sns.pairplot(iris, vars=["sepal_width", "sepal_length"])

# -------------------------------- heatmap -------------------------------- #
#   vmin:  smallest value mapped to the colour scale
#   vmax:  largest value mapped to the colour scale
#   cbar:  whether to draw the colour bar
#   annot: True writes each cell's value onto the map
ax = sns.heatmap(iris.isnull(), yticklabels=False, cbar=False, cmap="Blues")

# Heatmap with the values written into the cells.
flights = sns.load_dataset("flights")
# Keyword arguments are accepted by every pandas version.
flights = flights.pivot(index="month", columns="year", values="passengers")
ax = sns.heatmap(flights, annot=True, fmt="d")

# --------------------------- tsplot (time series) --------------------------- #
#   estimator: defaults to the mean
np.random.seed(22)
sns.set(color_codes=True)
x = np.linspace(0, 15, 31)
data = np.sin(x) + np.random.rand(10, 31) + np.random.randn(10, 1)

# Draw several confidence bands at once.
ax = sns.tsplot(data=data, ci=[68, 95], color="m")

# tsplot from a long-form DataFrame.
gammas = sns.load_dataset("gammas")
ax = sns.tsplot(time="timepoint", value="BOLD signal", data=gammas,
                unit="subject", condition="ROI")

# ------------------------------- twin axes ------------------------------- #
# twinx / twiny create a second axis that shares the x (or y) axis.
sale = pd.Series(np.random.random(10) * 100).map(int)  # not used by the plot below
ax = plt.subplot(111)
tips['tip'].plot(ax=ax, color="b")
ax.set(xlabel='time', ylabel='tip')
ax2 = ax.twinx()
tips["total_bill"].plot(ax=ax2, color="r")
# Fixed: the original assigned a string to `ax2.set_ylabel`, replacing the
# method instead of calling it, so the right-hand axis was never labelled.
ax2.set_ylabel("total_bill")
plt.title("example of double series figure")
plt 結合 sns 指定圖形大小,以及子圖的分組排列。
# Fixed: `titanic` was used below without being defined anywhere in this file.
# Presumably seaborn's bundled "titanic" example dataset -- confirm.
titanic = sns.load_dataset("titanic")

# Create one figure with three axes; each seaborn call below is directed to
# a specific axes through its `ax` argument.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
sns.barplot(x='class', y='age', data=titanic, ax=ax1)
sns.countplot(x='sex', data=titanic, ax=ax2)
sns.distplot(titanic['age'], ax=ax3)
plt.show()