1. 程式人生 > >常見圖形:(箱線圖,柱狀圖,散點圖,折線圖...)seaborn + matplotlib

常見圖形:(箱線圖,柱狀圖,散點圖,折線圖...)seaborn + matplotlib

# -*- coding: utf-8 -*-

import seaborn as sns
import numpy as np

#------------------------顯示中文---------------------------------#
import matplotlib as mpl
# Keep the plain minus sign so negative numbers are not drawn as empty boxes
# when a figure is saved.
mpl.rcParams["axes.unicode_minus"] = False
# Default sans-serif font; SimHei is chosen so Chinese text can be displayed.
mpl.rcParams["font.sans-serif"] = ["SimHei"]


# ---------------------------------- Box plots ---------------------------------- #
# A box plot checks continuous data for outliers and shows the spread of the
# data (quartiles). At least one data argument is required.
#   y:         the numeric series to inspect
#   x:         the categorical label column
#   hue:       grouping factor
#   palette:   colour palette, e.g. Set1, Set2, Set3
#   linewidth: line width (2.5 gives a bold line)
#   order:     category order, e.g. order=['Dinner', 'Lunch']
#   orient:    'h' draws one box for every numeric column of the frame
#   whis:      controls whether outliers are shown; whis=np.inf hides them
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
tips  # notebook-style preview; a bare name has no effect when run as a script

# Total bill per meal time, split by smoker status, with Dinner listed first.
ax = sns.boxplot(
    data=tips,
    x='time',
    y='total_bill',
    hue='smoker',
    order=['Dinner', 'Lunch'],
    palette='Set3',
    linewidth=1.5,
)
# Horizontal orientation: one box per numeric column of the frame.
ax = sns.boxplot(data=tips, palette='Set3', orient='h')

# Box plot combined with a swarm plot (a scatter that shows the distribution).
# Combining plots only takes running the two plotting statements together:
# the second one is layered over the first.
ax = sns.boxplot(data=tips, x='day', y='total_bill')
ax = sns.swarmplot(data=tips, x='day', y='total_bill', color='.25')

#plt.scatter(x = iris['sepal_length'], y = iris['sepal_width'])


# ----------------------------------- barplot ----------------------------------- #
#   estimator: statistic computed per group; defaults to np.mean and can be
#              replaced with np.sum, np.count_nonzero, np.median, ...
#   palette:   colour theme, e.g. palette='Blues_d'
#
# The default statistic is the mean; here the bills are summed per day and sex.
ax = sns.barplot(
    data=tips,
    x="day",
    y="total_bill",
    hue='sex',
    estimator=np.sum,
    ci=0,
    palette="Blues_d",
)

# ----------------------------- countplot: count chart ----------------------------- #
# Particularly important for categorical (factor) variables.
ax = sns.countplot(data=tips, x='day', hue='time')

# ---------------------- factorplot / FacetGrid: faceted plots ---------------------- #
#   hue:      grouping variable
#   kind:     plot type, bar/violin/count
#   col_wrap: col_wrap=2 puts two subplots on each row
#   size:     size of each subplot
import matplotlib.pyplot as plt

# `col` facets the data: one scatter panel per day.
g = sns.FacetGrid(data=tips, col="day", size=3, col_wrap=2)
g.map(plt.scatter, 'total_bill', "tip")
g.add_legend()

# Line plot of total_bill, again one panel per day.
g = sns.FacetGrid(data=tips, col="day", size=3, col_wrap=2)
g.map(plt.plot, 'total_bill')

# `hue` groups within a single plot instead of splitting into panels.
g = sns.FacetGrid(data=tips, hue="day", size=5)
g.map(plt.scatter, 'total_bill', "tip")
g.add_legend()
g.set(xlabel="total_bill", ylabel="tip", title="day of total_bill")
# Grouped count chart: records per sex, one panel per day.
g = sns.factorplot(
    data=tips, x="sex", col="day", kind="count", col_wrap=2, size=3,
)
# Grouped box plot: distribution of total_bill for each day. `col` splits the
# data into subplots and col_wrap=2 places two subplots on each row.
g = sns.factorplot(
    data=tips, y="total_bill", col="day", kind="box", col_wrap=2, size=3, aspect=1,
)

# -------------------------- Regression plots: lmplot -------------------------- #
#   markers:  e.g. markers=["o", "x"]; mark the data points of each group with
#             a different symbol
#   x_jitter: amount of jitter applied to the scatter points
#   row:      can also be used to split the data into subplots

# Grouped plot: each smoker group gets its own marker shape.
g = sns.lmplot(x='total_bill', y='tip', hue='smoker', data=tips, markers=["o", "x"])
# Besides grouping, draw each group in its own subplot via `col`.
g = sns.lmplot(x='total_bill', y='tip', col='smoker', data=tips)
# col + hue together: group the data and give every group its own subplot;
# x_jitter controls how much the scatter points are jittered.
g = sns.lmplot(x="size", y="total_bill", hue="day", col="day", data=tips,
               col_wrap=2, size=3, aspect=.4, x_jitter=.1)
# `row` can split the data into subplots as well, here combined with `col`.
g = sns.lmplot(x="total_bill", y="tip", row="sex", col="time", data=tips, size=4)

# -------------------------- Regression plots: regplot -------------------------- #
#   ci: confidence level of the regression estimate
ax = sns.regplot(data=tips, x="total_bill", y="tip", ci=90, color="g", marker="+")

# The fit above is first order (same as order=1); order=2 fits a quadratic.
ans = sns.load_dataset("anscombe")
ax = sns.regplot(
    data=ans.loc[ans.dataset == "II"],
    x="x",
    y="y",
    order=2,
    ci=None,
    truncate=True,
    scatter_kws={"s": 80},
)


# ----------------------- Distribution of a numeric variable ----------------------- #
# Density plot of a numeric variable; by default both the probability density
# curve and the histogram are drawn.
#   hist:     True/False, whether the histogram is shown
#   vertical: True/False, whether to draw vertically
ax = sns.distplot(
    tips["total_bill"],
    hist=True,
    rug=True,
    vertical=False,
)

# -------------------------- Kernel density plots: kdeplot -------------------------- #
# For a single variable, the kernel density plot and the line plot show the
# same result.
#
# Bivariate KDE drawn per group, i.e. two density plots on top of each other.
# The density centres become visible, much like cluster centres in a
# clustering algorithm.
iris = sns.load_dataset("iris")
setosa = iris.loc[iris.species == 'setosa']        # group 1
virgnica = iris.loc[iris.species == 'virginica']   # group 2
ax = sns.kdeplot(
    setosa.sepal_width, setosa.sepal_length,
    cmap="Reds", shade=True, shade_lowest=False,
)
ax = sns.kdeplot(
    virgnica.sepal_width, virgnica.sepal_length,
    cmap="Blues", shade=True, shade_lowest=False,
)

# -------------------------- Bivariate plots: jointplot -------------------------- #
#   kind='reg': draw a regression line
#   kind='kde': draw a kernel density estimate
#
# x and y are always passed by keyword: this matches the first call and keeps
# working on seaborn releases that no longer accept them positionally.

# Default: scatter plot of the two variables with a histogram of each one.
# (The original notes add that the correlation coefficient and its confidence
# are computed too -- version dependent, verify against the installed seaborn.)
np.random.seed(0)
sns.set(style="white", color_codes=True)
g = sns.jointplot(x="total_bill", y="tip", data=tips)
# `kind` adds more than the scatter: a fitted line and fitted densities.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="reg")
# Hexagonal bins instead of individual points.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="hex")
# Kernel density estimate.
g = sns.jointplot(x="sepal_width", y="petal_length", data=iris, kind="kde",
                  space=0, color="g")
# Control the size and colour of the figure.
g = sns.jointplot(x="total_bill", y="tip", data=tips, size=5, ratio=3, color="g")

# ------------------------ Pairwise relationships: pairplot ------------------------ #
#   vars:      e.g. vars=["sepal_width", "sepal_length"]; the columns to use
#   diag_kind: plot drawn on the diagonal; histogram by default, "kde" for a
#              kernel density plot
#   markers:   e.g. markers=["o", "s", "D"]; the point symbols to use

# x-y scatter with a regression line: scatter=True, fit_reg=True.
g = sns.lmplot(x="total_bill", y="tip", data=tips, fit_reg=True, hue="smoker", scatter=True)
# Grouped pair plot: each species gets its own marker shape. The diagonal is a
# histogram by default; diag_kind="kde" switches it to a kernel density plot
# while the off-diagonal panels stay scatter plots, so the markers are used.
g = sns.pairplot(iris, hue="species", markers=["o", "s", "D"], diag_kind="kde")
# Plot only a subset of the DataFrame's columns.
g = sns.pairplot(iris, vars=["sepal_width", "sepal_length"])


# ----------------------------------- heatmap ----------------------------------- #
#   vmin:  smallest value mapped to a colour
#   vmax:  largest value mapped to a colour
#   cbar:  whether the colour bar is shown
#   annot: annot=True writes the numeric values onto the cells

# Heatmap of iris.isnull(), i.e. of where values are missing.
ax = sns.heatmap(iris.isnull(), yticklabels=False, cbar=False, cmap="Blues")

# Heatmap with the numeric values written onto the cells.
flights = sns.load_dataset("flights")
# Keyword arguments: recent pandas releases reject positional pivot arguments.
flights = flights.pivot(index="month", columns="year", values="passengers")
ax = sns.heatmap(flights, annot=True, fmt="d")

# ------------------------------ tsplot: time series ------------------------------ #
#   estimator: defaults to the mean
np.random.seed(22)
sns.set(color_codes=True)
x = np.linspace(0, 15, 31)
# Draw the random terms in the same order as a single combined expression
# would (rand first, then randn) so the seeded data stays the same.
noise = np.random.rand(10, 31)
offsets = np.random.randn(10, 1)
data = np.sin(x) + noise + offsets

# Fitted series drawn with two different confidence bands.
ax = sns.tsplot(data=data, color="m", ci=[68, 95])

# tsplot on a long-form dataset.
gammas = sns.load_dataset("gammas")
ax = sns.tsplot(
    data=gammas,
    time="timepoint",
    value="BOLD signal",
    unit="subject",
    condition="ROI",
)

# ---------------------------------- Twin axes ---------------------------------- #
# Use the twinx or twiny functions to set up a second coordinate axis.

import pandas as pd
import matplotlib.pyplot as plt
# Ten random integers; kept from the original notes but not used by the plot below.
sale = pd.Series(np.random.random(10) * 100).map(int)

ax = plt.subplot(111)
tips['tip'].plot(ax=ax, color="b")
ax.set(xlabel='time', ylabel='tip')
# Second axes created from the first one with twinx(); total_bill is drawn on it.
ax2 = ax.twinx()
tips["total_bill"].plot(ax=ax2, color="r")
# Call the setter; assigning to `ax2.set_ylabel` would replace the method and
# leave the axis unlabelled.
ax2.set_ylabel("total_bill")
plt.title("example of double series figure")


# Combining plt with sns: set the figure size and arrange the plots in a grid.

# Dataset used by the three plots below (columns 'class', 'age' and 'sex').
titanic = sns.load_dataset("titanic")

# Create one figure and three axes; each plotting call then only needs the
# `ax` argument to pick the axes it draws on.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
sns.barplot(x='class', y='age', data=titanic, ax=ax1)
sns.countplot(x='sex', data=titanic, ax=ax2)
sns.distplot(titanic['age'], ax=ax3)
plt.show()