python之資料視覺化
阿新 • • 發佈:2018-11-19
各種圖形簡介
線性圖:plt.plot(x,y,*argv)
條形圖:plt.bar(x,y),x和y的長度應相等
水平條形圖:plt.barh(x,y),只是將條形圖的x軸改為垂直方向、y軸改為水平方向而已
條形圖高度表示某項目內的資料個數,由於分組資料具有連續性,直方圖的各矩形通常是連續排列,而條形圖則是分開排列
直方圖:plt.hist(x),資料集中各資料出現的頻數/頻率圖
2d直方圖:plt.hist2d(x,y)
直方圖是用面積表示各組頻數的多少,矩形的高度表示每一組的頻數或頻率,寬度則表示各組的組距,其高度與寬度均有意義
餅狀圖:plt.pie(a,labels=list('abcde'),autopct='%.2f%%'),
散點圖:plt.scatter(x,y,*argv)
箱形圖:plt.boxplot(x)
詞雲圖:wordcloud.WordCloud(*argv)
根據詞頻和背景圖產生的圖
直方圖/分佈:sns.distplot()
製圖例項
In [79]: import numpy as np
In [80]: import pandas as pd
In [81]: import matplotlib.pyplot as plt
In [82]: import wordcloud
In [83]: import seaborn as sns
plt.rcParams['font.serif'] = ['KaiTi']
plt.rcParams['axes.unicode_minus'] = False
names = ['mpg','cylinders','displacement','horsepower','weight','acceleration','model_year','origin','car_name']
df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data", sep='\s+', names=names)
In [154]: df['maker'] = df.car_name.apply(lambda x: x.split()[0]).str.title()
...: df['origin'] = df.origin.map({1: 'America', 2: 'Europe', 3: 'Asia'})
...: df=df.applymap(lambda x: np.nan if x == '?' else x).dropna()
...: df['horsepower'] = df.horsepower.astype(float)
1.詞雲圖
names = ['mpg','cylinders','displacement','horsepower','weight','acceleration','model_year','origin','car_name']
df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data", sep='\s+', names=names)
word_dict=dict(df['car_name'])
background=plt.imread('data/back.jpg')
wc = wordcloud.WordCloud(
    background_color='white',#背景顏色
    font_path='data/simhei.ttf',#字型
    mask=background,#背景圖
    max_words=1000,#最大的字數
    max_font_size=100,#最大的字型
    colormap='hsv',#色譜
    random_state=100#隨機種子
)
wc.generate_from_frequencies(word_dict)#根據詞頻生成wordcloud
plt.imshow(wc)#將wordcloud投影到plt上
plt.axis('off')#去除座標
plt.savefig('image/DesriptionWordCloud.png', dpi=400, bbox_inches='tight')
2.線性圖
In [100]: plt.plot(df.displacement.index,df.displacement.values)
Out[100]: [<matplotlib.lines.Line2D at 0x7f1378501c50>]
In [101]: plt.show()
3.條形圖
In [104]: plt.bar(df.displacement.index[:10],df.displacement.values[:10])
Out[104]: <BarContainer object of 10 artists>
In [105]: plt.show()
4.水平條形圖
In [106]: plt.barh(df.displacement.index[:10],df.displacement.values[:10])
Out[106]: <BarContainer object of 10 artists>
In [107]: plt.show()
5.直方圖
In [116]: a=pd.Series([1,2,3,1,2,3,3,4,2,1])
In [117]: plt.hist(a)
Out[117]:
(array([3., 0., 0., 3., 0., 0., 3., 0., 0., 1.]),
array([1. , 1.3, 1.6, 1.9, 2.2, 2.5, 2.8, 3.1, 3.4, 3.7, 4. ]),
<a list of 10 Patch objects>)
In [118]: plt.show()
6.餅狀圖
In [124]: data=[0.2,0.1,0.33,0.27,0.1]
In [125]: plt.pie(data,autopct='%.2f%%',labels=list('abcde'))
Out[125]:
([<matplotlib.patches.Wedge at 0x7f136fe5af28>,
<matplotlib.patches.Wedge at 0x7f136fe636a0>,
<matplotlib.patches.Wedge at 0x7f136fe63da0>,
<matplotlib.patches.Wedge at 0x7f136fe6c4e0>,
<matplotlib.patches.Wedge at 0x7f136fe6cbe0>],
[Text(0.889919,0.646564,'a'),
Text(-2.57474e-08,1.1,'b'),
Text(-1.07351,0.239957,'c'),
Text(0.103519,-1.09512,'d'),
Text(1.04616,-0.339919,'e')],
[Text(0.48541,0.352671,'20.00%'),
Text(-1.4044e-08,0.6,'10.00%'),
Text(-0.58555,0.130886,'33.00%'),
Text(0.0564651,-0.597337,'27.00%'),
Text(0.570634,-0.18541,'10.00%')])
In [126]: plt.show()
7.散點圖
In [130]: plt.scatter(df.displacement.index,df.displacement.values,color='red')
Out[130]: <matplotlib.collections.PathCollection at 0x7f136faf9470>
In [131]: plt.show()
8.箱形圖
In [147]: plt.boxplot(df.iloc[[1,2,3],[1,6]])
Out[147]:
{'whiskers': [<matplotlib.lines.Line2D at 0x7f136f0d9e48>,
<matplotlib.lines.Line2D at 0x7f136f0d9f60>,
<matplotlib.lines.Line2D at 0x7f136f0e8d68>,
<matplotlib.lines.Line2D at 0x7f136f0e8e80>,
<matplotlib.lines.Line2D at 0x7f136f0f8c88>,
<matplotlib.lines.Line2D at 0x7f136f0f8da0>],
'caps': [<matplotlib.lines.Line2D at 0x7f136f0e0748>,
<matplotlib.lines.Line2D at 0x7f136f0e0ba8>,
<matplotlib.lines.Line2D at 0x7f136f0f1668>,
<matplotlib.lines.Line2D at 0x7f136f0f1ac8>,
<matplotlib.lines.Line2D at 0x7f136f100588>,
<matplotlib.lines.Line2D at 0x7f136f1009e8>],
'boxes': [<matplotlib.lines.Line2D at 0x7f136f0d9898>,
<matplotlib.lines.Line2D at 0x7f136f0e8908>,
<matplotlib.lines.Line2D at 0x7f136f0f8828>],
'medians': [<matplotlib.lines.Line2D at 0x7f136f0e0cc0>,
<matplotlib.lines.Line2D at 0x7f136f0f1f28>,
<matplotlib.lines.Line2D at 0x7f136f100e48>],
'fliers': [<matplotlib.lines.Line2D at 0x7f136f0e84a8>,
<matplotlib.lines.Line2D at 0x7f136f0f83c8>,
<matplotlib.lines.Line2D at 0x7f136f100f60>],
'means': []}
In [148]: plt.show()
8.直方分佈圖
#方法一
In [150]: sns.distplot(df.displacement.values)
/home/zelin/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[150]: <matplotlib.axes._subplots.AxesSubplot at 0x7f136f0c7668>
#方法二
In [166]: g = sns.FacetGrid(df, col="origin")
...: g.map(sns.distplot, "mpg")
...:
...:
/home/zelin/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[166]: <seaborn.axisgrid.FacetGrid at 0x7f136e0e7f98>
In [151]: plt.show()
9.關係圖
#根據兩個維度繪製關係圖,即DataFrame的兩列
In [155]: sns.factorplot(data=df,x='model_year',y='mpg')
#根據三個維度繪製關係圖
sns.factorplot(data=df,x='model_year',y='mpg',col='origin')
#從折線圖切換成柱狀圖
sns.factorplot(data=df, x="model_year", y="mpg", col="origin",kind='bar')
10.繪圖同時還做迴歸
In [168]: g = sns.FacetGrid(df, col="origin")
...: g.map(sns.regplot, "horsepower", "mpg")
...: plt.xlim(0, 250)#x軸刻度最大值
...: plt.ylim(0, 60)#y軸刻度最大值
...:
...:
/home/zelin/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[168]: (0, 60)
11.等高線圖
In [170]: df['tons'] = (df.weight/2000).astype(int)
...: g = sns.FacetGrid(df, col="origin", row="tons")
...: g.map(sns.kdeplot, "horsepower", "mpg")
...: plt.xlim(0, 250)
...: plt.ylim(0, 60)
12.按照兩個維度展開畫圖
g = sns.FacetGrid(df, col="origin", row="tons")
g.map(plt.hist, "mpg", bins=np.linspace(0, 50, 11))
13.多個維度兩兩組合繪圖
g = sns.pairplot(df[["mpg", "horsepower", "weight", "origin"]], hue="origin", diag_kind="hist")
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), rotation=45)
14.組合繪圖時做迴歸
g = sns.PairGrid(df[["mpg", "horsepower", "weight", "origin"]], hue="origin")
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), rotation=45)
g.add_legend()
g.set(alpha=0.5)
15.聯合繪圖(等高圖)
sns.jointplot("mpg", "horsepower", data=df, kind='kde')
16.聯合繪圖加迴歸(散點圖)
sns.jointplot("horsepower", "mpg", data=df, kind="reg")