python模擬概率論中偏度和峰度計算
阿新 • • 發佈:2019-02-10
在概率學中我們用偏度和峰度去刻畫分佈的情況:
偏度描述的是分佈的對稱性程度,如上面,右偏(正偏)表示分佈在均值μ的右側拖著較長的尾部、大部分資料集中在μ的左側,左偏(負偏)則反向,並且通過陰影的面積去刻畫概率。而峰度是描述分佈的最高值(尖峭程度)的情況;在常用情況下計算的是超值峰度,即峰度減去3,減去3的原因在於正態分佈的峰度恰好為3,因此正態分佈的超值峰度為0。
下面使用python代入公式計算和呼叫函式庫計算進行比較:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Compare hand-computed skewness / excess kurtosis with NumPy and SciPy.

The manual path plugs the raw (non-central) sample moments into the textbook
formulas; the library path calls ``np.mean``, ``np.std``, ``stats.skew`` and
``stats.kurtosis``.  Run as a script, it draws standard-normal samples, prints
both sets of statistics and plots the empirical 1-D and 2-D densities.
"""
import math

import numpy as np
from scipy import stats

# NOTE: matplotlib is imported inside the plotting code below rather than at
# module level, so that importing ``calc_statistics`` only needs NumPy/SciPy.


def _raw_moment_statistics(x):
    """Return (mean, std, skewness, excess kurtosis) from raw moments.

    ``x`` is a float array whose first axis indexes the samples; every
    statistic is reduced along that axis, so a 2-D input yields one value
    per column.
    """
    # k-th raw moments E[X^k], k = 1..4.
    m1 = np.mean(x, axis=0)
    m2 = np.mean(x ** 2, axis=0)
    m3 = np.mean(x ** 3, axis=0)
    m4 = np.mean(x ** 4, axis=0)

    mu = m1
    sigma = np.sqrt(m2 - mu * mu)
    # Third central moment expanded in raw moments, normalised by sigma^3.
    skew = (m3 - 3 * mu * m2 + 2 * mu ** 3) / sigma ** 3
    # Fourth central moment expanded in raw moments (-4*mu^4 + mu^4 = -3*mu^4),
    # normalised by sigma^4; subtracting 3 gives the *excess* kurtosis.
    kurtosis = (m4 - 4 * mu * m3 + 6 * mu * mu * m2 - 3 * mu ** 4) / sigma ** 4 - 3
    return mu, sigma, skew, kurtosis


def calc_statistics(x):
    """Print the hand-computed statistics of ``x`` and return the library ones.

    Args:
        x: array-like of samples; the first axis indexes the samples.  A 2-D
            input is treated column by column.

    Returns:
        Tuple ``(mu, sigma, skew, kurtosis)`` computed with ``np.mean``,
        ``np.std``, ``stats.skew`` and ``stats.kurtosis`` (excess kurtosis).

    Raises:
        ValueError: if ``x`` contains no samples.

    Side effects:
        Prints the manually computed mean, standard deviation, skewness and
        excess kurtosis so they can be compared with the returned values.
    """
    # Work in floating point so integer input cannot overflow in x**4.
    x = np.asarray(x, dtype=float)
    if x.ndim == 0 or x.shape[0] == 0:
        raise ValueError('calc_statistics() requires at least one sample')

    # Manual computation from the moment formulas.
    mu, sigma, skew, kurtosis = _raw_moment_statistics(x)
    print('手動計算均值、標準差、偏度、峰度:', mu, sigma, skew, kurtosis)

    # Cross-check with the library functions.
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    skew = stats.skew(x)
    kurtosis = stats.kurtosis(x)
    return mu, sigma, skew, kurtosis


def _plot_histogram_1d(d):
    """Plot a normalised histogram of ``d`` with the standard normal pdf."""
    import matplotlib.pyplot as plt

    # ``density=True`` replaces the removed ``normed=True`` keyword.
    _, bin_edges, _ = plt.hist(d, bins=50, density=True, color='g', alpha=0.75)
    t = np.arange(bin_edges.min(), bin_edges.max(), 0.05)
    y = np.exp(-t ** 2 / 2) / math.sqrt(2 * math.pi)
    plt.plot(t, y, 'r-', lw=2)
    plt.title(u'高斯分佈,樣本個數:%d' % d.shape[0])
    plt.grid(True)
    plt.show()


def _plot_density_2d(d, bins=30):
    """Plot the 2-D histogram of ``d`` as a 3-D scatter plus surface."""
    import matplotlib.pyplot as plt
    from matplotlib import cm
    # Imported for its side effect: older matplotlib releases only register
    # the '3d' projection once this module has been imported.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    density, _ = np.histogramdd(d, bins=[bins, bins])
    print('樣本總數:', np.sum(density))
    density /= density.max()

    # density[i, j] has i along the first sample dimension, so the grid must
    # use 'ij' indexing; the default 'xy' would swap the plotted X and Y axes.
    idx = np.arange(bins)
    grid_x, grid_y = np.meshgrid(idx, idx, indexing='ij')

    fig = plt.figure(facecolor='w')
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(grid_x, grid_y, density, c='r', s=15 * density, marker='o',
               depthshade=True)
    ax.plot_surface(grid_x, grid_y, density, cmap=cm.Accent, rstride=2,
                    cstride=2, alpha=0.9, lw=0.75)
    ax.set_xlabel(u'X')
    ax.set_ylabel(u'Y')
    ax.set_zlabel(u'Z')
    plt.title(u'二元高斯分佈,樣本個數:%d' % d.shape[0], fontsize=20)
    # The padding must be passed by keyword on current matplotlib.
    plt.tight_layout(pad=0.1)
    plt.show()


def main():
    """Run the 1-D and 2-D standard-normal demonstrations."""
    import matplotlib as mpl

    # Use a CJK-capable font for the titles and keep the minus sign renderable.
    mpl.rcParams[u'font.sans-serif'] = 'SimHei'
    mpl.rcParams[u'axes.unicode_minus'] = False

    # One-dimensional sample and histogram.
    d = np.random.randn(100000)
    print(d)
    mu, sigma, skew, kurtosis = calc_statistics(d)
    print('函式庫計算均值、標準差、偏度、峰度:', mu, sigma, skew, kurtosis)
    _plot_histogram_1d(d)

    # Two-dimensional sample and density plot.
    d = np.random.randn(100000, 2)
    mu, sigma, skew, kurtosis = calc_statistics(d)
    print('函式庫計算均值、標準差、偏度、峰度:', mu, sigma, skew, kurtosis)
    _plot_density_2d(d)


if __name__ == '__main__':
    main()