Machine Learning with sklearn 0.19 — Linear Regression (Application Examples)
阿新 · Published: 2019-01-07
I. Using linear regression in sklearn
II. Linear regression — household power consumption prediction
(1) Relationship between time and power
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:ZhengzhengLiu
# Linear regression — household power prediction (relationship between time and power)

# Imports
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame
import time

# Load the data
path = "datas/household_power_consumption_1000.txt"
data = pd.read_csv(path, sep=";")

# Inspect the data
print(data.head())   # head() shows the first 5 rows by default

# iloc slices rows/columns by integer position; take the raw X values
# (all rows of columns 0-1, the date and time fields)
xdata = data.iloc[:, 0:2]
y = data.iloc[:, 2]                 # Y values (active power)
# y = data["Global_active_power"]   # equivalent to the line above

# Helper that parses the time fields
def time_format(x):
    # join merges the two columns into one space-separated string;
    # strptime parses that string into a struct_time tuple
    t = time.strptime(" ".join(x), "%d/%m/%Y %H:%M:%S")   # day/month/year hour:minute:second
    # return year, month, day, hour, minute and second as a tuple
    return (t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)

# apply runs the conversion on every row of xdata
x = xdata.apply(lambda x: pd.Series(time_format(x)), axis=1)
print("====== parsed time fields =======")
print(x.head())

# Split into training and test sets; random_state seeds the shuffling
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

# Standardize the training and test sets
ss = StandardScaler()
# fit computes the mean and variance needed for standardization; transform applies them
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)

# Build and train the linear model
lr = LinearRegression()
lr.fit(x_train, y_train)
print("R^2:", lr.score(x_train, y_train))   # coefficient of determination; closer to 1 is better
y_predict = lr.predict(x_test)

# Evaluate the model
mse = np.average((y_predict - np.array(y_test)) ** 2)
rmse = np.sqrt(mse)
print("MSE:", mse)
print("RMSE:", rmse)

# Persist the models
from sklearn.externals import joblib   # on sklearn >= 0.21 use `import joblib` instead
joblib.dump(ss, "data_ss.model")   # save the fitted scaler
joblib.dump(lr, "data_lr.model")   # save the trained linear model
joblib.load("data_ss.model")       # load a saved model (returns the persisted object)
joblib.load("data_lr.model")

# Plot predicted vs. actual values
# Configure fonts so CJK labels and minus signs render correctly
mpl.rcParams["font.sans-serif"] = [u"SimHei"]
mpl.rcParams["axes.unicode_minus"] = False

t = np.arange(len(x_test))
plt.figure(facecolor="w")   # create the figure; facecolor "w" is white (the default)
plt.plot(t, y_test, "r-", linewidth=2, label=u"actual")
plt.plot(t, y_predict, "g-", linewidth=2, label=u"predicted")
plt.legend(loc="upper right")   # show the legend at the given position
plt.title(u"Linear regression: time vs. power", fontsize=20)
plt.grid(b=True)
plt.savefig("time_vs_power.png")   # save the figure
plt.show()

# Run results:
         Date      Time  Global_active_power  Global_reactive_power  Voltage  \
0  16/12/2006  17:24:00                4.216                  0.418   234.84
1  16/12/2006  17:25:00                5.360                  0.436   233.63
2  16/12/2006  17:26:00                5.374                  0.498   233.29
3  16/12/2006  17:27:00                5.388                  0.502   233.74
4  16/12/2006  17:28:00                3.666                  0.528   235.68

   Global_intensity  Sub_metering_1  Sub_metering_2  Sub_metering_3
0              18.4             0.0             1.0            17.0
1              23.0             0.0             1.0            16.0
2              23.0             0.0             2.0            17.0
3              23.0             0.0             1.0            17.0
4              15.8             0.0             1.0            17.0
====== parsed time fields =======
      0   1   2   3   4  5
0  2006  12  16  17  24  0
1  2006  12  16  17  25  0
2  2006  12  16  17  26  0
3  2006  12  16  17  27  0
4  2006  12  16  17  28  0
R^2: 0.232255807137
MSE: 1.18571330484
RMSE: 1.08890463533
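Because joblib.load returns the persisted object, the saved scaler and model can be reused later without retraining. A minimal sketch, assuming the two .model files written above exist on disk; the timestamp row below is a made-up example in the same (year, month, day, hour, minute, second) layout as the training features:

import numpy as np
from sklearn.externals import joblib   # `import joblib` on sklearn >= 0.21

ss2 = joblib.load("data_ss.model")     # the fitted StandardScaler
lr2 = joblib.load("data_lr.model")     # the trained LinearRegression

# Hypothetical new timestamp: 2006-12-17 00:00:00
x_new = np.array([[2006, 12, 17, 0, 0, 0]])
x_new = ss2.transform(x_new)           # apply the same standardization as in training
print(lr2.predict(x_new))              # predicted global active power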
Key modules, as described in the official documentation
1. train_test_split — splits the data into training and test sets
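In brief, train_test_split shuffles the samples and carves off the test_size fraction as a hold-out set; passing random_state makes the shuffle reproducible. A minimal sketch on toy arrays (not the household data):

import numpy as np
from sklearn.model_selection import train_test_split

X = np.arange(20).reshape(10, 2)   # 10 samples, 2 features
y = np.arange(10)                  # 10 targets

# test_size=0.3 holds out 30% of the rows; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
print(X_train.shape, X_test.shape)   # (7, 2) (3, 2)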
(2) Linear regression — household power prediction (relationship between power and current)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:ZhengzhengLiu
# Linear regression — household power prediction (relationship between power and current)

# Imports
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame

# Load the data
path = "datas/household_power_consumption_1000.txt"
data = pd.read_csv(path, sep=";")

# iloc slices rows/columns by integer position; take the raw X values
# (all rows of columns 2-3, the active and reactive power)
x = data.iloc[:, 2:4]
y = data.iloc[:, 5]   # Y values (current, Global_intensity)

# Split into training and test sets; random_state seeds the shuffling
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

# Standardize the training and test sets
ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)

# Build and train the linear model
lr = LinearRegression()
lr.fit(x_train, y_train)
print("R^2 (coefficient of determination):", lr.score(x_train, y_train))
print("coefficients:", lr.coef_)     # estimated regression coefficients
print("intercept:", lr.intercept_)   # independent term of the linear model
y_predict = lr.predict(x_test)

# Evaluate the model
mse = np.average((y_predict - np.array(y_test)) ** 2)
rmse = np.sqrt(mse)
print("MSE:", mse)
print("RMSE:", rmse)

# Persist the models
from sklearn.externals import joblib    # on sklearn >= 0.21 use `import joblib` instead
joblib.dump(ss, "PI_data_ss.model")     # save the fitted scaler
joblib.dump(lr, "PI_data_lr.model")     # save the trained linear model
joblib.load("PI_data_ss.model")         # load a saved model (returns the persisted object)
joblib.load("PI_data_lr.model")

# Plot predicted vs. actual values
# Configure fonts so CJK labels and minus signs render correctly
mpl.rcParams["font.sans-serif"] = [u"SimHei"]
mpl.rcParams["axes.unicode_minus"] = False

p = np.arange(len(x_test))
plt.figure(facecolor="w")   # create the figure; facecolor "w" is white
plt.plot(p, y_test, "r-", linewidth=2, label=u"actual")
plt.plot(p, y_predict, "g-", linewidth=2, label=u"predicted")
plt.legend(loc="upper right")   # show the legend at the given position
plt.title(u"Linear regression: power vs. current", fontsize=20)
plt.grid(b=True)
plt.savefig("power_vs_current.png")
plt.show()

# Run results:
R^2 (coefficient of determination): 0.990719383392
coefficients: [ 5.12959849  0.0589354 ]
intercept: 10.3485714286
MSE: 0.193026891251
RMSE: 0.439348257366
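The hand-rolled MSE/RMSE above can be cross-checked with sklearn's built-in metrics. A small sketch, assuming y_test and y_predict from the script above are in scope:

import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, y_predict)   # same quantity as np.average((y_predict - y_test) ** 2)
print("MSE:", mse)
print("RMSE:", np.sqrt(mse))
print("R^2 on the test set:", r2_score(y_test, y_predict))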
(3) Linear regression — household power prediction (polynomial relationship between time and voltage)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:ZhengzhengLiu
# Linear regression — household power prediction (polynomial relationship between time and voltage)

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures   # polynomial feature expansion
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV       # grid search with cross-validation
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import time

# Helper that parses the time fields
def time_format(x):
    # join merges the two columns into one space-separated string;
    # strptime parses that string into a struct_time tuple
    t = time.strptime(" ".join(x), "%d/%m/%Y %H:%M:%S")   # day/month/year hour:minute:second
    # return year, month, day, hour, minute and second as a tuple
    return (t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)

# Configure fonts so CJK labels and minus signs render correctly
mpl.rcParams["font.sans-serif"] = [u"SimHei"]
mpl.rcParams["axes.unicode_minus"] = False

# Load the data
path = "datas/household_power_consumption_1000.txt"
data = pd.read_csv(path, sep=";", low_memory=False)

# Date, time, active power, reactive power, voltage, current,
# kitchen power, laundry-room power, water-heater power
names = ['Date', 'Time', 'Global_active_power', 'Global_reactive_power', 'Voltage',
         'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']

# Anomalous-data handling: replace "?" with NaN, then drop any row with a missing value
new_data = data.replace("?", np.nan)
datas = new_data.dropna(axis=0, how="any")

# Time vs. voltage (linear model on polynomial features)
models = [
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear', LinearRegression(fit_intercept=False))
    ])
]
model = models[0]

# Extract x and y, converting the timestamps into continuous numeric variables
# (note: use the cleaned `datas`, not the raw `data`)
xdata = datas.iloc[:, 0:2]
x = xdata.apply(lambda x: pd.Series(time_format(x)), axis=1)   # apply runs the conversion row by row
y = datas.iloc[:, 4]   # Y values (voltage)

# Split into training and test sets; random_state seeds the shuffling
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Standardize the training and test sets
ss = StandardScaler()
# fit computes the mean and variance needed for standardization; transform applies them
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)

# Train one model per polynomial degree and plot each fit
t = np.arange(len(x_test))
N = 5
d_pool = np.arange(1, N, 1)   # degrees 1 through 4
m = d_pool.size
clrs = []                     # line colors
for c in np.linspace(16711680, 255, m):
    clrs.append('#%06x' % int(c))
line_width = 3

plt.figure(figsize=(12, 6), facecolor='w')   # create the figure with size and background color
for i, d in enumerate(d_pool):
    plt.subplot(N - 1, 1, i + 1)
    plt.plot(t, y_test, "r-", label=u"actual", ms=10, zorder=N)
    model.set_params(Poly__degree=d)   # set_params reconfigures a named step inside the Pipeline
    model.fit(x_train, y_train)
    lin = model.get_params('Linear')['Linear']
    output = u"degree %d, coefficients:" % d
    print(output, lin.coef_.ravel())   # ravel flattens a multi-dimensional array to 1-D
    y_hat = model.predict(x_test)
    s = model.score(x_test, y_test)
    z = N - 1 if (d == 2) else 0
    label = u"degree %d, R^2 = %.3f" % (d, s)
    plt.plot(t, y_hat, color=clrs[i], lw=line_width, alpha=0.75, label=label, zorder=z)
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.ylabel(u"degree %d" % d, fontsize=12)
plt.suptitle(u"Polynomial regression: time vs. voltage", fontsize=20)
plt.show()

# Run results:
degree 1, coefficients: [ 2.39902814e+02  0.00000000e+00  1.11022302e-16  4.23207026e+00  1.12142760e+00  2.02166226e-01  0.00000000e+00]
degree 2, coefficients: [ -1.11539792e+13   2.92968750e-03  -6.83593750e-03   4.14912590e+12   3.23876953e+00   3.21350098e-01   7.99560547e-03   2.44140625e-04   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00   1.11539792e+13  -2.54700928e+01  -6.71875000e-01   0.00000000e+00  -1.03660889e+01  -5.82519531e-01   0.00000000e+00  -8.44726562e-02   0.00000000e+00  0.00000000e+00]

The degree-3 and degree-4 runs print coefficient vectors of 84 and 210 terms respectively (omitted here for space): long runs of exact zeros interleaved with values of magnitude around 1e12-1e13. Coefficients exploding to that scale are the numerical signature of an ill-conditioned, badly overfitting fit.
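The script imports GridSearchCV but never uses it. A hedged sketch of how it could choose the polynomial degree automatically; it assumes x_train and y_train from the script above are in scope, and the parameter key 'Poly__degree' follows the step names of the Pipeline:

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

pipe = Pipeline([
    ('Poly', PolynomialFeatures()),
    ('Linear', LinearRegression(fit_intercept=False))
])

# Try degrees 1-4 with 3-fold cross-validation on the training data
grid = GridSearchCV(pipe, param_grid={'Poly__degree': [1, 2, 3, 4]}, cv=3)
grid.fit(x_train, y_train)
print("best degree:", grid.best_params_)
print("best CV score:", grid.best_score_)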
(4) Overfitting sample code, and a comparison of polynomial overfitting across several algorithms
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:ZhengzhengLiu
# Overfitting sample code
import sklearn
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression,LassoCV,RidgeCV,ElasticNetCV
from sklearn.linear_model.coordinate_descent import ConvergenceWarning
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
# Configure fonts so CJK labels and minus signs render correctly
mpl.rcParams["font.sans-serif"] = [u"SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
np.random.seed(100) # fix the random seed so the run is reproducible
np.set_printoptions(linewidth=1000,suppress=True)
N =10
x = np.linspace(0,6,N)+np.random.randn(N)
y = 1.8*x**3+x**2-14*x-7+np.random.randn(N)
x.shape = -1,1 # reshape into a single column
y.shape = -1,1
models = [
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',LinearRegression(fit_intercept=False))
]),
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',RidgeCV(alphas=np.logspace(-3,2,50),fit_intercept=False))
]),
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',LassoCV(alphas=np.logspace(-3,2,50),fit_intercept=False))
]),
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',ElasticNetCV(alphas=np.logspace(-3,2,50),l1_ratio=[.1,.5,.7,.9,.95,1],fit_intercept=False))
])
]
plt.figure(facecolor='w') # create the figure
degree = np.arange(1,N,4) # polynomial degrees: 1, 5, 9
dm = degree.size
colors = []
for c in np.linspace(16711680,255,dm):
c = c.astype(int)
colors.append('#%06x' % c)
model = models[0]
for i,d in enumerate(degree):
plt.subplot(int(np.ceil(dm/2.0)),2,i+1)
plt.plot(x,y,'ro',ms=10,zorder=N)
model.set_params(Poly__degree=d)
model.fit(x,y.ravel())
lin = model.get_params('Linear')['Linear']
output = u'degree %d, coefficients:' % d
print(output,lin.coef_.ravel())
x_hat = np.linspace(x.min(),x.max(),num=100)
x_hat.shape = -1,1
y_hat = model.predict(x_hat)
s = model.score(x,y)
z = N-1 if (d==2) else 0
label = u"%d階,準確率為:%.3f" % (d, s)
plt.plot(x_hat, y_hat, color=colors[i], lw=2, alpha=0.75, label=label, zorder=z)
plt.legend(loc="upper left")
plt.grid(True)
plt.xlabel('X', fontsize=16)
plt.ylabel('Y', fontsize=16)
plt.tight_layout(1,rect=(0,0,1,0.95))
plt.suptitle(u'Linear regression overfitting', fontsize=22)
plt.savefig('linear_regression_overfitting.png')
plt.show()
# Run results:
degree 1, coefficients: [-44.14102611  40.05964256]
degree 5, coefficients: [ -5.60899679 -14.80109301   0.75014858   2.11170671  -0.07724668   0.00566633]
degree 9, coefficients: [-2465.5996245   6108.67810881 -5112.02743837   974.75680049  1078.90344647  -829.50835134   266.13413535   -45.7177359     4.11585669    -0.15281174]
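Both scripts import warnings and ConvergenceWarning without using them; the intent was presumably to silence the convergence chatter that LassoCV/ElasticNetCV emit at high degrees. A minimal sketch of that, noting that the sklearn.linear_model.coordinate_descent import path matches sklearn 0.19, while newer releases expose the class from sklearn.exceptions:

import warnings
from sklearn.exceptions import ConvergenceWarning   # sklearn >= 0.20 location

# Ignore the convergence warnings raised by the coordinate-descent solvers
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)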
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:ZhengzhengLiu
# Comparing polynomial overfitting across several algorithms
import sklearn
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression,LassoCV,RidgeCV,ElasticNetCV
from sklearn.linear_model.coordinate_descent import ConvergenceWarning
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
# Configure fonts so CJK labels and minus signs render correctly
mpl.rcParams["font.sans-serif"] = [u"SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
np.random.seed(100) # fix the random seed so the run is reproducible
np.set_printoptions(linewidth=1000,suppress=True)
N =10
x = np.linspace(0,6,N)+np.random.randn(N)
y = 1.8*x**3+x**2-14*x-7+np.random.randn(N)
x.shape = -1,1 # reshape into a single column
y.shape = -1,1
models = [
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',LinearRegression(fit_intercept=False))
]),
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',RidgeCV(alphas=np.logspace(-3,2,50),fit_intercept=False))
]),
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',LassoCV(alphas=np.logspace(-3,2,50),fit_intercept=False))
]),
Pipeline([
('Poly',PolynomialFeatures()),
('Linear',ElasticNetCV(alphas=np.logspace(-3,2,50),l1_ratio=[.1,.5,.7,.9,.95,1],fit_intercept=False))
])
]
plt.figure(facecolor='w')
degree = np.arange(1, N, 2) # polynomial degrees: 1, 3, 5, 7, 9
dm = degree.size
colors = [] # line colors
for c in np.linspace(16711680, 255, dm):
c = c.astype(int)
colors.append('#%06x' % c)
titles = [u'Linear Regression', u'Ridge Regression', u'Lasso Regression', u'ElasticNet']
for t in range(4):
model = models[t]
plt.subplot(2, 2, t + 1)
plt.plot(x, y, 'ro', ms=10, zorder=N)
for i, d in enumerate(degree):
model.set_params(Poly__degree=d)
model.fit(x, y.ravel())
lin = model.get_params('Linear')['Linear']
output = u'%s: degree %d, coefficients:' % (titles[t], d)
print(output, lin.coef_.ravel())
x_hat = np.linspace(x.min(), x.max(), num=100)
x_hat.shape = -1, 1
y_hat = model.predict(x_hat)
s = model.score(x, y)
z = N - 1 if (d == 2) else 0
label = u'degree %d, R^2 = %.3f' % (d, s)
plt.plot(x_hat, y_hat, color=colors[i], lw=2, alpha=0.75, label=label, zorder=z)
plt.legend(loc='upper left')
plt.grid(True)
plt.title(titles[t])
plt.xlabel('X', fontsize=16)
plt.ylabel('Y', fontsize=16)
plt.tight_layout(1, rect=(0, 0, 1, 0.95))
plt.suptitle(u'Overfitting across different linear regression variants', fontsize=22)
plt.savefig('linear_regression_variants_overfitting.png')
plt.show()
# Run results:
Linear Regression: degree 1, coefficients: [-44.14102611  40.05964256]
Linear Regression: degree 3, coefficients: [ -6.80525963 -13.743068     0.93453895   1.79844791]
Linear Regression: degree 5, coefficients: [ -5.60899679 -14.80109301   0.75014858   2.11170671  -0.07724668   0.00566633]
Linear Regression: degree 7, coefficients: [-41.70721173  52.3857053  -29.56451339  -7.6632283   12.07162703  -3.86969096   0.53286096  -0.02725536]
Linear Regression: degree 9, coefficients: [-2465.59964345  6108.67815659 -5112.02747906   974.75680883  1078.9034548   -829.50835799   266.13413753   -45.71773628     4.11585673    -0.15281174]
Ridge Regression: degree 1, coefficients: [ -6.71593385  29.79090057]
Ridge Regression: degree 3, coefficients: [ -6.7819845  -13.73679293   0.92827639   1.79920954]
Ridge Regression: degree 5, coefficients: [-0.82920155 -1.07244754 -1.41803017 -0.93057536  0.88319116 -0.07073168]
Ridge Regression: degree 7, coefficients: [-1.62586368 -2.18512108 -1.82690987 -2.27495708  0.98685071  0.30551091 -0.10988434  0.00846908]
Ridge Regression: degree 9, coefficients: [-10.50566712  -6.12564342  -1.96421973   0.80200162   0.59148105  -0.23358229   0.20297054  -0.08109453   0.01327453  -0.00061892]
Lasso Regression: degree 1, coefficients: [ -0.          29.27359177]
Lasso Regression: degree 3, coefficients: [ -6.7688595  -13.75928024   0.93989323   1.79778598]
Lasso Regression: degree 5, coefficients: [ -0.         -12.00109345  -0.50746853   1.74395236   0.07086952  -0.00583605]
Lasso Regression: degree 7, coefficients: [-0.         -0.         -0.         -0.08083315  0.19550746  0.03066137 -0.00020584 -0.00046928]
Lasso Regression: degree 9, coefficients: [-0.         -0.         -0.         -0.          0.04439727  0.05587113  0.00109023 -0.00021498 -0.00004479 -0.00000674]
ElasticNet: degree 1, coefficients: [-13.22089654  32.08359338]
ElasticNet: degree 3, coefficients: [ -6.7688595  -13.75928024   0.93989323   1.79778598]
ElasticNet: degree 5, coefficients: [-1.65823671 -5.20271875 -1.26488859  0.94503683  0.2605984  -0.01683786]
ElasticNet: degree 7, coefficients: [-0.         -0.         -0.         -0.15812511  0.22150166  0.02955069 -0.00040066 -0.00046568]
ElasticNet: degree 9, coefficients: [-0.         -0.         -0.         -0.          0.05255118  0.05364699  0.00111995 -0.00020596 -0.00004365 -0.00000667]

Read together, the tables make the effect of regularization concrete: ordinary least squares lets the degree-9 coefficients blow up into the thousands, Ridge shrinks them down to single digits, and Lasso and ElasticNet zero out most high-order terms entirely, keeping only a handful of small coefficients.