1. 程式人生 > 吳裕雄 數據挖掘與分析案例實戰(7)——嶺回歸與LASSO回歸模型

吳裕雄 數據挖掘與分析案例實戰(7)——嶺回歸與LASSO回歸模型

Y軸 otl error 處理 回歸 models 關系 err idg

# 導入第三方模塊
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import Ridge,RidgeCV

# Load the diabetes data set from the Excel workbook.
# NOTE: the original call passed a `sep` argument; that is a CSV-reader option
# which pd.read_excel does not define, so it has been dropped.
diabetes = pd.read_excel(r'F:\\python_Data_analysis_and_mining\\08\\diabetes.xlsx')
print(diabetes.shape)
print(diabetes.head())

# Predictors: skip the first two columns (patient sex and age, per the
# original note) and the last column (the response variable).
predictors = diabetes.columns[2:-1]

# Hold out 20% of the rows as a test set; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    diabetes[predictors],
    diabetes['Y'],
    test_size=0.2,
    random_state=1234,
)

# Candidate penalty strengths: 200 values log-spaced between 1e-5 and 1e2.
Lambdas = np.logspace(-5, 2, 200)
print(Lambdas.shape)

# Fit one ridge model per Lambda and collect its coefficient vector.
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer releases this call needs an explicit preprocessing step.
ridge_cofficients = []
for Lambda in Lambdas:
    ridge = Ridge(alpha=Lambda, normalize=True)
    ridge.fit(X_train, y_train)
    ridge_cofficients.append(ridge.coef_)
print(np.shape(ridge_cofficients))

# Plot the coefficient paths against Lambda.
# Font setting so CJK labels render, and so the minus sign on the axes
# is not drawn as a missing glyph.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
plt.style.use('ggplot')
plt.plot(Lambdas, ridge_cofficients)
# Lambda spans seven orders of magnitude, so use a logarithmic x axis.
plt.xscale('log')
plt.xlabel('Lambda')
plt.ylabel('Cofficients')
plt.show()

技術分享圖片

# Cross-validated ridge regression: every candidate Lambda is scored with
# 10-fold cross-validation using negative mean squared error.
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer releases this call needs an explicit preprocessing step.
ridge_cv = RidgeCV(
    alphas=Lambdas,
    normalize=True,
    scoring='neg_mean_squared_error',
    cv=10,
)
print(ridge_cv)
# Fit on the training split.
ridge_cv.fit(X_train, y_train)
# Penalty strength selected by cross-validation.
ridge_best_Lambda = ridge_cv.alpha_
print(ridge_best_Lambda)

技術分享圖片

# 導入第三方包中的函數
from sklearn.metrics import mean_squared_error

# Refit ridge regression with the cross-validated Lambda.
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer releases this call needs an explicit preprocessing step.
ridge = Ridge(alpha=ridge_best_Lambda, normalize=True)
ridge.fit(X_train, y_train)
# Intercept and coefficients as one labelled Series. It is printed explicitly
# because a bare expression is only echoed in an interactive session.
ridge_params = pd.Series(
    index=['Intercept'] + X_train.columns.tolist(),
    data=[ridge.intercept_] + ridge.coef_.tolist(),
)
print(ridge_params)
# Predict on the held-out split.
ridge_predict = ridge.predict(X_test)
# Root mean squared error on the test set.
RMSE = np.sqrt(mean_squared_error(y_test, ridge_predict))
print(RMSE)

技術分享圖片

# 導入第三方模塊中的函數
from sklearn.linear_model import Lasso,LassoCV

# Fit one LASSO model per Lambda and collect its coefficient vector.
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer releases these calls need an explicit preprocessing step.
lasso_cofficients = []
for Lambda in Lambdas:
    lasso = Lasso(alpha=Lambda, normalize=True, max_iter=10000)
    lasso.fit(X_train, y_train)
    lasso_cofficients.append(lasso.coef_)

# Plot the coefficient paths against Lambda on a logarithmic x axis.
plt.plot(Lambdas, lasso_cofficients)
plt.xscale('log')
plt.xlabel('Lambda')
plt.ylabel('Cofficients')
plt.show()

# Cross-validated LASSO: 10-fold CV over the same Lambda grid.
lasso_cv = LassoCV(alphas=Lambdas, normalize=True, cv=10, max_iter=10000)
lasso_cv.fit(X_train, y_train)
# Penalty strength selected by cross-validation.
lasso_best_alpha = lasso_cv.alpha_
print(lasso_best_alpha)

# Refit LASSO with the selected penalty.
lasso = Lasso(alpha=lasso_best_alpha, normalize=True, max_iter=10000)
lasso.fit(X_train, y_train)
# Intercept and coefficients as one labelled Series. It is printed explicitly
# because a bare expression is only echoed in an interactive session.
lasso_params = pd.Series(
    index=['Intercept'] + X_train.columns.tolist(),
    data=[lasso.intercept_] + lasso.coef_.tolist(),
)
print(lasso_params)

# Predict on the held-out split.
lasso_predict = lasso.predict(X_test)
# Root mean squared error on the test set.
RMSE = np.sqrt(mean_squared_error(y_test, lasso_predict))
print(RMSE)

技術分享圖片

# 導入第三方模塊
from statsmodels import api as sms

# Prepend a constant column of ones to both splits so that OLS fits an intercept.
X_train2 = sms.add_constant(X_train)
X_test2 = sms.add_constant(X_test)

# Ordinary least squares baseline. The array-based class `sms.OLS` takes
# (endog, exog) directly; `sms.formula` is the formula-string interface and
# does not reliably expose an upper-case `OLS` across statsmodels versions.
linear = sms.OLS(y_train, X_train2).fit()
# Fitted intercept and coefficients.
print(linear.params)

# Predict on the held-out split.
linear_predict = linear.predict(X_test2)
# Root mean squared error on the test set.
RMSE = np.sqrt(mean_squared_error(y_test, linear_predict))
print(RMSE)

技術分享圖片

吳裕雄 數據挖掘與分析案例實戰(7)——嶺回歸與LASSO回歸模型