
Feature Selection for Regression Analysis (including a Stepwise Algorithm): A Python Implementation


# -*- coding: utf-8 -*-
"""
Created on Sat Aug 18 16:23:17 2018

@author: acadsoc
"""
import scipy.stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.ensemble import RandomForestRegressor
# sklearn.cross_validation / sklearn.grid_search were removed in scikit-learn 0.20;
# their contents now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from statsmodels.formula import api as smf

# zh_font is used below so plot labels (and possibly CJK feature names) render;
# the font path is an assumption -- point it at any suitable font on your system.
zh_font = FontProperties(fname='C:/Windows/Fonts/simhei.ttf')

class featureSelection():
    '''Feature-selection helpers for multiple linear regression.'''
    def __init__(self, random_state=None):
        self.random_state = random_state  # random seed

    def elasticNetRandomSearch(self, df, cv=10, n_iter=1000, n_jobs=-1, normalize=True):
        '''Select important variables with ElasticNet, tuned by randomized search.'''
        if normalize:  # standardize the data if requested
            df_std = StandardScaler().fit_transform(df)
            df = pd.DataFrame(df_std, columns=df.columns, index=df.index)

        X = df.iloc[:, 1:]
        y = df.iloc[:, 0]
        # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        eln = ElasticNet()
        param_rs = {'alpha': scipy.stats.expon(loc=0, scale=1),    # parameters to search
                    'l1_ratio': scipy.stats.uniform(loc=0, scale=1)}

        elasticnet_rs = RandomizedSearchCV(eln,  # set up the randomized search
                                           param_distributions=param_rs,
                                           scoring='r2',
                                           cv=cv,
                                           n_iter=n_iter,
                                           n_jobs=n_jobs)
        elasticnet_rs.fit(X, y)  # run the search
        # Refit the best model, then read off the selected variables and coefficients
        self.elasticnet_rs_best = ElasticNet(alpha=elasticnet_rs.best_params_['alpha'],
                                             l1_ratio=elasticnet_rs.best_params_['l1_ratio'])
        self.elasticnet_rs_best.fit(X, y)
        coef = pd.DataFrame(self.elasticnet_rs_best.coef_, index=df.columns[1:],
                            columns=['coef']).sort_values(by='coef', ascending=False)
        self.elasticnet_rs_coef_selected_ = coef[coef['coef'] > 0].index  # keep positive coefficients only
        self.elasticnet_rs_R2_ = 1 - np.mean((y.values.reshape(-1, 1) -
                                              self.elasticnet_rs_best.predict(X).reshape(-1, 1)) ** 2) / np.var(y)
        return self
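    # A minimal usage sketch (the DataFrame `df` is hypothetical; response in column 0):
    #   fs = featureSelection().elasticNetRandomSearch(df, cv=5, n_iter=200)
    #   print(fs.elasticnet_rs_coef_selected_, fs.elasticnet_rs_R2_)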

    def elasticNetFeatureSelectPlot(self, df, l1_ratio=.7, normalize=True,
                                    plot_width=12, plot_height=5, xlim_exp=[-5, 1], ylim=[-1, 1]):
        '''Plot ElasticNet regularization paths (coefficient weights vs. alpha).'''
        if normalize:  # standardize the data if requested
            df_std = StandardScaler().fit_transform(df)
            df = pd.DataFrame(df_std, columns=df.columns, index=df.index)

        X = df.iloc[:, 1:]
        y = df.iloc[:, 0]

        plt.figure(figsize=(plot_width, plot_height))
        ax = plt.subplot(111)
        colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'pink', 'lightgreen',
                  'lightblue', 'gray', 'indigo', 'orange', 'seagreen', 'gold', 'purple']
        weights, params = [], []
        for alpha in np.arange(-5, 1, 0.1, dtype=float):  # sweep alpha on a log grid
            eln = ElasticNet(alpha=10 ** alpha, l1_ratio=l1_ratio, random_state=123)
            eln.fit(X, y)
            weights.append(eln.coef_)
            params.append(10 ** alpha)

        weights = np.array(weights)
        for column, color in zip(range(weights.shape[1]), colors):
            plt.plot(params, weights[:, column], label=df.columns[column + 1], color=color)

        plt.axhline(0, color='black', linestyle='--', linewidth=3)
        plt.xlim(10 ** xlim_exp[0], 10 ** xlim_exp[1])
        plt.ylim(ylim)
        plt.title('ElasticNet variable selection path', fontproperties=zh_font)
        plt.ylabel('Coefficient weight', fontproperties=zh_font)
        plt.xlabel(r'$\alpha$')
        plt.xscale('log')
        plt.xticks(10 ** np.arange(xlim_exp[0], xlim_exp[1], dtype=float),
                   10 ** np.arange(xlim_exp[0], xlim_exp[1], dtype=float))
        ax.legend(loc='best', prop=zh_font)
        # plt.grid()
        plt.show()
        return self
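    # Sketch: visualize how coefficients shrink as alpha grows (`df` is hypothetical):
    #   featureSelection().elasticNetFeatureSelectPlot(df, l1_ratio=0.7, xlim_exp=[-5, 1])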

    def elasticNet(self, df, feat_selected=None, alpha=1, l1_ratio=.7, fit_intercept=True, normalize=False):
        '''Fit an ElasticNet regression; record coefficients, selected features and R2.'''
        if normalize:  # standardize the data if requested
            df_std = StandardScaler().fit_transform(df)
            df = pd.DataFrame(df_std, columns=df.columns, index=df.index)

        if feat_selected is not None:  # use the pre-selected variables if given
            X = df[feat_selected]
        else:
            X = df.iloc[:, 1:]
        y = df.iloc[:, 0]

        self.eln = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=fit_intercept)
        self.eln.fit(X, y)  # fit the model

        # variables, coefficients and R2
        self.elasticnet_coef_ = pd.DataFrame(self.eln.coef_, index=X.columns,
                                             columns=['coef']).sort_values(by='coef', ascending=False)
        self.elasticnet_coef_selected_ = self.elasticnet_coef_[self.elasticnet_coef_['coef'] > 0].index
        self.elasticnet_R2_ = 1 - np.mean((y.values.reshape(-1, 1) -
                                           self.eln.predict(X).reshape(-1, 1)) ** 2) / np.var(y)
        return self

    def featureBarhPlot(self, df_coef, figsize=(12, 6)):
        '''Horizontal bar plot of feature coefficients; df_coef must be a DataFrame.'''
        coef = df_coef.sort_values(by=df_coef.columns[0], axis=0, ascending=True)
        plt.figure(figsize=figsize)
        y_label = np.arange(len(coef))
        plt.barh(y_label, coef.iloc[:, 0])
        plt.yticks(y_label, coef.index, fontproperties=zh_font)

        for i in np.arange(len(coef)):  # annotate each bar with its value
            if coef.iloc[i, 0] >= 0:
                dist = 0.003 * coef.iloc[:, 0].max()
            else:
                dist = -0.02 * coef.iloc[:, 0].max()
            plt.text(coef.iloc[i, 0] + dist, i - 0.2, '%.3f' % coef.iloc[i, 0], fontproperties=zh_font)

        # plt.grid()
        plt.ylabel('Feature', fontproperties=zh_font)
        plt.xlabel('Coefficient', fontproperties=zh_font)
        plt.title('Feature coefficients', fontproperties=zh_font)
        plt.show()
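    # Sketch: plot the coefficients estimated by elasticNet() above (`df` is hypothetical):
    #   fs = featureSelection().elasticNet(df, alpha=0.1)
    #   fs.featureBarhPlot(fs.elasticnet_coef_)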

    def randomForestRandomSearch(self, df, cv=10, n_iter=100, n_jobs=-1, normalize=True):
        '''Select features with RandomForest (cumulative importance >= 0.85), tuned by randomized search.'''
        if normalize:  # standardize the data if requested
            df_std = StandardScaler().fit_transform(df)
            df = pd.DataFrame(df_std, columns=df.columns, index=df.index)

        X = df.iloc[:, 1:]
        y = df.iloc[:, 0]
        # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        rf = RandomForestRegressor()
        param_rs = {'n_estimators': np.arange(1, 500),            # parameters to search
                    'max_features': np.arange(1, X.shape[1] + 1)}

        rf_rs = RandomizedSearchCV(rf,  # set up the randomized search
                                   param_distributions=param_rs,
                                   scoring='r2',
                                   cv=cv,
                                   n_iter=n_iter,
                                   n_jobs=n_jobs)
        rf_rs.fit(X, y)  # run the search
        # Refit the best model, then read off feature importances
        self.rf_rs_best = RandomForestRegressor(n_estimators=rf_rs.best_params_['n_estimators'],
                                                max_features=rf_rs.best_params_['max_features'])
        self.rf_rs_best.fit(X, y)
        self.rf_rs_feat_impo_ = pd.DataFrame(self.rf_rs_best.feature_importances_, index=df.columns[1:],
                                             columns=['importance']).sort_values(by='importance', ascending=False)

        # keep the top features whose cumulative importance reaches 0.85
        n = 0
        for i, v in enumerate(self.rf_rs_feat_impo_.values.cumsum()):
            if v >= 0.85:
                n = i
                break

        self.rf_rs_feat_selected_ = self.rf_rs_feat_impo_.index[:n + 1]
        self.rf_rs_R2_ = 1 - np.mean((y.values.reshape(-1, 1) -
                                      self.rf_rs_best.predict(X).reshape(-1, 1)) ** 2) / np.var(y)
        return self

    def randomForest(self, df, feat_selected=None, impo_cum_threshold=.85,
                     n_estimators=100, max_features='auto', normalize=False):
        '''Fit a RandomForest regression and select features by cumulative importance.'''
        if normalize:  # standardize the data if requested
            df_std = StandardScaler().fit_transform(df)
            df = pd.DataFrame(df_std, columns=df.columns, index=df.index)

        if feat_selected is not None:  # use the pre-selected variables if given
            X = df[feat_selected]
        else:
            X = df.iloc[:, 1:]
        y = df.iloc[:, 0]

        self.rf = RandomForestRegressor(n_estimators=n_estimators, max_features=max_features)
        self.rf.fit(X, y)  # fit the model

        # feature importances and R2
        self.rf_feat_impo_ = pd.DataFrame(self.rf.feature_importances_, index=X.columns,
                                          columns=['importance']).sort_values(by='importance', ascending=False)

        # keep the top features whose cumulative importance reaches the threshold
        n = 0
        for i, v in enumerate(self.rf_feat_impo_.values.cumsum()):
            if v >= impo_cum_threshold:
                n = i
                break

        self.rf_feat_selected_ = self.rf_feat_impo_.index[:n + 1]
        self.rf_R2_ = 1 - np.mean((y.values.reshape(-1, 1) - self.rf.predict(X).reshape(-1, 1)) ** 2) / np.var(y)
        return self
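    # Sketch: select features covering 85% cumulative importance (`df` is hypothetical):
    #   fs = featureSelection().randomForest(df, impo_cum_threshold=0.85)
    #   print(fs.rf_feat_selected_)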

    def stepwise(self, df, response, intercept=True, criterion='bic', f_pvalue_enter=.05, p_value_enter=.05,
                 direction='backward', show_step=True, val_enter=None, val_remove=None, **kw):
        '''Stepwise regression: forward, backward or bidirectional variable selection.'''
        criterion_list = ['bic', 'aic', 'ssr', 'rsquared', 'rsquared_adj']
        if criterion not in criterion_list:
            raise ValueError('Invalid criterion; must be one of: %s' % criterion_list)

        direction_list = ['backward', 'forward', 'both']
        if direction not in direction_list:
            raise ValueError('Invalid direction; must be one of: %s' % direction_list)

        # default entry thresholds per criterion
        p_enter = {'bic': 0.0, 'aic': 0.0, 'ssr': 0.05, 'rsquared': 0.05, 'rsquared_adj': -0.05}
        if val_enter:  # override the threshold for the chosen criterion if given
            p_enter[criterion] = val_enter

        # default removal thresholds per criterion
        p_remove = {'bic': 0.01, 'aic': 0.01, 'ssr': 0.1, 'rsquared': 0.05, 'rsquared_adj': -0.05}
        if val_remove:  # override the threshold for the chosen criterion if given
            p_remove[criterion] = val_remove

        ###### forward ######
        if direction == 'forward':
            remaining = list(df.columns)  # candidate predictors
            remaining.remove(response)
            selected = []  # variables already entered into the model
            # initialize the current score and the best new score
            if intercept:  # with intercept
                formula = "{} ~ {} + 1".format(response, remaining[0])
            else:
                formula = "{} ~ {} - 1".format(response, remaining[0])

            result = smf.ols(formula, df).fit()  # OLS fit
            current_score = getattr(result, criterion)
            best_new_score = getattr(result, criterion)

            if show_step:
                print('\nstepwise starting:\n')
            # loop while candidates remain and the score keeps improving
            while remaining and (current_score == best_new_score):
                scores_with_candidates = []  # (score, candidate, F, F p-value) for this round
                for candidate in remaining:  # try adding each remaining variable in turn
                    if intercept:
                        formula = "{} ~ {} + 1".format(response, ' + '.join(selected + [candidate]))
                    else:
                        formula = "{} ~ {} - 1".format(response, ' + '.join(selected + [candidate]))

                    result = smf.ols(formula, df).fit()  # OLS fit
                    fvalue = result.fvalue
                    f_pvalue = result.f_pvalue
                    score = getattr(result, criterion)
                    scores_with_candidates.append((score, candidate, fvalue, f_pvalue))

                if criterion == 'ssr':  # lower is better
                    scores_with_candidates.sort(reverse=True)  # sort descending; pop() yields the minimum
                    best_new_score, best_candidate, best_new_fvalue, best_new_f_pvalue = scores_with_candidates.pop()
                    # enter the variable if the score improves enough and the F-test is significant
                    if ((current_score - best_new_score) > p_enter[criterion]) and (best_new_f_pvalue < f_pvalue_enter):
                        remaining.remove(best_candidate)
                        selected.append(best_candidate)
                        current_score = best_new_score  # update the current score
                        if show_step:  # print the stepwise trace
                            print('Adding %s, SSR = %.3f, Fstat = %.3f, FpValue = %.3e' %
                                  (best_candidate, best_new_score, best_new_fvalue, best_new_f_pvalue))
                elif criterion in ['bic', 'aic']:  # lower is better
                    scores_with_candidates.sort(reverse=True)  # sort descending; pop() yields the minimum
                    best_new_score, best_candidate, best_new_fvalue, best_new_f_pvalue = scores_with_candidates.pop()
                    if (current_score - best_new_score) > p_enter[criterion]:  # score improved enough
                        remaining.remove(best_candidate)
                        selected.append(best_candidate)
                        current_score = best_new_score
                        if show_step:
                            print('Adding %s, %s = %.3f' % (best_candidate, criterion, best_new_score))
                else:  # rsquared / rsquared_adj: higher is better
                    scores_with_candidates.sort()  # sort ascending; pop() yields the maximum
                    best_new_score, best_candidate, best_new_fvalue, best_new_f_pvalue = scores_with_candidates.pop()
                    if (best_new_score - current_score) > p_enter[criterion]:
                        remaining.remove(best_candidate)
                        selected.append(best_candidate)
                        current_score = best_new_score
                        if show_step:
                            print('Adding %s, %s = %.3f' % (best_candidate, criterion, best_new_score))

            if intercept:
                formula = "{} ~ {} + 1".format(response, ' + '.join(selected))
            else:
                formula = "{} ~ {} - 1".format(response, ' + '.join(selected))

            self.stepwise_model = smf.ols(formula, df).fit()  # fit the final model

            if show_step:
                print('\nLinear regression model:', '\n  ', self.stepwise_model.model.formula)
                print('\n', self.stepwise_model.summary())

        ###### backward ######
        if direction == 'backward':
            remaining, selected = set(df.columns), set(df.columns)  # candidate predictors
            remaining.remove(response)
            selected.remove(response)  # variables currently in the model
            # initialize the current score and the new score after elimination
            if intercept:
                formula = "{} ~ {} + 1".format(response, ' + '.join(selected))
            else:
                formula = "{} ~ {} - 1".format(response, ' + '.join(selected))

            result = smf.ols(formula, df).fit()  # OLS fit on the full model
            current_score = getattr(result, criterion)
            worst_new_score = getattr(result, criterion)

            if show_step:
                print('\nstepwise starting:\n')
            # loop while variables remain and the score keeps updating
            while remaining and (current_score == worst_new_score):
                scores_with_eliminations = []  # (score, elimination, F, F p-value) for this round
                for elimination in remaining:  # try dropping each variable in turn
                    # note: the original used set(elimination), which splits the name into characters
                    if intercept:
                        formula = "{} ~ {} + 1".format(response, ' + '.join(selected - {elimination}))
                    else:
                        formula = "{} ~ {} - 1".format(response, ' + '.join(selected - {elimination}))

                    result = smf.ols(formula, df).fit()  # OLS fit
                    fvalue = result.fvalue
                    f_pvalue = result.f_pvalue
                    score = getattr(result, criterion)
                    scores_with_eliminations.append((score, elimination, fvalue, f_pvalue))

                if criterion == 'ssr':  # lower is better
                    scores_with_eliminations.sort(reverse=True)  # pop() yields the elimination with the lowest SSR
                    worst_new_score, worst_elimination, worst_new_fvalue, worst_new_f_pvalue = scores_with_eliminations.pop()
                    # drop the variable if the SSR increase stays within tolerance and the F-test is significant
                    if ((worst_new_score - current_score) < p_remove[criterion]) and (worst_new_f_pvalue < f_pvalue_enter):
                        remaining.remove(worst_elimination)
                        selected.remove(worst_elimination)
                        current_score = worst_new_score  # update the current score
                        if show_step:  # print the stepwise trace
                            print('Removing %s, SSR = %.3f, Fstat = %.3f, FpValue = %.3e' %
                                  (worst_elimination, worst_new_score, worst_new_fvalue, worst_new_f_pvalue))
                elif criterion in ['bic', 'aic']:  # lower is better
                    scores_with_eliminations.sort(reverse=True)  # pop() yields the elimination with the lowest score
                    worst_new_score, worst_elimination, worst_new_fvalue, worst_new_f_pvalue = scores_with_eliminations.pop()
                    if (worst_new_score - current_score) < p_remove[criterion]:  # change within tolerance
                        remaining.remove(worst_elimination)
                        selected.remove(worst_elimination)
                        current_score = worst_new_score
                        if show_step:
                            print('Removing %s, %s = %.3f' % (worst_elimination, criterion, worst_new_score))
                else:  # rsquared / rsquared_adj: higher is better
                    scores_with_eliminations.sort()  # pop() yields the elimination with the highest score
                    worst_new_score, worst_elimination, worst_new_fvalue, worst_new_f_pvalue = scores_with_eliminations.pop()
                    if (current_score - worst_new_score) < p_remove[criterion]:
                        remaining.remove(worst_elimination)
                        selected.remove(worst_elimination)
                        current_score = worst_new_score
                        if show_step:
                            print('Removing %s, %s = %.3f' % (worst_elimination, criterion, worst_new_score))

            if intercept:
                formula = "{} ~ {} + 1".format(response, ' + '.join(selected))
            else:
                formula = "{} ~ {} - 1".format(response, ' + '.join(selected))

            self.stepwise_model = smf.ols(formula, df).fit()  # fit the final model

            if show_step:
                print('\nLinear regression model:', '\n  ', self.stepwise_model.model.formula)
                print('\n', self.stepwise_model.summary())

        ###### both ######
        if direction == 'both':
            remaining = list(df.columns)  # candidate predictors
            remaining.remove(response)
            selected = []  # variables already entered into the model
            # initialize the current score and the best new score
            if intercept:
                formula = "{} ~ {} + 1".format(response, remaining[0])
            else:
                formula = "{} ~ {} - 1".format(response, remaining[0])

            result = smf.ols(formula, df).fit()  # OLS fit
            current_score = getattr(result, criterion)
            best_new_score = getattr(result, criterion)

            if show_step:
                print('\nstepwise starting:\n')
            # loop while candidates remain and the score keeps improving
            iter_times = 0
            while remaining and (current_score == best_new_score):
                scores_with_candidates = []  # (score, candidate, F, F p-value) for this round
                for candidate in remaining:  # try adding each remaining variable in turn
                    if intercept:
                        formula = "{} ~ {} + 1".format(response, ' + '.join(selected + [candidate]))
                    else:
                        formula = "{} ~ {} - 1".format(response, ' + '.join(selected + [candidate]))

                    result = smf.ols(formula, df).fit()  # OLS fit
                    fvalue = result.fvalue
                    f_pvalue = result.f_pvalue
                    score = getattr(result, criterion)
                    scores_with_candidates.append((score, candidate, fvalue, f_pvalue))

                if criterion == 'ssr':  # lower is better
                    scores_with_candidates.sort(reverse=True)  # sort descending; pop() yields the minimum
                    best_new_score, best_candidate, best_new_fvalue, best_new_f_pvalue = scores_with_candidates.pop()
                    if ((current_score - best_new_score) > p_enter[criterion]) and (best_new_f_pvalue < f_pvalue_enter):
                        remaining.remove(best_candidate)
                        selected.append(best_candidate)
                        current_score = best_new_score  # update the current score
                        if show_step:  # print the stepwise trace
                            print('Adding %s, SSR = %.3f, Fstat = %.3f, FpValue = %.3e' %
                                  (best_candidate, best_new_score, best_new_fvalue, best_new_f_pvalue))
                elif criterion in ['bic', 'aic']:  # lower is better
                    scores_with_candidates.sort(reverse=True)  # sort descending; pop() yields the minimum
                    best_new_score, best_candidate, best_new_fvalue, best_new_f_pvalue = scores_with_candidates.pop()
                    if (current_score - best_new_score) > p_enter[criterion]:
                        remaining.remove(best_candidate)
                        selected.append(best_candidate)
                        current_score = best_new_score
                        if show_step:
                            print('Adding %s, %s = %.3f' % (best_candidate, criterion, best_new_score))
                else:  # rsquared / rsquared_adj: higher is better
                    scores_with_candidates.sort()  # sort ascending; pop() yields the maximum
                    best_new_score, best_candidate, best_new_fvalue, best_new_f_pvalue = scores_with_candidates.pop()
                    if (best_new_score - current_score) > p_enter[criterion]:
                        remaining.remove(best_candidate)
                        selected.append(best_candidate)
                        current_score = best_new_score
                        if show_step:
                            print('Adding %s, %s = %.3f' % (best_candidate, criterion, best_new_score))

                if intercept:
                    formula = "{} ~ {} + 1".format(response, ' + '.join(selected))
                else:
                    formula = "{} ~ {} - 1".format(response, ' + '.join(selected))

                result = smf.ols(formula, df).fit()  # refit with the current selection
                if iter_times >= 1:  # from the second iteration on, check each variable's p-value
                    # guard added: skip the intercept term when hunting the largest p-value
                    pvalues = result.pvalues.drop('Intercept') if intercept else result.pvalues
                    if pvalues.max() > p_value_enter:
                        var_removed = pvalues.idxmax()     # variable with the largest p-value
                        p_value_removed = pvalues.max()
                        selected.remove(var_removed)
                        if show_step:
                            print('Removing %s, Pvalue = %.3f' % (var_removed, p_value_removed))
                iter_times += 1

            if intercept:
                formula = "{} ~ {} + 1".format(response, ' + '.join(selected))
            else:
                formula = "{} ~ {} - 1".format(response, ' + '.join(selected))

            self.stepwise_model = smf.ols(formula, df).fit()  # fit the final model

            if show_step:
                print('\nLinear regression model:', '\n  ', self.stepwise_model.model.formula)
                print('\n', self.stepwise_model.summary())

        # variables selected by the final model
        if intercept:
            self.stepwise_feat_selected = list(self.stepwise_model.params.index[1:])
        else:
            self.stepwise_feat_selected = list(self.stepwise_model.params.index)
        return self
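A minimal end-to-end sketch of how the class might be driven. The synthetic data below (make_regression, the column names, the thresholds) is illustrative, not part of the original post:

if __name__ == '__main__':
    from sklearn.datasets import make_regression

    # Build a toy dataset: response in the first column, predictors after it
    X, y = make_regression(n_samples=200, n_features=6, n_informative=3, noise=10, random_state=0)
    cols = ['y'] + ['x%d' % i for i in range(1, 7)]
    df = pd.DataFrame(np.column_stack([y, X]), columns=cols)

    fs = featureSelection(random_state=0)
    fs.stepwise(df, response='y', criterion='bic', direction='both')
    print('Selected by stepwise:', fs.stepwise_feat_selected)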
