Python時間序列LSTM預測系列教程(10)-多步預測
阿新 • • 發佈:2019-01-05
# coding=utf-8
"""Multi-step forecasting baseline using a persistence (naive) model.

The script frames a univariate series as a supervised-learning table,
holds out the last rows as a test set, forecasts every horizon with the
last observed value, reports the RMSE per horizon and plots the result.
"""
from datetime import datetime
from math import sqrt

from pandas import DataFrame
from pandas import concat
from pandas import read_csv


def parser(x):
    """Parse a ``YYYY/MM/DD`` date string into a ``datetime``."""
    return datetime.strptime(x, '%Y/%m/%d')


def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a sequence as a supervised-learning table.

    Args:
        data: Observations as a list or a 2-D array (rows are time steps).
        n_in: Number of lagged steps (t-n_in .. t-1) used as input columns.
        n_out: Number of steps (t .. t+n_out-1) used as output columns.
        dropnan: Drop the rows that contain NaN introduced by shifting.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``,
        ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    # Wrapping in a DataFrame gives the data row and column labels.
    df = DataFrame(data)
    # Take the variable count from the frame itself so that a list of
    # multi-variable rows gets one name per actual column.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: t-n_in, ..., t-1.
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # Forecast sequence: t, t+1, ..., t+n_out-1.
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg


def prepare_data(series, n_test, n_lay, n_seq):
    """Split a series into supervised train and test arrays.

    Args:
        series: Object exposing ``.values`` (e.g. a pandas Series).
        n_test: Non-negative number of trailing supervised rows kept
            for testing.
        n_lay: Number of lag observations used as input columns.
        n_seq: Number of future steps used as output columns.

    Returns:
        A ``(train, test)`` tuple of 2-D arrays.
    """
    raw_values = series.values
    raw_values = raw_values.reshape(len(raw_values), 1)
    supervised_values = series_to_supervised(raw_values, n_lay, n_seq).values
    # An explicit split index keeps n_test == 0 meaningful; the slice
    # pair [0:-0] / [-0:] would put every row in the test set.
    split = max(len(supervised_values) - n_test, 0)
    return supervised_values[:split], supervised_values[split:]


def persistence(last_ob, n_seq):
    """Repeat the last observation as the forecast for ``n_seq`` steps."""
    return [last_ob] * n_seq


def make_forcast(train, test, n_lay, n_seq):
    """Produce a persistence forecast for every row of ``test``.

    Args:
        train: Training rows; unused by the persistence model.
        test: Rows laid out as ``n_lay`` input columns followed by the
            output columns.
        n_lay: Number of leading input columns in each row.
        n_seq: Number of steps to forecast per row.

    Returns:
        A list with one ``n_seq``-long forecast list per test row.
    """
    forcasts = []
    for row in test:
        # Use the n_lay argument, not a module-level global, to locate
        # the inputs; the most recent input is the last of them.
        inputs = row[:n_lay]
        forcasts.append(persistence(inputs[-1], n_seq))
    return forcasts


def evaluate_forcasts(test, forcasts, n_lag, n_seq):
    """Print the predictions and the RMSE for each forecast horizon.

    Args:
        test: 2-D array with the actual values in columns ``n_lag`` onward.
        forcasts: One forecast list per test row.
        n_lag: Number of input columns preceding the actual values.
        n_seq: Number of forecast horizons to evaluate.
    """
    # Imported here so the data helpers work without scikit-learn.
    from sklearn.metrics import mean_squared_error

    for i in range(n_seq):
        actual = test[:, (n_lag + i)]
        predicted = [forcast[i] for forcast in forcasts]
        print('predicted')
        print(predicted)
        rmse = sqrt(mean_squared_error(actual, predicted))
        # One RMSE per horizon, from t+1 up to t+n_seq.
        print('t+%d RMSE:%f' % ((i + 1), rmse))


def plot_forcasts(series, forcasts, n_test):
    """Plot the series and overlay each forecast in red.

    Args:
        series: The full original series (exposes ``.values``).
        forcasts: One forecast list per test row.
        n_test: Number of trailing observations covered by the forecasts.
    """
    # Imported here so the data helpers work without matplotlib.
    from matplotlib import pyplot

    history = series.values
    # Original data.
    pyplot.plot(history)
    # Forecast data: each line starts at the observation it was made from.
    for i in range(len(forcasts)):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forcasts[i]) + 1
        xaxis = list(range(off_s, off_e))
        yaxis = [history[off_s]] + forcasts[i]
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(history[off_s])
        pyplot.plot(xaxis, yaxis, color='red')
    pyplot.show()


def main():
    """Run the persistence baseline on the shampoo-sales data set."""
    # Parse the index by hand rather than with read_csv's squeeze= and
    # date_parser= options, which newer pandas releases no longer accept.
    frame = read_csv('data_set/shampoo-sales.csv', header=0, index_col=0)
    frame.index = frame.index.map(parser)
    series = frame.iloc[:, 0]

    # One lag step as input, three steps forecast.
    n_lag = 1
    n_seq = 3
    # The last 12 months, forecast 3 at a time, give 10 forecasts.
    n_test = 10

    train, test = prepare_data(series, n_test, n_lag, n_seq)
    print('train data')
    print(train)
    print('test data')
    print(test)
    forecasts = make_forcast(train, test, n_lag, n_seq)
    print('forecasts')
    print(forecasts)
    evaluate_forcasts(test, forecasts, n_lag, n_seq)
    # The test rows span n_test + n_seq - 1 trailing observations.
    plot_forcasts(series, forecasts, n_test + n_seq - 1)


if __name__ == '__main__':
    main()