1. 程式人生 > Python時間序列LSTM預測系列教程(10)-多步預測

Python時間序列LSTM預測系列教程(10)-多步預測

#coding=utf-8                                                                                                          
from pandas import read_csv      
from pandas import DataFrame     
from pandas import concat
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot    
from pandas import datetime      
 
def parser(x):
    """Parse a ``YYYY/MM/DD`` date string into a datetime object."""
    date_format = '%Y/%m/%d'
    return datetime.strptime(x, date_format)
 
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a sequence of observations as a table of lagged columns.

    Each output row holds the ``n_in`` previous observations followed by the
    current observation and the ``n_out - 1`` observations after it.

    Args:
        data: Observations, one row per time step. Anything ``DataFrame``
            accepts, e.g. a flat list, a list of rows, or a 1-D/2-D array.
        n_in: Number of lag steps (t-n_in .. t-1) to include.
        n_out: Number of steps (t .. t+n_out-1) to include.
        dropnan: When true, drop rows containing NaN (the rows at either
            end that shifting left incomplete).

    Returns:
        DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``, ordered from the oldest lag to the furthest
        future step.
    """
    # Wrapping in a DataFrame adds row and column labels.
    df = DataFrame(data)
    # Count the variables on the frame itself rather than guessing from the
    # input type: this is right for flat lists, lists of rows and arrays of
    # any supported rank alike.
    n_vars = df.shape[1]
    cols, names = [], []
    # Lagged columns: t-n_in, ..., t-1.
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # Current and future columns: t, t+1, ..., t+n_out-1.
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg
	
# Split the data into training and test sets.
def prepare_data(series, n_test, n_lay, n_seq):
    """Build the supervised table for ``series`` and split it in two.

    Args:
        series: Object whose ``.values`` array holds the observations.
        n_test: Number of trailing rows that go to the test set.
        n_lay: Number of lag steps, passed to ``series_to_supervised`` as
            its ``n_in`` argument.
        n_seq: Number of output steps, passed as ``n_out``.

    Returns:
        ``(train, test)``: the rows before the last ``n_test`` and the last
        ``n_test`` rows of the supervised table's values.
    """
    observations = series.values
    # Reshape the observations into a single column, one row per time step.
    column = observations.reshape(len(observations), 1)

    framed = series_to_supervised(column, n_lay, n_seq).values

    return framed[0:-n_test], framed[-n_test:]
 
# Persistence model forecast:
# use the last observation as the prediction for each of the next n_seq steps.
def persistence(last_ob, n_seq):
    """Return a list containing ``last_ob`` repeated ``n_seq`` times."""
    return [last_ob] * n_seq
 
# Produce persistence-model forecasts for every test row.
def make_forcast(train, test, n_lay, n_seq):
    """Forecast ``n_seq`` steps for each row of ``test`` with ``persistence``.

    Args:
        train: Unused; kept so existing callers keep working.
        test: 2-D array whose first ``n_lay`` columns of each row are the
            lagged inputs.
        n_lay: Number of lag (input) columns at the start of each row.
        n_seq: Number of steps to forecast per row.

    Returns:
        List with one forecast (a list of ``n_seq`` values) per test row.
    """
    # Slice with the function's own parameter rather than a module-level
    # name, so the result does not depend on globals defined by the caller.
    # The last lag column is the most recent observation in the row.
    return [persistence(row[0:n_lay][-1], n_seq) for row in test]
 
# Evaluate the forecasts.
def evaluate_forcasts(test, forcasts, n_lag, n_seq):
    """Print the RMSE of the forecasts for each horizon t+1 .. t+n_seq.

    Args:
        test: 2-D array; column ``n_lag + i`` holds the actual values for
            forecast step ``i``.
        forcasts: Sequence of forecasts, each indexable by step ``i``.
        n_lag: Number of lag (input) columns preceding the actual values.
        n_seq: Number of forecast steps to evaluate.
    """
    # Every statement below belongs inside the loop so that one RMSE is
    # reported per forecast step; indentation is spaces only.
    for i in range(n_seq):
        actual = test[:, (n_lag + i)]
        predicted = [forcast[i] for forcast in forcasts]
        # Single-argument print(...) prints the same on Python 2 and 3.
        print('predicted')
        print(predicted)
        rmse = sqrt(mean_squared_error(actual, predicted))
        # RMSE for each forecast length from 1 to n_seq.
        print('t+%d RMSE:%f' % ((i + 1), rmse))
 
def plot_forcasts(series, forcasts, n_test):
    """Plot the series and overlay each forecast as a red line segment.

    Args:
        series: Object supporting ``len()`` whose ``.values`` holds the
            observations.
        forcasts: List of forecasts, each a list of predicted values.
        n_test: Offset from the end of the series used to place the first
            forecast; forecast ``i`` is anchored at index
            ``len(series) - n_test + i - 1``.
    """
    # Original data.
    pyplot.plot(series.values)
    # Forecast data.
    for i, forcast in enumerate(forcasts):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forcast) + 1
        xaxis = list(range(off_s, off_e))
        # Start each segment at the observed value at off_s so the red line
        # joins onto the plotted series.
        yaxis = [series.values[off_s]] + forcast
        # Single-argument print(...) prints the same on Python 2 and 3.
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(series.values[off_s])
        pyplot.plot(xaxis, yaxis, color='red')
    pyplot.show()
 
# Load the series; the call is kept on one logical line so that the
# `date_parser=parser` argument is a single intact identifier.
series = read_csv('data_set/shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)

# One lag step as input, forecast 3 steps ahead.
n_lag = 1
n_seq = 3
# Given the last 12 months and a 3-month horizon, 10 forecasts can be made,
# i.e. 10 three-month windows.
n_test = 10
train, test = prepare_data(series, n_test, n_lag, n_seq)
# Single-argument print(...) prints the same on Python 2 and 3.
print('train data')
print(train)
print('test data')
print(test)
forecasts = make_forcast(train, test, n_lag, n_seq)
print('forecasts')
print(forecasts)
evaluate_forcasts(test, forecasts, n_lag, n_seq)
plot_forcasts(series, forecasts, n_test+2)