Blaze the Way of Quantitative Trading
阿新 • • 發佈:2019-02-02
http://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing
trend following/momentum和mean-reversion是兩種最基本的設計策略的思路,對於後者而言,首先我們要知道time-series是不是滿足mean-reversion的。從數學上說,連續的均值回覆過程就是隨機過程中的OU過程(Ornstein-Uhlenbeck),不同於布朗運動。我們可以利用pandas和statsmodel進行ADF檢驗來判斷均值回覆性(均值回覆性是平穩性的必要條件),以判斷序列是不是具有均值回覆性。如果我們得到的統計量大於臨界值,那麼不能拒絕原假設,即序列是非均值回覆的,而是隨機遊走。如果是從網路資料庫抓取資料,上面的過程可以寫為
# Import the Time Series libraryimport statsmodels.tsa.stattools as ts # Import Datetime and the Pandas DataReaderfrom datetime import datetime from pandas.io.data importDataReader# Download the Google OHLCV data from 1/1/2000 to 1/1/2013 goog =DataReader("GOOG","yahoo", datetime(2000,1,1), datetime(2013,1,1))
首先對時間序列進行單位根檢驗
ts.adfuller(,1)
我們還可以用HURST INDEX對平穩性進行檢驗
H<0.5 - The time series is mean reverting
H=0.5 - The time series is a Geometric Brownian Motion
H>0.5 - The time series is trending
from numpy import cumsum, log, polyfit, sqrt, std, subtract from numpy.random import randn def hurst下面討論協整的檢驗,Cointegrated Augmented Dickey-Fuller Test,至於協整這一概念的闡述本質兩個序列的線性組合為平穩的,(ts):"""Returns the Hurst Exponent of the time series vector ts"""# Create the range of lag values lags = range(2,100)# Calculate the array of the variances of the lagged differences tau =[sqrt(std(subtract(ts[lag:], ts[:-lag])))for lag in lags]# Use a linear fit to estimate the Hurst Exponent poly = polyfit(log(lags), log(tau),1)# Return the Hurst exponent from the polyfit outputreturn poly[0]*2.0# Create a Gometric Brownian Motion, Mean-Reverting and Trending Series gbm = log(cumsum(randn(100000))+1000) mr = log(randn(100000)+1000) tr = log(cumsum(randn(100000)+1)+1000)# Output the Hurst Exponent for each of the above series# and the price of Google (the Adjusted Close price) for # the ADF test given above in the articleprint"Hurst(GBM): %s"% hurst(gbm)print"Hurst(MR): %s"% hurst(mr)print"Hurst(TR): %s"% hurst(tr)# Assuming you have run the above code to obtain 'goog'!print"Hurst(GOOG): %s"% hurst(goog['Adj Close'])
#做統計套利時我們常常要作散點圖以及進行協整檢驗
# cadf.py
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import pandas.io.data as web
import pprint
import statsmodels.tsa.stattools as ts
from pandas.stats.api import ols
def plot_price_series(df, ts1, ts2):
months = mdates.MonthLocator() #可以是DayLocator HourLocator
fig, ax = plt.subplots()
ax.plot(df.index, df[ts1], label=ts1)
ax.plot(df.index, df[ts2], label=ts2)
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.set_xlim(datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1))
ax.grid(True)
fig.autofmt_xdate()
plt.xlabel('Month/Year')
plt.ylabel('Price ($)')
plt.title('%s and %s Daily Prices' % (ts1, ts2))
plt.legend()
plt.show()
import datetime
#import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#import pandas as pd
#import pandas.io.data as web
#import pprint
import statsmodels.tsa.stattools as ts
from pandas.stats.api import ols
#我們的main函式裡面的DataFrame就是d,而程式處理後的df就是mergeDF和Pnl等結果
def plot_price_series(df, ts1, ts2):
hours = mdates.HourLocator() #可以是DayLocator HourLocator
fig, ax = plt.subplots()
ax.plot(df.index, df[ts1], label=ts1) #這裡要把index重設一下,因為原本的index是pTime和sym_
ax.plot(df.index, df[ts2], label=ts2)
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.set_xlim(datetime.datetime(2014, 1, 2), datetime.datetime(2014, 1, 1)) #開始和結束日期要和樣本統一
ax.grid(True)
fig.autofmt_xdate()
plt.xlabel('Hour/Year')
plt.ylabel('TrdPriceLast ($)')
plt.title('%s and %s Prices' % (ts1, ts2))
plt.legend()
plt.show()
def plot_scatter_series(df, ts1, ts2):
plt.xlabel('%s TrdPriceLast ($)' % ts1)
plt.ylabel('%s TrdPriceLast ($)' % ts2)
plt.title('%s and %s Price Scatterplot' % (ts1, ts2))
plt.scatter(df[ts1], df[ts2])
plt.show()
def plot_residuals(df):
hours = mdates.HourLocator()
fig, ax = plt.subplots()
ax.plot(df.index, df["res"], label="Residuals")
ax.xaxis.set_major_locator(Hours)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.set_xlim(datetime.datetime(2014, 1, 2), datetime.datetime(2014, 1, 1))
ax.grid(True)
fig.autofmt_xdate()
plt.xlabel('Hour/Year')
plt.ylabel('TrdPriceLast ($)')
plt.title('Residual Plot')
plt.legend()
plt.plot(df["res"])
plt.show()
if __name__ == "__main__":
start = datetime.datetime(2014, 1, 2)
end = datetime.datetime(2014, 1, 1)
arex = web.DataReader("AREX", "yahoo", start, end)
wll = web.DataReader("WLL", "yahoo", start, end)
df = pd.DataFrame(index=arex.index)
d = d.reset_index()
d = d.set_index(["pTime"])
df["D"] = d[d["sym"]=="D"][TrdPriceLast]
df["E"] = d[d["sym"]=="E"][TrdPriceLast]
# Plot the two time series
plot_price_series(df, "D", "E")
# Display a scatter plot of the two time series
plot_scatter_series(df, "D", "E")
# Calculate optimal hedge ratio "beta"
res = ols(y=df['D'], x=df["E"])
beta_hr = res.beta.x
# Calculate the residuals of the linear combination
df["res"] = df["D"] - beta_hr*df["E"]
# Plot the residuals
plot_residuals(df)
# Calculate and output the CADF test on the residuals
cadf = ts.adfuller(df["res"])
pprint.pprint(cadf)
http://www.quantstart.com/articles/Forecasting-Financial-Time-Series-Part-1
http://matplotlib.org/api/dates_api.html
MinuteLocator
HourLocator
http://blog.sina.com.cn/s/blog_02cf67f00101iuuh.html