1. 程式人生 > >Blaze the Way of Quantitative Trading

Blaze the Way of Quantitative Trading

http://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing

trend following/momentum和mean-reversion是兩種最基本的設計策略的思路,對於後者而言,首先我們要知道time-series是不是滿足mean-reversion的。從數學上說,連續的均值回覆過程就是隨機過程中的OU過程(Ornstein-Uhlenbeck),不同於布朗運動。我們可以利用pandas和statsmodel進行ADF檢驗來判斷均值回覆性(均值回覆性是平穩性的必要條件),以判斷序列是不是具有均值回覆性。如果我們得到的統計量大於臨界值,那麼不能拒絕原假設,即序列是非均值回覆的,而是隨機遊走。如果是從網路資料庫抓取資料,上面的過程可以寫為

# Import the Time Series libraryimport statsmodels.tsa.stattools as ts

# Import Datetime and the Pandas DataReaderfrom datetime import datetime
from pandas.io.data importDataReader# Download the Google OHLCV data from 1/1/2000 to 1/1/2013
goog =DataReader("GOOG","yahoo", datetime(2000,1,1), datetime(2013,1,1
))

首先對時間序列進行單位根檢驗

ts.adfuller(,1)

我們還可以用HURST INDEX對平穩性進行檢驗

H<0.5 - The time series is mean reverting
H=0.5 - The time series is a Geometric Brownian Motion
H>0.5 - The time series is trending

from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn

def hurst
(ts):"""Returns the Hurst Exponent of the time series vector ts"""# Create the range of lag values lags = range(2,100)# Calculate the array of the variances of the lagged differences tau =[sqrt(std(subtract(ts[lag:], ts[:-lag])))for lag in lags]# Use a linear fit to estimate the Hurst Exponent poly = polyfit(log(lags), log(tau),1)# Return the Hurst exponent from the polyfit outputreturn poly[0]*2.0# Create a Gometric Brownian Motion, Mean-Reverting and Trending Series gbm = log(cumsum(randn(100000))+1000) mr = log(randn(100000)+1000) tr = log(cumsum(randn(100000)+1)+1000)# Output the Hurst Exponent for each of the above series# and the price of Google (the Adjusted Close price) for # the ADF test given above in the articleprint"Hurst(GBM): %s"% hurst(gbm)print"Hurst(MR): %s"% hurst(mr)print"Hurst(TR): %s"% hurst(tr)# Assuming you have run the above code to obtain 'goog'!print"Hurst(GOOG): %s"% hurst(goog['Adj Close'])
下面討論協整的檢驗,Cointegrated Augmented Dickey-Fuller Test,至於協整這一概念的闡述本質兩個序列的線性組合為平穩的,

#做統計套利時我們常常要作散點圖以及進行協整檢驗

# cadf.py


import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import pandas.io.data as web
import pprint
import statsmodels.tsa.stattools as ts


from pandas.stats.api import ols




def plot_price_series(df, ts1, ts2):
    months = mdates.MonthLocator()  #可以是DayLocator HourLocator
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_xlim(datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1))
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('%s and %s Daily Prices' % (ts1, ts2))
    plt.legend()
    plt.show()


import datetime
#import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#import pandas as pd
#import pandas.io.data as web
#import pprint
import statsmodels.tsa.stattools as ts


from pandas.stats.api import ols


#我們的main函式裡面的DataFrame就是d,而程式處理後的df就是mergeDF和Pnl等結果
def plot_price_series(df, ts1, ts2):
    hours = mdates.HourLocator()  #可以是DayLocator HourLocator
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1) #這裡要把index重設一下,因為原本的index是pTime和sym_
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(hours)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_xlim(datetime.datetime(2014, 1, 2), datetime.datetime(2014, 1, 1)) #開始和結束日期要和樣本統一
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Hour/Year')
    plt.ylabel('TrdPriceLast ($)')
    plt.title('%s and %s Prices' % (ts1, ts2))
    plt.legend()
    plt.show()


def plot_scatter_series(df, ts1, ts2):
    plt.xlabel('%s TrdPriceLast ($)' % ts1)
    plt.ylabel('%s TrdPriceLast ($)' % ts2)
    plt.title('%s and %s Price Scatterplot' % (ts1, ts2))
    plt.scatter(df[ts1], df[ts2])
    plt.show()


def plot_residuals(df):
    hours = mdates.HourLocator() 
    fig, ax = plt.subplots()
    ax.plot(df.index, df["res"], label="Residuals")
    ax.xaxis.set_major_locator(Hours)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_xlim(datetime.datetime(2014, 1, 2), datetime.datetime(2014, 1, 1))
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Hour/Year')
    plt.ylabel('TrdPriceLast ($)')
    plt.title('Residual Plot')
    plt.legend()


    plt.plot(df["res"])
    plt.show()




if __name__ == "__main__":
    start = datetime.datetime(2014, 1, 2)
    end = datetime.datetime(2014, 1, 1)


    arex = web.DataReader("AREX", "yahoo", start, end)
    wll = web.DataReader("WLL", "yahoo", start, end)
   
    
    df = pd.DataFrame(index=arex.index)
    d = d.reset_index()
    d = d.set_index(["pTime"])
    df["D"] = d[d["sym"]=="D"][TrdPriceLast]
    df["E"] = d[d["sym"]=="E"][TrdPriceLast]
    
    # Plot the two time series
    plot_price_series(df, "D", "E")


    # Display a scatter plot of the two time series
    plot_scatter_series(df, "D", "E")


    # Calculate optimal hedge ratio "beta"
    res = ols(y=df['D'], x=df["E"])
    beta_hr = res.beta.x


    # Calculate the residuals of the linear combination
    df["res"] = df["D"] - beta_hr*df["E"]


    # Plot the residuals
    plot_residuals(df)


    # Calculate and output the CADF test on the residuals
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)





http://www.quantstart.com/articles/Forecasting-Financial-Time-Series-Part-1




http://matplotlib.org/api/dates_api.html
MinuteLocator
HourLocator






http://blog.sina.com.cn/s/blog_02cf67f00101iuuh.html