1. 程式人生 > >python進階—numpy案例(一)

python進階—numpy案例(一)

首先總結一下如何用numpy讀寫txt、csv等文字格式的文件(np.loadtxt無法直接讀取excel文件,需先將其另存為csv)。

import numpy as np

# Demo data: the integers 0..11 arranged as a 3x4 array.
arr2 = np.arange(12).reshape((3, 4))

# Write the array to a text file.
#   fmt       - format string applied to every value
#   delimiter - text placed between columns
#   newline   - text placed at the end of every row
np.savetxt(
    r"C:\Users\67505\Desktop\numpyData.txt",
    arr2,
    fmt="%d",
    delimiter=" ",
    newline="\r\n",
)

# Read the text file back; dtype selects the element type of the result.
# unpack=False keeps the file's row/column layout, unpack=True would
# transpose it so that each column can be bound to its own variable.
values1 = np.loadtxt(
    r"C:\Users\67505\Desktop\numpyData.txt",
    dtype=int,
    delimiter=" ",
    unpack=False,
)

# Read a csv file; usecols picks the (0-based) columns to load.
values2 = np.loadtxt(
    r"C:\Users\67505\Desktop\numpyData.csv",
    delimiter=",",
    usecols=(6, 7),
    unpack=True,
)

案例:股票價格

股票價格資料(csv文件,列索引從0開始):第0列股票程式碼、第1列時間、第3列開盤價、第4列最高價、第5列最低價、第6列收盤價、第7列交易數(第2列在下面的程式碼中未使用)。

# Stock prices: column 6 -> closing price (c), column 7 -> volume (v).
c, v = np.loadtxt(
    r"C:\Users\67505\Desktop\表1.csv",
    delimiter=",",
    usecols=(6, 7),
    unpack=True,
)

# Volume-weighted average price: each close is weighted by its volume.
vwap = np.average(c, weights=v)

# Time-weighted average price: weights are 0, 1, 2, ... so later days
# count more (the first day gets weight 0).
t = np.arange(len(c))
twap = np.average(c, weights=t)

# Plain arithmetic mean of the closing prices.
mean = c.mean()

# Highest and lowest closing price.
c_max = c.max()
c_min = c.min()

# Range (peak-to-peak) of the closing prices: max - min.
c_ptp = np.ptp(c)

# Median closing price.
c_median = np.median(c)

# Variance of the closing prices.
c_var = c.var()

# Simple returns: (price[i + 1] - price[i]) / price[i].
returns = np.diff(c) / c[:-1]

# Log returns: log(price[i + 1]) - log(price[i]).
logReturn = np.diff(np.log(c))

# Indices of the trading days whose simple return is positive.
posretindices = np.nonzero(returns > 0)

# Date conversion
def date_str_to_week(s):
    """
    Convert a ``dd-mm-YYYY`` date string into its weekday number.

    Written to be used as an ``np.loadtxt`` converter. Depending on the
    NumPy version and the ``encoding`` argument, converters receive either
    ``bytes`` or ``str``, so both are accepted here.

    :param s: date text such as ``b"28-01-2011"`` or ``"28-01-2011"``;
              surrounding whitespace is ignored
    :return: weekday as an int, Monday = 0 ... Sunday = 6
    :raises ValueError: if the text does not match ``%d-%m-%Y``
    """
    import datetime

    # Only bytes need decoding; calling .decode() on str would raise.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    return datetime.datetime.strptime(s.strip(), "%d-%m-%Y").date().weekday()

# Weekday analysis. `converters` is a dict mapping a column index to the
# function that turns that column's raw text into a number; here column 1
# (the date) becomes a weekday number, column 6 is the closing price.
dates, close = np.loadtxt(
    r"C:\Users\67505\Desktop\表1.csv",
    delimiter=",",
    converters={1: date_str_to_week},
    usecols=(1, 6),
    unpack=True,
)

# Mean closing price for each working day (0 = Monday ... 4 = Friday):
# a boolean mask selects the closes that fall on that weekday.
weekday_average = np.array([close[dates == day].mean() for day in range(5)])

# Weekday whose average close is highest / lowest (index into the array).
top_weekday = weekday_average.argmax()
low_weekday = weekday_average.argmin()

    按照周彙總計算每週開盤價(monday_start)、最高價(week_high)、最低價(week_low)、收盤價(week_close)

# Load weekday number (column 1), open, high, low and close (columns 3-6).
dates, start, high, low, close = np.loadtxt(
    r"C:\Users\67505\Desktop\表1.csv",
    delimiter=",",
    converters={1: date_str_to_week},
    usecols=(1, 3, 4, 5, 6),
    unpack=True,
)

# The weekly summary only looks at the first 16 trading days, so the
# Monday/Friday search below is limited to that window.
# `close` is deliberately NOT truncated: it is only read through the
# positional indices computed here, and the later snippets slice the most
# recent closes (e.g. close[-N - 1:-1]) and need the full series.
dates = dates[:16]

# Position of the first Monday (weekday 0) inside the window.
first_monday = np.ravel(np.where(dates == 0))[0]
# Position of the last Friday (weekday 4) inside the window.
last_friday = np.ravel(np.where(dates == 4))[-1]

# Index of every trading day from the first Monday to the last Friday.
weeks_indices = np.arange(first_monday, last_friday + 1)
# Split into 3 equal sub-arrays, one per week. np.split raises ValueError
# if the number of indices is not divisible by 3.
weeks_indices = np.split(weeks_indices, 3)


def summarize(a, s, h, l, c):
    """
    Summarize one week of prices as (open, high, low, close).

    :param a: indices of the week's trading days, in chronological order
    :param s: array of opening prices
    :param h: array of daily high prices
    :param l: array of daily low prices
    :param c: array of closing prices
    :return: tuple ``(monday_start, week_high, week_low, week_close)``:
        the open of the first day in ``a``, the highest high and the lowest
        low over the days in ``a``, and the close of the last day in ``a``
    """
    first_day, last_day = a[0], a[-1]
    # Highs and lows restricted to the days of this week.
    highs = np.take(h, a)
    lows = np.take(l, a)
    return s[first_day], highs.max(), lows.min(), c[last_day]


# Run summarize once per row of weeks_indices (axis 1 = the day indices of
# one week); the price arrays are forwarded as extra positional arguments.
# Each row of the result is one week's (open, high, low, close).
week_summary = np.apply_along_axis(
    summarize, 1, weeks_indices, start, high, low, close
)

計算真實波動幅度均值(ATR)

# Number of trading days in the look-back window.
N = 20
# Highs and lows of the last N trading days.
period_high = high[-N:]
period_low = low[-N:]
# Close of the trading day before each day in the window
# (the same window shifted back by one day).
previous_close = close[-N - 1:-1]

# True range of a day is the largest of:
#   - the day's own range            (period_high - period_low)
#   - high minus the previous close  (period_high - previous_close)
#   - previous close minus the low   (previous_close - period_low)
# np.maximum is a binary ufunc whose third positional argument is `out`,
# so passing three arrays would ignore (and overwrite) the third one.
# Reducing over the list takes the element-wise maximum of all three.
true_range = np.maximum.reduce([
    period_high - period_low,
    period_high - previous_close,
    previous_close - period_low,
])

# ATR series; the first value is seeded with the mean true range.
ATR = np.zeros(N)
ATR[0] = np.mean(true_range)
# Smoothing: the previous ATR carries weight (N - 1)/N and the new true
# range 1/N, so the weights sum to 1 and the series stays an average.
for i in range(1, N):
    ATR[i] = ((N - 1) * ATR[i - 1] + true_range[i]) / N

線性模型預測股價

假設:當前股價可以用之前股價的線性組合表示,也就是說當前股價等於之前的股價與各自的係數相乘再做加和的結果。

# Linear model for the stock price: assume each price is a linear
# combination of the n_lags prices that precede it.
n_lags = 5

# The n_lags most recent closing prices, newest first.
stock_price_vec = close[-n_lags:]
stock_price_vec = stock_price_vec[::-1]

# Row i of A holds the n_lags closes that end (i + 1) days before the last
# one, i.e. each row is the window one day older than the row above it.
A = np.zeros((n_lags, n_lags))
for i in range(n_lags):
    A[i,] = close[-n_lags - i - 1:-1 - i]

# Least-squares solution of A @ x = stock_price_vec. rcond=None selects
# NumPy's machine-precision based cutoff explicitly, which avoids the
# FutureWarning older NumPy versions emit when rcond is omitted.
x, residuals, rank, s = np.linalg.lstsq(A, stock_price_vec, rcond=None)

# Predict the next price by applying the coefficients to the latest prices.
next_stock_price = np.dot(stock_price_vec, x)

案例:計算股票的相關係數(corr)和淨額成交量(OBV)

# Closing prices (column 6) of the two stocks.
bhp = np.loadtxt(
    r"C:\Users\67505\Desktop\numpy教程資料集\ch4code\ch4code\BHP.csv",
    delimiter=",",
    usecols=(6,),
    unpack=True,
)
vale = np.loadtxt(
    r"C:\Users\67505\Desktop\numpy教程資料集\ch4code\ch4code\VALE.csv",
    delimiter=",",
    usecols=(6,),
    unpack=True,
)

# Simple returns. np.diff gives price[i + 1] - price[i], so each return is
# (price[i + 1] - price[i]) / price[i].
bhp_returns = np.diff(bhp) / bhp[:-1]
vale_returns = np.diff(vale) / vale[:-1]

# Covariance matrix of the two return series.
covariance = np.cov(bhp_returns, vale_returns)
# Trace of the covariance matrix (sum of the main-diagonal entries).
return_trace = np.trace(covariance)
# Correlation-coefficient matrix of the two return series.
corr = np.corrcoef(bhp_returns, vale_returns)

# Plot both return series against the trading-day index.
# NOTE(review): plot/show are not imported anywhere in this snippet --
# presumably `from matplotlib.pyplot import plot, show`; confirm and add.
t = np.arange(len(bhp_returns))
plot(t, bhp_returns, lw=2)
plot(t, vale_returns, lw=2)
show()
# On-balance volume:
#   OBV(t) = sign(close(t) - close(t - 1)) * volume(t) + OBV(t - 1)
close_price, volumn = np.loadtxt(
    r"C:\Users\67505\Desktop\numpy教程資料集\ch4code\ch4code\BHP.csv",
    delimiter=",",
    unpack=True,
    usecols=(6, 7),
)
# change[i] = close_price[i + 1] - close_price[i], i.e. the move INTO day
# i + 1, so it has one element fewer than the price series.
change = np.diff(close_price)
# Pair each move with the volume of the day it ends on (volumn[1:]), then
# accumulate. The running sum starts from 0, so no wrap-around read of
# OBV[-1] is needed for the first element.
OBV = np.cumsum(np.sign(change) * volumn[1:])
print(OBV)