import pandas as pd
import akshare as ak
import numpy as np
import talib
from datetime import datetime, timedelta
import time
获取股票数据
from_date = '2010-01-01'
from_date = datetime.strptime(from_date,"%Y-%m-%d")
day_nums = 1
current_dt = time.strftime("%Y-%m-%d", time.localtime())
current_dt = datetime.strptime(current_dt, '%Y-%m-%d')
previous_date = current_dt - timedelta(days=day_nums)
data = ak.stock_zh_a_daily(symbol='sz000001',start_date = from_date,end_date = previous_date)
df = data
df.index=pd.to_datetime(df.date)
cols=['Open', 'High', 'Low', 'Close']
dataset=df[['open','high','low','close']]
dataset=dataset.rename(columns=dict(zip(dataset.columns,cols)))
dataset.head()
01 数据预处理
dataset['H-L'] = dataset['High'] - dataset['Low']
dataset['O-C'] = dataset['Close'] - dataset['Open']
dataset['3day MA'] = dataset['Close'].shift(1).rolling(window = 3).mean()
dataset['10day MA'] = dataset['Close'].shift(1).rolling(window = 10).mean()
dataset['30day MA'] = dataset['Close'].shift(1).rolling(window = 30).mean()
dataset['Std_dev']= dataset['Close'].rolling(5).std()
dataset['RSI'] = talib.RSI(dataset['Close'].values, timeperiod = 9)
dataset['Williams %R'] = talib.WILLR(dataset['High'].values, dataset['Low'].values, dataset['Close'].values, 7)
dataset['Price_Rise'] = np.where(dataset['Close'].shift(-1) > dataset['Close'], 1, 0)
#删除缺失值
dataset = dataset.dropna()
#查看最后五列数据
dataset.tail()
X = dataset.iloc[:, 4:-1]
y = dataset.iloc[:, -1]
split = int(len(dataset)*0.8)
X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
02 构建神经网络
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
classifier = Sequential()
classifier.add(Dense(units = 128, kernel_initializer = 'uniform', activation = 'relu', input_dim = X.shape[1]))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
03 预测股票走势
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
dataset['y_pred'] = np.NaN
dataset.iloc[(len(dataset) - len(y_pred)):,-1:] = y_pred
trade_dataset = dataset.dropna()
04 计算策略收益
import warnings
warnings.filterwarnings('ignore')
trade_dataset['Tomorrows Returns'] = 0.
trade_dataset['Tomorrows Returns'] = np.log(trade_dataset['Close']/trade_dataset['Close'].shift(1))
trade_dataset['Tomorrows Returns'] = trade_dataset['Tomorrows Returns'].shift(-1)
trade_dataset['Strategy Returns'] = 0.
trade_dataset['Strategy Returns'] = np.where(trade_dataset['y_pred'] == True,
trade_dataset['Tomorrows Returns'], - trade_dataset['Tomorrows Returns'])
trade_dataset['Strategy Returns'] = 0.
trade_dataset['Strategy Returns'] = np.where(trade_dataset['y_pred'] == True,
trade_dataset['Tomorrows Returns'], - trade_dataset['Tomorrows Returns'])
trade_dataset['Cumulative Market Returns'] = np.cumsum(trade_dataset['Tomorrows Returns'])
trade_dataset['Cumulative Strategy Returns'] = np.cumsum(trade_dataset['Strategy Returns'])
05 收益可视化
import matplotlib.pyplot as plt
trade_dataset[['Cumulative Market Returns','Cumulative Strategy Returns']].plot(figsize=(14,8));