当前位置: 首页 > 工具软件 > AKShare > 使用案例 >

量化学习——akshare对接的常用功能(1)

万浩淼
2023-12-01

借助 akshare,整理量化回测中经常用到的数据接口和功能,方便展开接下来的全部教学指导。

常用的包:

#获取股票历史数据
import akshare as ak
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import mplfinance as mpf
from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']  # 设置中文字体为黑体(SimHei)
plt.rcParams['font.sans-serif'] = ['SimHei']        # 字体设置
import matplotlib
matplotlib.rcParams['axes.unicode_minus']=False    # 负号显示问题

1:判断何种交易所。

根据股票代码的开头数字判断所属交易所,因为有些接口需要在股票代码前加上 SH、SZ、BJ 前缀。

# Take the first stock code from the pool; its exchange is determined below.
# NOTE(review): `stock_pool` is not defined in this snippet — presumably a
# DataFrame with a '代码' (code) column; confirm against the caller.
code = str(stock_pool['代码'].values[0])
def gp_type_szsh(gp):
    """Prefix a stock code with the tag of the exchange it trades on.

    Args:
        gp: Stock code such as ``'600309'``; it is converted with ``str``.

    Returns:
        The code prefixed with ``'SH'``, ``'SZ'`` or ``'BJ'``,
        e.g. ``'SH600309'``.

    Raises:
        ValueError: If the code starts with none of the known prefixes.
    """
    gp = str(gp)
    exchange_prefixes = (
        # Shanghai: 60x main board, 688 STAR Market, 900 B shares.
        ('SH', ('60', '688', '900')),
        # Shenzhen: 00x main board, 300/301 ChiNext, 200 B shares.
        ('SZ', ('00', '300', '301', '200')),
        # Beijing Stock Exchange listings.
        ('BJ', ('43', '83', '87', '920')),
    )
    for exchange, prefixes in exchange_prefixes:
        # str.startswith accepts a tuple, so one call checks every prefix.
        if gp.startswith(prefixes):
            return exchange + gp
    # Fail loudly instead of hitting an unbound local variable.
    raise ValueError(f"unrecognised stock code prefix: {gp!r}")
code = gp_type_szsh(code)   # replace the raw code with its exchange-prefixed form

2.获取股票历史数据

# Download the daily history of one stock (adjust="" requests no price
# adjustment) and expose it with English column names and a date index.
code = '600309'  # alternatively: str(stock_pool['代码'].values[0])
end = datetime.now().strftime('%Y%m%d')
data = ak.stock_zh_a_hist(
    symbol=str(code),
    period="daily",
    start_date="19900301",
    end_date=end,
    adjust="",
)
data['日期'] = pd.to_datetime(data['日期'], format='%Y-%m-%d')
data.rename(
    columns={
        '日期': 'date',
        '开盘': 'open',
        '最高': 'high',
        '最低': 'low',
        '收盘': 'close',
        '成交量': 'volume',
        '涨跌幅': 'pct_chg',
    },
    inplace=True,
)
data['code'] = code
# Optional column subset:
# data = data[['date','open','close','high','low','volume','code']]
# The 'date' column is kept and also becomes the index.
data = data.set_index(data['date'])

3.获取指数历史数据:

# Daily history of index 000001 from 2000-01-01 up to today.
# Alternative source: ak.stock_zh_index_daily_em(symbol=security)
security = "000001"
begin_date = '20000101'
end_date = datetime.now().strftime('%Y%m%d')
df = ak.index_zh_a_hist(
    symbol=security,
    period='daily',
    start_date=begin_date,
    end_date=end_date,
)
# Bare expression: shows the frame when run as a notebook cell.
df

4.获取期货数据

# Fetch stock-index futures data for symbol "IF0" and give the price columns
# English names.
df3 = ak.futures_main_sina(symbol="IF0")
df3.rename(
    columns={
        "日期": 'trade_date',
        "开盘价": 'open',
        "收盘价": 'close',
        "最高价": 'high',
        "最低价": 'low',
    },
    inplace=True,
)
# sort_index() returns a new frame; the result is not assigned, so this line
# only displays the sorted view and leaves df3 itself untouched.
df3.sort_index()

5.获取指数成分股名单以及历史数据:

# Date window for the per-stock downloads, and the constituent list of
# index 000300.
start = "20200217"
end = datetime.now().strftime('%Y%m%d')
stock_list = ak.index_stock_cons(symbol="000300")
def select(code,start,end):
    """Download daily bars for one stock, indexed and sorted by trade date.

    Args:
        code: Stock symbol passed to ``ak.stock_zh_a_hist``.
        start: Start date string, e.g. ``"20200217"``.
        end: End date string in the same format.

    Returns:
        DataFrame with a ``code`` column, ``日期`` renamed to ``trade_date``
        and ``收盘`` renamed to ``price``, using the trade dates as an
        unnamed datetime index in ascending order.
    """
    bars = ak.stock_zh_a_hist(
        symbol=code, period="daily", start_date=start, end_date=end, adjust=""
    )
    bars['code'] = code
    bars.rename(columns={'日期': 'trade_date', '收盘': 'price'}, inplace=True)
    bars['trade_date'] = pd.to_datetime(bars['trade_date'], format='%Y-%m-%d')
    # The trade date is kept as a column and also used as the index.
    bars.index = pd.to_datetime(bars['trade_date'])
    bars.index.name = None
    return bars.sort_index()
# Download every constituent and stack the per-stock frames into one table.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration; collecting the pieces and concatenating once avoids both.
frames = [select(i, start, end) for i in stock_list['品种代码']]
# pd.concat raises on an empty list, so keep the empty-frame result in that case.
df = pd.concat(frames) if frames else pd.DataFrame()
# Bare expression: shows the frame when run as a notebook cell.
df

6.构建多因子数据构成表

# Build the tables used for multi-factor analysis.
# `assets`: the stacked frame re-indexed by (date, stock code).
assets = df.set_index([df.index, df['code']], drop=True)
# `close`: one row per date, one column per stock code, values are closing
# prices. select() renames the closing-price column '收盘' to 'price', so the
# pivot must read 'price'; there is no 'close' column in `df`.
close = df.pivot_table(index='trade_date', columns='code', values='price')
close.index = pd.to_datetime(close.index)
close.index.name = None

7.去极值

# Winsorization (clip extreme values).
def winsorize_series(se, lower=0.025, upper=0.975):
    """Clip a Series to its ``lower`` and ``upper`` quantiles.

    Values below the ``lower`` quantile are raised to it and values above the
    ``upper`` quantile are lowered to it. The input is not modified; a new
    Series is returned.

    Args:
        se: Series to winsorize.
        lower: Lower quantile level, default 0.025.
        upper: Upper quantile level, default 0.975.

    Returns:
        The clipped Series. If ``se.quantile`` does not return a two-element
        Series (e.g. ``se`` is not a Series), ``se`` is returned unchanged.
    """
    q = se.quantile([lower, upper])
    if not (isinstance(q, pd.Series) and len(q) == 2):
        return se
    # clip builds a new Series, so the caller's data is left untouched and
    # integer input is upcast instead of receiving float assignments.
    return se.clip(lower=q.iloc[0], upper=q.iloc[1])

8.标准化:

方法1:

#数据标准化方法
# 1)最小-最大标准化(Min-max normalization)
#   min-max标准化又称为离差标准化,是常见的归一化处理。将原始数据转化为一个0到1的数。
import numpy as np
def standardize_series2(se:pd.Series):
    """Min-max scale a Series so its minimum maps to 0 and its maximum to 1.

    Args:
        se: Numeric Series.

    Returns:
        Series of the same length with values ``(se - min) / (max - min)``.
        A constant Series has zero range, so the division yields NaN.
    """
    low = se.min()
    high = se.max()
    return (se - low) / (high - low)


# Demo: call the min-max function defined above (the z-score variant
# `standardize_series` is a different function).
a = pd.Series([1,3,6])
standardize_series2(a)

 方法2:

# 2) Z-score standardization:
# rescale the data to zero mean and unit standard deviation.
def standardize_series(se):
    """Return the z-scores of a Series.

    Args:
        se: Numeric Series.

    Returns:
        ``(se - mean) / std`` using the Series' own ``mean()`` and ``std()``.
        A zero standard deviation makes the division yield NaN or inf.
    """
    se_std = se.std()
    se_mean = se.mean()
    return (se - se_mean) / se_std

9.计算两条直线的斜率

# Min-max scaling helper.
def MaxMinNormalization(self,x, min, max):
    """Map ``x`` linearly so that ``min`` becomes 0 and ``max`` becomes 1.

    ``self`` is accepted but never used. The parameter names ``min`` and
    ``max`` shadow the builtins inside this function.
    """
    span = max - min
    return (x - min) / span

# NOTE(review): this fragment uses `return` and `self`, so it is presumably the
# body of a method excerpted from a class; `array`, `hstack`,
# `LinearRegression`, `st` and `index` are not defined in the visible snippet.
# NOTE(review): `type`, `min` and `max` shadow Python builtins here.
# Selects the slope estimate used below: '回归线' (regression line) fits a
# LinearRegression; any other value uses the line through the two end points.
type = '回归线'
# Compute the slopes of the two lines (stock and index).
# x axis: positions 0..len-1, passed to MaxMinNormalization with bounds 0 and len-1.
st_x = array([i for i in range(0, len(st['close']))])
st_x = MaxMinNormalization(st_x, 0, len(st['close']) - 1)
# .T transposes the 1xn matrix into nx1.
st_x = array([st_x]).T
index_x = array([i for i in range(0, len(index['close']))])
index_x = MaxMinNormalization(index_x, 0, len(index['close']) - 1)
# .T transposes the 1xn matrix into nx1.
index_x = array([index_x]).T
# y axis: relative change of each close versus the first close.
st_y = array([(close - st['close'][0]) / st['close'][0] for close in st['close']])
index_y = array([(close - index['close'][0]) / index['close'][0] for close in index['close']])
# Both y series are scaled with the same bounds, taken over the two series combined.
min = hstack((st_y, index_y)).min()
max = hstack((st_y, index_y)).max()
st_y = self.MaxMinNormalization(st_y, min, max)
st_y = array([st_y]).T
index_y = MaxMinNormalization(index_y, min, max)
index_y = array([index_y]).T
if type == '回归线':  # regression-line variant
    # fit(x, y) performs the regression; coef_[0][0] is read as the slope.
    regr_st = LinearRegression().fit(st_x, st_y)
    regr_index = LinearRegression().fit(index_x, index_y)
    return regr_st.coef_[0][0], regr_index.coef_[0][0]
else:  # two-point variant: (last y - first y) divided by the last x
    return (list(st_y)[-1][0]-list(st_y)[0][0])/list(st_x)[-1][0],(list(index_y)[-1][0]-list(index_y)[0][0])/list(index_x)[-1][0]

10.计算两条直线的夹角


def get_angle(self, x, y):
    """Return the angle in degrees between direction vectors ``x`` and ``y``.

    Args:
        x: First direction vector (1-D array-like).
        y: Second direction vector of the same length.

    Returns:
        The angle in the range [0, 180] degrees. A zero-length vector makes
        the cosine undefined and the result NaN.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Vector lengths: square root of each vector's dot product with itself.
    norm_product = np.sqrt(x.dot(x)) * np.sqrt(y.dot(y))
    cos_angle = x.dot(y) / norm_product
    # Rounding can push the cosine slightly outside [-1, 1] for (anti)parallel
    # vectors, which would make arccos return NaN.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)
    # np.degrees uses full-precision pi instead of a truncated constant.
    return np.degrees(np.arccos(cos_angle))

11.计算两组数据协方差

# Covariance between stock and index returns; the first element of each
# series is skipped. Entry [0, 1] of the 2x2 matrix is the cross term.
covr = np.cov(returns_stock[1:], returns_index[1:])[0, 1]

12.计算数据方差

# Variance of the CSI 300 index returns.
# np.cov normalises by N-1 by default while np.var normalises by N, and the
# covariance above is taken over [1:]; use the same window and ddof=1 here so
# that covr / var is a consistent beta estimate.
var = np.var(returns_index[1:], ddof=1)

13.计算贝塔值

# Beta: covariance of stock and index returns divided by the index variance.
beta = covr / var

 类似资料: