Time Series Analysis in Finance (1) – Statistical Analysis – ARIMA
Outline for learning time series analysis:
Time Series analysis method in Finance
Statistical analysis
Machine learning
Neural networks
Steps and prerequisite knowledge for ARIMA
Test the time series for stationarity: data plot, ACF/PACF, Dickey-Fuller (DF) test.
Difference the series until it is stationary; the number of differences taken is the d parameter in ARIMA.
Select p and q: AIC/BIC, ACF/PACF.
Evaluate the model.
Prerequisite knowledge: AR, MA, ACF/PACF, ARMA, stationarity test, white noise test, AIC/BIC.
python
Package: statsmodels
Code: Bitcoin price prediction–Example
```python
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels as sm
# Load the price history; the first CSV column holds the dates and becomes the index.
bitp = pd.read_csv('BTC_2015.csv', index_col=0)
bitp.index = pd.to_datetime(bitp.index, format='%Y-%m-%d')

# Work on an explicit copy so the column assignments below never write into a
# view of `bitp` (avoids SettingWithCopyWarning / silent no-ops).
# To restrict the data up front instead, use:
#   bit = bitp.loc['2021-01':'2022-01', ['Close']].copy()
bit = bitp[['Close']].copy()

# First and second differences of the closing price.
bit['diff_1'] = bit['Close'].diff(1)
bit['diff_2'] = bit['diff_1'].diff(1)

# Select rows with .loc: selecting rows via frame['2022-01'] (a single date string)
# was deprecated and then removed in pandas 2.0, where it raises KeyError.
bit_train = bit.loc['2021-01':'2021-12']
bit_test = bit.loc['2022-01']
# According to the figure, the closing price is non-stationary while its first
# difference (diff_1) looks stationary.
# NOTE(review): the grid has three rows but only the first two are used; the
# third slot stays empty - confirm whether a diff_2 panel was intended.
fig = plt.figure(figsize=[12, 8])
ax1, ax2 = (fig.add_subplot(3, 1, position) for position in (1, 2))
for column, axis in (('Close', ax1), ('diff_1', ax2)):
    bit_train[column].plot(ax=axis)
# ACF / PACF of the first-differenced training series.
# One round of differencing is used, i.e. d = 1 for the ARIMA model.
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

# Drop only values that are actually missing. The differences are computed on the
# full frame before the training window is sliced out, so the first training row
# is NaN only when the window starts at the very first row of the data; a
# positional [1:] would otherwise throw away a valid observation.
diff_1 = bit_train['diff_1'].dropna()

fig = plt.figure(figsize=[12, 8])
ax1 = fig.add_subplot(2, 1, 1)
plot_acf(diff_1, lags=100, ax=ax1)
ax2 = fig.add_subplot(2, 1, 2)
plot_pacf(diff_1, lags=100, ax=ax2)
# decompose the time series into its components
from statsmodels.tsa.seasonal import seasonal_decompose
def decompose(timeseries, period=None):
    """Decompose a series into trend, seasonal and residual parts and plot them.

    Args:
        timeseries: Series handed to ``seasonal_decompose``.
        period: Optional seasonal period forwarded to ``seasonal_decompose``.
            With the default ``None`` the call behaves as before and
            statsmodels has to derive the period from the series' index.

    Returns:
        Tuple ``(trend, seasonal, residual)`` taken from the decomposition
        result.
    """
    decomposition = seasonal_decompose(timeseries, period=period)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    panels = (
        (timeseries, 'Original'),
        (trend, 'Trend'),
        (seasonal, 'Seasonality'),
        (residual, 'Residuals'),
    )
    # Draw on a dedicated figure: plt.subplot(4xx) targets whatever figure is
    # current, so in a script it would paint over a previously created figure.
    fig, axes = plt.subplots(len(panels), 1)
    for axis, (values, label) in zip(axes, panels):
        axis.plot(values, label=label)
        axis.legend(loc='best')
    fig.tight_layout()
    plt.show()
    return trend, seasonal, residual
# Decompose and plot the closing price of the training window.
decompose(timeseries=bit_train['Close'])
# stationarity test (augmented Dickey-Fuller)
from statsmodels.tsa.stattools import adfuller
def DFTest(sales, regression, maxlag, autolag='AIC'):
    """Run ``adfuller`` on a series and print a labelled summary of the result.

    Args:
        sales: Series to test; passed to ``adfuller`` unchanged.
        regression: Forwarded to ``adfuller`` as ``regression``.
        maxlag: Forwarded to ``adfuller`` as ``maxlag``.
        autolag: Forwarded to ``adfuller`` as ``autolag``.

    Returns:
        None. The summary is printed, not returned.
    """
    print("ADF-Test Result:")
    adf_result = adfuller(sales, regression=regression, maxlag=maxlag, autolag=autolag)

    # The first four entries of the result are labelled here; entry 4 is the
    # mapping of critical values, keyed by significance level.
    headline_labels = ['Test Statistic', 'P-value', 'Lags Used', 'nobs']
    report = dict(zip(headline_labels, adf_result[0:4]))
    report.update(
        ('Critical Value at %s' % level, threshold)
        for level, threshold in adf_result[4].items()
    )
    print(pd.Series(report))
# Test the differenced series first, then the raw closing price, with identical
# settings; after one difference the series becomes stationary.
for candidate in (diff_1, bit_train['Close']):
    DFTest(candidate, regression='n', maxlag=6, autolag='AIC')
# Pick the ARMA orders (p, q) by AIC and BIC.
# arma_order_select_ic is defined in statsmodels.tsa.stattools. It is not an
# attribute of the bare `statsmodels.tsa` package reachable through
# `import statsmodels as sm`, so sm.tsa.arma_order_select_ic raises
# AttributeError. Import it explicitly instead.
from statsmodels.tsa.stattools import arma_order_select_ic

# The search fits ARMA models without differencing, so it is run on the
# first-differenced training series (d = 1), not on the non-stationary price level.
diff_train = bit_train['diff_1'].dropna()

# One grid search yields both criteria; both names are kept for later use.
order_search = arma_order_select_ic(diff_train, ic=['aic', 'bic'], max_ar=4, max_ma=4)
results_aic = order_search
results_bic = order_search

# Print explicitly: a bare expression shows nothing when run as a script.
print('AIC-optimal (p, q):', results_aic.aic_min_order)
print('BIC-optimal (p, q):', results_bic.bic_min_order)
# according to the ACF and PACF, the p is 1 and the q is 1.
from statsmodels.tsa.arima.model import ARIMA
def arima_bit_train(ts, d, p, q, start='2022-01-01', end='2022-01-16'):
    """Fit ARIMA(p, d, q) on ``ts``, plot its forecast and return the AIC.

    Note the argument order: ``d`` comes before ``p`` and ``q``, unlike the
    usual (p, d, q) convention. Pass the orders by keyword to avoid mix-ups.

    Args:
        ts: Series the model is fitted on.
        d: Differencing order.
        p: Autoregressive order.
        q: Moving-average order.
        start: First date of the prediction window (default keeps the
            previously hard-coded value).
        end: Last date of the prediction window (default keeps the
            previously hard-coded value).

    Returns:
        The AIC of the fitted model.

    The actual prices drawn for comparison come from the module-level
    ``bit_test`` frame.
    """
    model = ARIMA(ts, order=(p, d, q))
    res = model.fit()
    # summary() only builds the report; print it so it is visible when the
    # function runs (a discarded return value shows nothing).
    print(res.summary())

    predicted = res.predict(start, end, dynamic=True)

    fig = plt.figure(figsize=[12, 8])
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(bit_test['Close'], color='blue', label='btcp')
    # Label corrected: the series is a predicted closing price, not sales.
    ax.plot(predicted, color='red', label='Predicted Close')
    # Without a legend the labels above are never displayed.
    ax.legend(loc='best')
    return res.aic
# Keywords make the (d, p, q) argument order explicit.
# NOTE(review): this fits ARIMA with p=1, d=2, q=2, while the analysis above
# concludes d = 1 - confirm whether (p, d, q) = (2, 1, 2) was intended.
arima_bit_train(bit_train['Close'], d=2, p=1, q=2)