|
from datetime import datetime, timedelta |
|
import pandas as pd |
|
import numpy as np |
|
|
|
import model_utils as mu |
|
from statsmodels.tsa.arima.model import ARIMA |
|
|
|
# (p, d, q) order shared by the production fit and every evaluation fit.
_ARIMA_ORDER = (2, 1, 2)

# Target ('prices') followed by the exogenous regressors taken from the
# transformed frame.
_MODEL_COLUMNS = ['prices', 'price_eth', 'GSPC', 'Day', 'Month', 'TNX',
                  'Employment', 'google_trend', 'EURUSD']

# File the walk-forward evaluation table is written to.
_RESULT_CSV = 'result_arima_kat.csv'


def _train_fraction(n_rows):
    """Return the share of rows used as the initial training window."""
    if n_rows > 500:
        return 0.9
    if n_rows > 200:
        return 0.8
    return 0.7


def _one_step_forecast(train, test_row):
    """Fit ARIMA on ``train`` and forecast 'prices' for the one-row ``test_row``."""
    fitted = ARIMA(
        train['prices'],
        exog=train.drop(columns='prices'),
        order=_ARIMA_ORDER,
    ).fit()
    forecast = fitted.forecast(steps=1, exog=test_row.drop(columns='prices'))
    return np.asarray(forecast)[0]


def _walk_forward(data, train_size):
    """Run an expanding-window one-step evaluation; return (forecasts, actuals)."""
    # Both bounds are loop-invariant, so compute them once.
    split = int(len(data) * train_size)
    n_steps = int(len(data) - len(data) * train_size)

    forecasts = []
    actuals = []
    for step in range(n_steps):
        stop = split + step
        train = data.iloc[:stop]
        test_row = data.iloc[stop:stop + 1]
        forecasts.append(_one_step_forecast(train, test_row))
        actuals.append(test_row['prices'].iloc[0])
    return np.array(forecasts, dtype=float), np.array(actuals, dtype=float)


def model_run(df_all):
    """Prediction function that runs an ARIMA model and predicts tomorrow's price.

    The input is passed to ``mu.data_transform`` together with tomorrow's
    timestamp. An ARIMA(2, 1, 2) model with exogenous regressors is fitted on
    every row of the transformed frame except the last, and the last row is
    forecast. The same model specification is then scored with an
    expanding-window, one-step-ahead evaluation over the tail of the data.

    Side effects: prints a progress message and writes the evaluation table
    to 'result_arima_kat.csv' in the current working directory.

    Args:
        df_all: Historical data accepted by ``mu.data_transform``
            (presumably a DataFrame -- confirm against that helper).

    Returns:
        A tuple ``(df_with_forecast, accuracy, result_arima)``:
        a copy of the transformed frame whose last 'prices' entry is replaced
        by the forecast, the R^2 score of the evaluation forecasts against
        the observed values, and a DataFrame with columns 'prediction' and
        'data' holding those forecasts and observations.
    """
    # Kept function-local, as in the original, so this block is self-contained.
    from sklearn.metrics import r2_score

    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))
    reframed_lags, _ = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')

    # Build a date-indexed frame of the target plus regressors.
    features = reframed_lags[_MODEL_COLUMNS]
    date = pd.to_datetime(dict(year=reframed_lags['Year'],
                               month=reframed_lags['Month'],
                               day=reframed_lags['Day']))
    df_with_date = (
        pd.concat([date.rename('date'), features], axis=1)
        .set_index('date')
        .dropna()
    )

    # Fit on everything but the last row, then forecast that last row.
    # NOTE(review): dropna() runs before this split, so the "future" row only
    # survives if data_transform gives it a non-null 'prices' -- confirm.
    df_past = df_with_date.iloc[:-1, :]
    df_future = df_with_date.iloc[-1:, :]
    next_price = _one_step_forecast(df_past, df_future)

    df_with_forecast = reframed_lags.copy()
    last_label = df_with_forecast.index[-1]
    df_with_forecast.loc[df_with_forecast.index == last_label, 'prices'] = next_price

    # NOTE(review): the evaluation window can reach the final row, i.e. the
    # same row forecast above -- confirm its 'prices' is a real observation.
    train_size = _train_fraction(len(reframed_lags))
    forecasts, actuals = _walk_forward(df_with_date, train_size)

    # r2_score expects (y_true, y_pred); R^2 is not symmetric in its arguments.
    accuracy = r2_score(actuals, forecasts)

    result_arima = pd.DataFrame({'prediction': forecasts, 'data': actuals})
    result_arima.to_csv(_RESULT_CSV)
    return df_with_forecast, accuracy, result_arima
|
|