File size: 3,635 Bytes
05a3e2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import model_utils as mu
from statsmodels.tsa.arima.model import ARIMA
def model_run(df_all):
    """Forecast the next 'prices' value with an ARIMA(2, 1, 2) model.

    The input is reshaped by ``mu.data_transform`` and an ARIMA model with
    exogenous regressors is fitted on every row except the last one; the
    last row is treated as the day to forecast. The forecast is written
    into the 'prices' column of the last row of a copy of the transformed
    frame. A rolling one-step-ahead back-test is then run to estimate the
    model's R^2 score.

    Args:
        df_all: Raw input data, passed unchanged to ``mu.data_transform``
            together with tomorrow's timestamp.

    Returns:
        A tuple ``(df_with_forecast, accuracy, result_arima)`` where
        ``df_with_forecast`` is a copy of the transformed frame whose last
        'prices' value is the forecast, ``accuracy`` is the R^2 score of the
        rolling back-test, and ``result_arima`` is a DataFrame with the
        columns 'prediction' and 'data' (back-test forecasts and the
        observed prices they are compared with).

    Side effects:
        Prints a progress message and writes ``result_arima`` to
        'result_arima_kat.csv' in the current working directory.
    """
    # Function-scope import: the module header does not import sklearn.
    from sklearn.metrics import r2_score

    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))

    # ---------------- DATASET MANIPULATION FOR SUPERVISED LEARNING ----------------
    # The second value returned by data_transform is not needed here.
    reframed_lags, _ = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')

    df = reframed_lags[['prices', 'price_eth', 'GSPC', 'Day', 'Month',
                        'TNX', 'Employment', 'google_trend', 'EURUSD']]
    date = pd.to_datetime(dict(year=reframed_lags['Year'],
                               month=reframed_lags['Month'],
                               day=reframed_lags['Day']))
    df_with_date = pd.concat([date, df], axis=1)
    df_with_date.columns = np.append('date', df.columns)
    df_with_date = df_with_date.set_index('date').dropna()

    # Everything but the last row is history; the last row supplies the
    # exogenous values for the day being forecast.
    df_past = df_with_date.iloc[:-1, :]
    df_future = df_with_date.iloc[-1:, :]

    model = ARIMA(df_past['prices'],
                  exog=df_past.drop(columns=['prices']),
                  order=(2, 1, 2))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=1,
                                  exog=df_future.drop(columns='prices'))

    # Store the forecast in the last row of a copy of the transformed frame.
    df_with_forecast = reframed_lags.copy()
    is_last_row = df_with_forecast.index == df_with_forecast.index[-1]
    df_with_forecast.loc[is_last_row, 'prices'] = np.asarray(forecast)[-1]

    # ------------------------------ MODEL ACCURACY ------------------------------
    # Rolling-window back-test: the training share grows with the amount of
    # available data.
    if len(reframed_lags) > 500:
        train_size = 0.9
    elif len(reframed_lags) > 200:
        train_size = 0.8
    else:
        train_size = 0.7

    data_arima = df_with_date
    n_rows = len(data_arima)
    first_test_row = int(n_rows * train_size)
    window_length = int(n_rows - n_rows * train_size)

    backtest_predictions = []
    backtest_actuals = []
    for step in range(window_length):
        # Train on everything before the split and predict the single row
        # that follows it; the split moves forward one row per iteration.
        split = first_test_row + step
        train_rows = data_arima.iloc[:split, :]
        test_rows = data_arima.iloc[split:split + 1, :]

        arima = ARIMA(train_rows['prices'],
                      exog=train_rows.drop(columns='prices'),
                      order=(2, 1, 2))
        arima_fit = arima.fit()
        step_forecast = arima_fit.forecast(
            steps=1, exog=test_rows.drop(columns='prices'))

        backtest_predictions.extend(np.asarray(step_forecast, dtype=float))
        backtest_actuals.extend(np.asarray(test_rows['prices'], dtype=float))

    predictions = np.asarray(backtest_predictions, dtype=float)
    test_labels_all = np.asarray(backtest_actuals, dtype=float)

    # r2_score expects (y_true, y_pred); R^2 is not symmetric, so the
    # observed prices must come first.
    accuracy = r2_score(test_labels_all, predictions)

    result_arima = pd.DataFrame({'prediction': predictions,
                                 'data': test_labels_all})
    result_arima.to_csv('result_arima_kat.csv')
    return df_with_forecast, accuracy, result_arima
|