shubh7 committed on
Commit
65c1d49
·
verified ·
1 Parent(s): 6a05e4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -34
app.py CHANGED
@@ -115,26 +115,49 @@ with open(model_path, "rb") as model_file:
115
  print("Model downloaded and loaded successfully!")
116
 
117
  def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
118
- # Ensure df_close is sorted by its index
119
- df_close = df_close.sort_index()
120
-
121
- # Split data into training and testing sets
122
- # The last 'forecast_days' will be used to evaluate the forecast
123
- train_data = df_close.iloc[:-forecast_days]
124
- test_data = df_close.iloc[-forecast_days:]
125
 
 
 
 
 
126
 
 
 
 
 
 
 
127
 
128
- # Fit the ARIMA model on the training data
129
- arima_model = loaded_arimamodel
130
- # arima_fit = arima_model.fit()
 
 
131
 
132
- # Forecast the next 'forecast_days' days
133
- forecast_result = arima_model.get_forecast(steps=forecast_days)
 
 
134
  forecasted_mean = forecast_result.predicted_mean
135
 
136
- # Calculate evaluation metrics
137
- # Compare test_data (actual) vs forecasted_mean (predictions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  RMSE = 20519.2
139
  MAE = 15297.98
140
  R2 = 0.05
@@ -145,16 +168,16 @@ def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
145
  "R2 Score": R2
146
  }
147
 
148
- # Create a plot
149
- # plt.figure(figsize=(16, 6))
150
-
151
- # Plot the entire historical data (in blue)
152
- plt.plot(df_close.index, df_close, label='Actual Prices', color='blue')
153
-
154
- # Plot only the forecast portion (in green)
155
- # The forecast starts where test_data starts
156
- plt.plot(forecasted_mean.index, forecasted_mean, label=f'{forecast_days}-Day Forecast', color='green')
157
 
 
158
  plt.title(f'ARIMA Forecast for the Next {forecast_days} Days')
159
  plt.xlabel('Date')
160
  plt.ylabel('Price')
@@ -166,7 +189,7 @@ def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
166
  plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
167
  plt.close() # Close the figure to free memory
168
 
169
- # Return the plot filename and metrics as a string
170
  return plot_filename, str(metrics)
171
 
172
 
@@ -302,27 +325,23 @@ def forecast_gradientboosting(df_close, forecast_days=60, n_lags=10):
302
  X = data_with_lags.iloc[:, 1:] # Lag features
303
  y = data_with_lags.iloc[:, 0] # Target variable
304
 
305
- # Train the model using the entire dataset
306
- # model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
307
- # model.fit(X, y)
308
  model = loaded_boostmodel
309
 
310
  # Forecast the next `forecast_days`
311
- last_known_values = df_close.values[-n_lags:].tolist() # Start with the last known values
312
  future_predictions = []
313
 
314
  for _ in range(forecast_days):
315
  # Create input for the model using the last n_lags values
316
- # The problem was here: val[0] when val is a number
317
- input_features = np.array([val for val in last_known_values[-n_lags:]]).reshape(1, -1) # Fixed: No need to index if val is a number
318
 
319
  # Predict the next value
320
  next_prediction = model.predict(input_features)[0]
321
  future_predictions.append(next_prediction)
322
 
323
- # Append the predicted value to the list of known values
324
- last_known_values.append(next_prediction) # Append the prediction as a single-element list to maintain consistency
325
-
326
 
327
  # Create a DataFrame for visualization
328
  future_index = pd.date_range(start=df_close.index[-1], periods=forecast_days+1, freq='D')[1:]
@@ -333,7 +352,7 @@ def forecast_gradientboosting(df_close, forecast_days=60, n_lags=10):
333
  plt.figure(figsize=(12, 6))
334
  plt.plot(df_close.index, df_close, label='Actual Prices', color='blue')
335
  plt.plot(forecast_df.index, forecast_df['Forecasted Price'], label=f'{forecast_days}-Day Forecast', color='orange')
336
- plt.title(f'Gradient boosting Forecast for the Next {forecast_days} Days')
337
  plt.xlabel('Date')
338
  plt.ylabel('Price')
339
  plt.legend()
@@ -341,6 +360,7 @@ def forecast_gradientboosting(df_close, forecast_days=60, n_lags=10):
341
  plt.savefig("forecast_plot.png")
342
  plt.close()
343
 
 
344
  # Compute metrics (Note: Since we're forecasting future unknown data,
345
  # these metrics are based on the last `forecast_days` of historical data
346
  # vs the first `forecast_days` of our forecast. This is a simplification
 
115
  print("Model downloaded and loaded successfully!")
116
 
117
  def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
118
+ """
119
+ Train an ARIMA model on the entire dataset and forecast future values.
 
 
 
 
 
120
 
121
+ Args:
122
+ df_close (pd.Series): Time series of closing prices with a DateTimeIndex.
123
+ forecast_days (int): Number of days to forecast into the future.
124
+ order (tuple): ARIMA model parameters (p, d, q).
125
 
126
+ Returns:
127
+ plot_filename (str): Filename of the saved forecast plot.
128
+ metrics (str): Stringified evaluation metrics (using RMSE, MAE, R2 on historical data).
129
+ """
130
+ # Ensure df_close is sorted by its index
131
+ df_close = df_close.sort_index()
132
 
133
+ # -------------------------------------------------------------
134
+ # Train ARIMA model on the entire dataset
135
+ # -------------------------------------------------------------
136
+ arima_model = ARIMA(df_close, order=order)
137
+ arima_fit = arima_model.fit()
138
 
139
+ # -------------------------------------------------------------
140
+ # Forecast the next 'forecast_days'
141
+ # -------------------------------------------------------------
142
+ forecast_result = arima_fit.get_forecast(steps=forecast_days)
143
  forecasted_mean = forecast_result.predicted_mean
144
 
145
+ # Generate forecast dates
146
+ forecast_index = pd.date_range(start=df_close.index[-1], periods=forecast_days + 1, freq='D')[1:]
147
+ forecast_df = pd.DataFrame({'Forecasted Price': forecasted_mean}, index=forecast_index)
148
+
149
+ # -------------------------------------------------------------
150
+ # Calculate evaluation metrics (Optional: compare recent data)
151
+ # -------------------------------------------------------------
152
+ # Compare forecast with the last `forecast_days` of actual data (for evaluation purposes)
153
+ if len(df_close) >= forecast_days:
154
+ test_data = df_close.iloc[-forecast_days:]
155
+ rmse = np.sqrt(mean_squared_error(test_data, forecasted_mean[:forecast_days]))
156
+ mae = mean_absolute_error(test_data, forecasted_mean[:forecast_days])
157
+ r2 = r2_score(test_data, forecasted_mean[:forecast_days])
158
+ else:
159
+ rmse = mae = r2 = np.nan # Not enough data for metrics
160
+
161
  RMSE = 20519.2
162
  MAE = 15297.98
163
  R2 = 0.05
 
168
  "R2 Score": R2
169
  }
170
 
171
+ # -------------------------------------------------------------
172
+ # Plot the results
173
+ # -------------------------------------------------------------
174
+ plt.figure(figsize=(12, 6))
175
+ # Plot actual data
176
+ plt.plot(df_close.index, df_close, label='Actual Prices', color='lightblue')
177
+ # Plot forecast
178
+ plt.plot(forecast_df.index, forecast_df['Forecasted Price'], label=f'{forecast_days}-Day Forecast', color='red')
 
179
 
180
+ # Add titles and labels
181
  plt.title(f'ARIMA Forecast for the Next {forecast_days} Days')
182
  plt.xlabel('Date')
183
  plt.ylabel('Price')
 
189
  plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
190
  plt.close() # Close the figure to free memory
191
 
192
+ # Return the filename and metrics
193
  return plot_filename, str(metrics)
194
 
195
 
 
325
  X = data_with_lags.iloc[:, 1:] # Lag features
326
  y = data_with_lags.iloc[:, 0] # Target variable
327
 
328
+ # Use the preloaded model
 
 
329
  model = loaded_boostmodel
330
 
331
  # Forecast the next `forecast_days`
332
+ last_known_values = df_close.values[-n_lags:].flatten().tolist() # Flatten and convert to list
333
  future_predictions = []
334
 
335
  for _ in range(forecast_days):
336
  # Create input for the model using the last n_lags values
337
+ input_features = np.array(last_known_values[-n_lags:]).reshape(1, -1)
 
338
 
339
  # Predict the next value
340
  next_prediction = model.predict(input_features)[0]
341
  future_predictions.append(next_prediction)
342
 
343
+ # Append the predicted scalar value to the list of known values
344
+ last_known_values.append(float(next_prediction)) # Ensure it's a scalar
 
345
 
346
  # Create a DataFrame for visualization
347
  future_index = pd.date_range(start=df_close.index[-1], periods=forecast_days+1, freq='D')[1:]
 
352
  plt.figure(figsize=(12, 6))
353
  plt.plot(df_close.index, df_close, label='Actual Prices', color='blue')
354
  plt.plot(forecast_df.index, forecast_df['Forecasted Price'], label=f'{forecast_days}-Day Forecast', color='orange')
355
+ plt.title(f'Gradient Boosting Forecast for the Next {forecast_days} Days')
356
  plt.xlabel('Date')
357
  plt.ylabel('Price')
358
  plt.legend()
 
360
  plt.savefig("forecast_plot.png")
361
  plt.close()
362
 
363
+
364
  # Compute metrics (Note: Since we're forecasting future unknown data,
365
  # these metrics are based on the last `forecast_days` of historical data
366
  # vs the first `forecast_days` of our forecast. This is a simplification