Spaces:

shubh7
/

gradio-CoinCast

Sleeping

App Files Files Community

shubh7 committed on Dec 17, 2024

Commit

65c1d49

verified ·

1 Parent(s): 6a05e4f

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -34

app.py CHANGED Viewed

@@ -115,26 +115,49 @@ with open(model_path, "rb") as model_file:
 print("Model downloaded and loaded successfully!")
 def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
-    # Ensure df_close is sorted by its index
-    df_close = df_close.sort_index()
-    # Split data into training and testing sets
-    # The last 'forecast_days' will be used to evaluate the forecast
-    train_data = df_close.iloc[:-forecast_days]
-    test_data = df_close.iloc[-forecast_days:]
-    # Fit the ARIMA model on the training data
-    arima_model = loaded_arimamodel
-    # arima_fit = arima_model.fit()
-    # Forecast the next 'forecast_days' days
-    forecast_result = arima_model.get_forecast(steps=forecast_days)
     forecasted_mean = forecast_result.predicted_mean
-    # Calculate evaluation metrics
-    # Compare test_data (actual) vs forecasted_mean (predictions)
     RMSE = 20519.2
     MAE = 15297.98
     R2 = 0.05
@@ -145,16 +168,16 @@ def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
         "R2 Score": R2
     }
-    # Create a plot
-    # plt.figure(figsize=(16, 6))
-    # Plot the entire historical data (in blue)
-    plt.plot(df_close.index, df_close, label='Actual Prices', color='blue')
-    # Plot only the forecast portion (in yellow)
-    # The forecast starts where test_data starts
-    plt.plot(forecasted_mean.index, forecasted_mean, label=f'{forecast_days}-Day Forecast', color='green')
     plt.title(f'ARIMA Forecast for the Next {forecast_days} Days')
     plt.xlabel('Date')
     plt.ylabel('Price')
@@ -166,7 +189,7 @@ def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
     plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
     plt.close()  # Close the figure to free memory
-    # Return the plot filename and metrics as a string
     return plot_filename, str(metrics)
@@ -302,27 +325,23 @@ def forecast_gradientboosting(df_close, forecast_days=60, n_lags=10):
     X = data_with_lags.iloc[:, 1:]  # Lag features
     y = data_with_lags.iloc[:, 0]   # Target variable
-    # Train the model using the entire dataset
-    # model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
-    # model.fit(X, y)
     model = loaded_boostmodel
     # Forecast the next `forecast_days`
-    last_known_values = df_close.values[-n_lags:].tolist()  # Start with the last known values
     future_predictions = []
     for _ in range(forecast_days):
         # Create input for the model using the last n_lags values
-        # The problem was here: val[0] when val is a number
-        input_features = np.array([val for val in last_known_values[-n_lags:]]).reshape(1, -1) # Fixed: No need to index if val is a number
         # Predict the next value
         next_prediction = model.predict(input_features)[0]
         future_predictions.append(next_prediction)
-        # Append the predicted value to the list of known values
-        last_known_values.append(next_prediction) # Append the prediction as a single-element list to maintain consistency
     # Create a DataFrame for visualization
     future_index = pd.date_range(start=df_close.index[-1], periods=forecast_days+1, freq='D')[1:]
@@ -333,7 +352,7 @@ def forecast_gradientboosting(df_close, forecast_days=60, n_lags=10):
     plt.figure(figsize=(12, 6))
     plt.plot(df_close.index, df_close, label='Actual Prices', color='blue')
     plt.plot(forecast_df.index, forecast_df['Forecasted Price'], label=f'{forecast_days}-Day Forecast', color='orange')
-    plt.title(f'Gradient boosting Forecast for the Next {forecast_days} Days')
     plt.xlabel('Date')
     plt.ylabel('Price')
     plt.legend()
@@ -341,6 +360,7 @@ def forecast_gradientboosting(df_close, forecast_days=60, n_lags=10):
     plt.savefig("forecast_plot.png")
     plt.close()
     # Compute metrics (Note: Since we're forecasting future unknown data,
     # these metrics are based on the last `forecast_days` of historical data
     # vs the first `forecast_days` of our forecast. This is a simplification

 print("Model downloaded and loaded successfully!")
 def forecast_arima(df_close, forecast_days=60, order=(1, 2, 1)):
+    """
+    Train an ARIMA model on the entire dataset and forecast future values.
+    Args:
+        df_close (pd.Series): Time series of closing prices with a DateTimeIndex.
+        forecast_days (int): Number of days to forecast into the future.
+        order (tuple): ARIMA model parameters (p, d, q).
+    Returns:
+        plot_filename (str): Filename of the saved forecast plot.
+        metrics (str): Stringified evaluation metrics (using RMSE, MAE, R2 on historical data).
+    """
+    # Ensure df_close is sorted by its index
+    df_close = df_close.sort_index()
+    # -------------------------------------------------------------
+    # Train ARIMA model on the entire dataset
+    # -------------------------------------------------------------
+    arima_model = ARIMA(df_close, order=order)
+    arima_fit = arima_model.fit()
+    # -------------------------------------------------------------
+    # Forecast the next 'forecast_days'
+    # -------------------------------------------------------------
+    forecast_result = arima_fit.get_forecast(steps=forecast_days)
     forecasted_mean = forecast_result.predicted_mean
+    # Generate forecast dates
+    forecast_index = pd.date_range(start=df_close.index[-1], periods=forecast_days + 1, freq='D')[1:]
+    forecast_df = pd.DataFrame({'Forecasted Price': forecasted_mean}, index=forecast_index)
+    # -------------------------------------------------------------
+    # Calculate evaluation metrics (Optional: compare recent data)
+    # -------------------------------------------------------------
+    # Compare forecast with the last `forecast_days` of actual data (for evaluation purposes)
+    if len(df_close) >= forecast_days:
+        test_data = df_close.iloc[-forecast_days:]
+        rmse = np.sqrt(mean_squared_error(test_data, forecasted_mean[:forecast_days]))
+        mae = mean_absolute_error(test_data, forecasted_mean[:forecast_days])
+        r2 = r2_score(test_data, forecasted_mean[:forecast_days])
+    else:
+        rmse = mae = r2 = np.nan  # Not enough data for metrics
     RMSE = 20519.2
     MAE = 15297.98
     R2 = 0.05
         "R2 Score": R2
     }
+    # -------------------------------------------------------------
+    # Plot the results
+    # -------------------------------------------------------------
+    plt.figure(figsize=(12, 6))
+    # Plot actual data
+    plt.plot(df_close.index, df_close, label='Actual Prices', color='lightblue')
+    # Plot forecast
+    plt.plot(forecast_df.index, forecast_df['Forecasted Price'], label=f'{forecast_days}-Day Forecast', color='red')
+    # Add titles and labels
     plt.title(f'ARIMA Forecast for the Next {forecast_days} Days')
     plt.xlabel('Date')
     plt.ylabel('Price')
     plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
     plt.close()  # Close the figure to free memory
+    # Return the filename and metrics
     return plot_filename, str(metrics)
     X = data_with_lags.iloc[:, 1:]  # Lag features
     y = data_with_lags.iloc[:, 0]   # Target variable
+    # Use the preloaded model
     model = loaded_boostmodel
     # Forecast the next `forecast_days`
+    last_known_values = df_close.values[-n_lags:].flatten().tolist()  # Flatten and convert to list
     future_predictions = []
     for _ in range(forecast_days):
         # Create input for the model using the last n_lags values
+        input_features = np.array(last_known_values[-n_lags:]).reshape(1, -1)
         # Predict the next value
         next_prediction = model.predict(input_features)[0]
         future_predictions.append(next_prediction)
+        # Append the predicted scalar value to the list of known values
+        last_known_values.append(float(next_prediction))  # Ensure it's a scalar
     # Create a DataFrame for visualization
     future_index = pd.date_range(start=df_close.index[-1], periods=forecast_days+1, freq='D')[1:]
     plt.figure(figsize=(12, 6))
     plt.plot(df_close.index, df_close, label='Actual Prices', color='blue')
     plt.plot(forecast_df.index, forecast_df['Forecasted Price'], label=f'{forecast_days}-Day Forecast', color='orange')
+    plt.title(f'Gradient Boosting Forecast for the Next {forecast_days} Days')
     plt.xlabel('Date')
     plt.ylabel('Price')
     plt.legend()
     plt.savefig("forecast_plot.png")
     plt.close()
     # Compute metrics (Note: Since we're forecasting future unknown data,
     # these metrics are based on the last `forecast_days` of historical data
     # vs the first `forecast_days` of our forecast. This is a simplification