# Streamlit app: explore the AirPassengers monthly series and fit a SARIMAX
# model whose orders are chosen in the sidebar.  The last `forecast_steps`
# observations are held out, forecast, and compared with the actual values.
#
# NOTE: header documentation is kept as `#` comments (not a module docstring)
# so that nothing here can be picked up by Streamlit's "magic" rendering.

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_percentage_error

# Name of the value column in the CSV; used for every lookup below.
PASSENGER_COL = '#Passengers'


def _render_figure(fig):
    """Render *fig* in the Streamlit page, then release it.

    Streamlit re-executes this script on every widget interaction.  Figures
    created through ``plt.subplots()`` stay registered with pyplot until they
    are closed, so without the explicit ``plt.close`` each rerun would leave
    another figure alive in the server process.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


# Streamlit app layout
st.set_page_config(layout="wide")
st.title("Air Passenger Data Analysis and Forecasting")
st.write("Time Series Analysis and Forecasting with SARIMAX")

# Load data
data_path = "AirPassengers.csv"  # Ensure this file is in the correct directory
data = pd.read_csv(data_path, parse_dates=['Month'], index_col='Month')
data.index.freq = 'MS'  # Set the frequency to Month Start to avoid warnings
st.write("Data Preview", data.head())

# Sidebar for SARIMAX parameters: non-seasonal (p, d, q) and seasonal
# (P, D, Q, s) orders, plus the size of the hold-out / forecast window.
st.sidebar.header("SARIMAX Model Parameters")
p = st.sidebar.slider("AR order (p)", 0, 5, 1)
d = st.sidebar.slider("Difference order (d)", 0, 2, 1)
q = st.sidebar.slider("MA order (q)", 0, 5, 1)
P = st.sidebar.slider("Seasonal AR order (P)", 0, 5, 1)
D = st.sidebar.slider("Seasonal Difference order (D)", 0, 2, 1)
Q = st.sidebar.slider("Seasonal MA order (Q)", 0, 5, 1)
s = st.sidebar.selectbox("Seasonal period (s)", [12])  # Monthly seasonality
forecast_steps = st.sidebar.slider(
    "Forecast Steps (Months)", min_value=1, max_value=36, value=12
)

# Time series plot of the full observed series
st.subheader("Air Passenger Traffic Over Time")
fig, ax = plt.subplots()
ax.plot(data.index, data[PASSENGER_COL], color='blue', label="Observed")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Passengers")
ax.set_title("Monthly Air Passenger Data")
ax.legend()
_render_figure(fig)

# Train the model and forecast
if st.button("Train Model and Forecast"):
    try:
        # Hold out the last `forecast_steps` observations as the test set and
        # train on everything before them.
        train_data = data[:-forecast_steps]
        test_data = data[-forecast_steps:]

        # Fit the SARIMAX model on the training data.  The stationarity and
        # invertibility constraints are switched off.
        model = SARIMAX(
            train_data[PASSENGER_COL],
            order=(p, d, q),
            seasonal_order=(P, D, Q, s),
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        sarimax_model = model.fit(disp=False)
        st.success("Model training completed successfully.")

        # Display Model Summary
        st.subheader("Model Summary")
        st.text(sarimax_model.summary())

        # Forecast exactly the held-out period so it can be scored against
        # the actual values.
        forecast = sarimax_model.get_forecast(steps=forecast_steps)
        forecast_df = forecast.summary_frame(alpha=0.05)  # 95% confidence intervals

        # Calculate accuracy (MAPE), expressed as a percentage
        forecasted_values = forecast_df['mean']
        actual_values = test_data[PASSENGER_COL]
        mape = mean_absolute_percentage_error(actual_values, forecasted_values) * 100

        # Display the accuracy.  "Accuracy" is simply 100 - MAPE, so it becomes
        # negative whenever MAPE exceeds 100%.
        st.subheader("Forecast Accuracy")
        st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        st.write(f"Accuracy: {100 - mape:.2f}%")

        # Display forecast data
        st.subheader("Forecast Data")
        st.write(forecasted_values)

        # Display confidence intervals
        lower_conf_int = forecast_df['mean_ci_lower'].rename('lower #Passengers')
        upper_conf_int = forecast_df['mean_ci_upper'].rename('upper #Passengers')
        forecast_confidence_df = pd.concat([lower_conf_int, upper_conf_int], axis=1)
        st.subheader("Forecast Confidence Interval")
        st.write(forecast_confidence_df)

        # Plot forecasted values with confidence intervals on top of the
        # full observed series.
        fig, ax = plt.subplots()
        ax.plot(data.index, data[PASSENGER_COL], label='Observed', color='blue')
        ax.plot(forecasted_values.index, forecasted_values, label='Forecast', color='red')
        ax.fill_between(
            forecasted_values.index,
            lower_conf_int,
            upper_conf_int,
            color='pink',
            alpha=0.3,
        )
        ax.set_xlabel("Date")
        ax.set_ylabel("Number of Passengers")
        ax.set_title("Air Passenger Forecast with Accuracy")
        ax.legend()
        _render_figure(fig)

    except Exception as e:
        # Top-level UI boundary: report any fitting/forecasting failure in the
        # page instead of crashing the app.
        st.error(f"An error occurred during model training or forecasting: {e}")