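# Spaces: Sleeping
# NOTE(review): the line above looks like page-header residue captured along
# with the file rather than program code -- confirm and drop if so.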
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from statsmodels.tsa.statespace.sarimax import SARIMAX | |
from sklearn.metrics import mean_absolute_percentage_error | |
# Streamlit app layout
st.set_page_config(layout="wide")
st.title("Air Passenger Data Analysis and Forecasting")
st.write("Time Series Analysis and Forecasting with SARIMAX")


def _load_passenger_data(csv_path):
    """Read the passenger CSV into a DataFrame indexed by its 'Month' column.

    The 'Month' column is parsed as dates and used as the index. The index
    frequency is then set to 'MS' (month start) so downstream time-series
    code does not warn about a missing frequency.
    """
    frame = pd.read_csv(csv_path, index_col='Month', parse_dates=['Month'])
    frame.index.freq = 'MS'
    return frame


# Load data
data_path = "AirPassengers.csv"  # Ensure this file is in the correct directory
data = _load_passenger_data(data_path)
st.write("Data Preview", data.head())
# Sidebar for SARIMAX parameters
with st.sidebar:
    st.header("SARIMAX Model Parameters")

    # Non-seasonal (p, d, q) orders
    p = st.slider("AR order (p)", min_value=0, max_value=5, value=1)
    d = st.slider("Difference order (d)", min_value=0, max_value=2, value=1)
    q = st.slider("MA order (q)", min_value=0, max_value=5, value=1)

    # Seasonal (P, D, Q, s) orders
    P = st.slider("Seasonal AR order (P)", min_value=0, max_value=5, value=1)
    D = st.slider("Seasonal Difference order (D)", min_value=0, max_value=2, value=1)
    Q = st.slider("Seasonal MA order (Q)", min_value=0, max_value=5, value=1)
    s = st.selectbox("Seasonal period (s)", options=[12])  # Monthly seasonality

    # Number of months to forecast
    forecast_steps = st.slider("Forecast Steps (Months)", min_value=1, max_value=36, value=12)
# Display the observed time series plot
st.subheader("Air Passenger Traffic Over Time")
fig, ax = plt.subplots()
try:
    ax.plot(data.index, data['#Passengers'], color='blue', label="Observed")
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of Passengers")
    ax.set_title("Monthly Air Passenger Data")
    ax.legend()
    st.pyplot(fig)
finally:
    # pyplot keeps every figure it creates registered until it is closed.
    # Streamlit reruns this script on each interaction, so release the figure
    # once it has been rendered instead of letting figures pile up.
    plt.close(fig)
# Train the model and forecast.
# On button press: hold out the last `forecast_steps` observations, fit SARIMAX
# on the rest, forecast the held-out period, and report MAPE, the forecast
# values, the 95% confidence interval and a comparison plot. Any exception is
# shown to the user through st.error instead of propagating.
if st.button("Train Model and Forecast"):
    try:
        # Split data into training and test sets
        train_data = data[:-forecast_steps]  # Everything except the last 'forecast_steps' rows
        test_data = data[-forecast_steps:]   # The last 'forecast_steps' rows, used for evaluation

        # Fit the SARIMAX model on the training data
        model = SARIMAX(
            train_data['#Passengers'],
            order=(p, d, q),
            seasonal_order=(P, D, Q, s),
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        sarimax_model = model.fit(disp=False)
        st.success("Model training completed successfully.")

        # Display Model Summary
        st.subheader("Model Summary")
        st.text(sarimax_model.summary())

        # Generate forecast for the test period to evaluate accuracy
        forecast = sarimax_model.get_forecast(steps=forecast_steps)
        forecast_df = forecast.summary_frame(alpha=0.05)  # 95% confidence intervals

        # Calculate accuracy (MAPE) of the forecast against the held-out rows
        forecasted_values = forecast_df['mean']
        actual_values = test_data['#Passengers']
        mape = mean_absolute_percentage_error(actual_values, forecasted_values) * 100  # MAPE in percentage

        # Display the accuracy
        st.subheader("Forecast Accuracy")
        st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        st.write(f"Accuracy: {100 - mape:.2f}%")

        # Display forecast data
        st.subheader("Forecast Data")
        st.write(forecasted_values)

        # Display confidence intervals
        lower_conf_int = forecast_df['mean_ci_lower'].rename('lower #Passengers')
        upper_conf_int = forecast_df['mean_ci_upper'].rename('upper #Passengers')
        forecast_confidence_df = pd.concat([lower_conf_int, upper_conf_int], axis=1)
        st.subheader("Forecast Confidence Interval")
        st.write(forecast_confidence_df)

        # Plot forecasted values with confidence intervals
        fig, ax = plt.subplots()
        try:
            ax.plot(data.index, data['#Passengers'], label='Observed', color='blue')
            ax.plot(forecasted_values.index, forecasted_values, label='Forecast', color='red')
            # Label the shaded band so the legend explains what it shows.
            ax.fill_between(
                forecasted_values.index,
                lower_conf_int,
                upper_conf_int,
                color='pink',
                alpha=0.3,
                label='95% Confidence Interval',
            )
            ax.set_xlabel("Date")
            ax.set_ylabel("Number of Passengers")
            ax.set_title("Air Passenger Forecast with Accuracy")
            ax.legend()
            st.pyplot(fig)
        finally:
            # Close the figure even if plotting or rendering fails; pyplot
            # otherwise keeps it registered across script reruns.
            plt.close(fig)
    except Exception as e:
        # Top-level UI boundary: report the failure in the page rather than crash.
        st.error(f"An error occurred during model training or forecasting: {e}")