# Rahul-Crudcook's picture
# Upload 3 files
# 094aaec verified
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_percentage_error
# Streamlit app layout: wide page plus the title and subtitle shown at the top.
st.set_page_config(layout="wide")
st.title("Air Passenger Data Analysis and Forecasting")
st.write("Time Series Analysis and Forecasting with SARIMAX")

# Load data: the CSV is read with the 'Month' column parsed as dates and used
# as the index of the resulting DataFrame.
data_path = "AirPassengers.csv"  # Ensure this file is in the correct directory
try:
    data = pd.read_csv(data_path, parse_dates=['Month'], index_col='Month')
except FileNotFoundError:
    # Nothing below can run without the dataset; show a readable message
    # instead of a raw traceback and halt this script run.
    st.error(f"Data file '{data_path}' was not found. Place it in the app directory and reload.")
    st.stop()
data.index.freq = 'MS'  # Set the frequency to Month Start to avoid warnings
st.write("Data Preview", data.head())
# Sidebar controls: SARIMAX orders and the number of months to forecast.
sidebar = st.sidebar
sidebar.header("SARIMAX Model Parameters")

# Non-seasonal orders (p, d, q)
p = sidebar.slider("AR order (p)", min_value=0, max_value=5, value=1)
d = sidebar.slider("Difference order (d)", min_value=0, max_value=2, value=1)
q = sidebar.slider("MA order (q)", min_value=0, max_value=5, value=1)

# Seasonal orders (P, D, Q) and the seasonal period s
P = sidebar.slider("Seasonal AR order (P)", min_value=0, max_value=5, value=1)
D = sidebar.slider("Seasonal Difference order (D)", min_value=0, max_value=2, value=1)
Q = sidebar.slider("Seasonal MA order (Q)", min_value=0, max_value=5, value=1)
s = sidebar.selectbox("Seasonal period (s)", [12])  # Monthly seasonality

# Forecast horizon in months; the same value is used as the hold-out size
forecast_steps = sidebar.slider("Forecast Steps (Months)", 1, 36, 12)
# Display the observed time series as a line plot
st.subheader("Air Passenger Traffic Over Time")
fig, ax = plt.subplots()
ax.plot(data.index, data['#Passengers'], color='blue', label="Observed")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Passengers")
ax.set_title("Monthly Air Passenger Data")
ax.legend()
st.pyplot(fig)
# Streamlit re-executes this script on every interaction; close the figure so
# pyplot does not keep one more open figure alive per rerun.
plt.close(fig)
# Train the model and forecast.
# The last `forecast_steps` observations are held out as a test set, the model
# is fitted on the remaining rows, and the forecast over the held-out months
# is compared against the actual values.
if st.button("Train Model and Forecast"):
    try:
        # An empty training set cannot be fitted; fail with a clear message
        # (reported through the handler at the bottom of this block).
        if forecast_steps >= len(data):
            raise ValueError(
                f"Forecast steps ({forecast_steps}) must be smaller than the "
                f"number of observations ({len(data)})."
            )

        # Split data into training and test sets
        train_data = data[:-forecast_steps]  # Everything except the last 'forecast_steps' rows
        test_data = data[-forecast_steps:]   # The last 'forecast_steps' rows

        # Fit the SARIMAX model on the training data
        model = SARIMAX(
            train_data['#Passengers'],
            order=(p, d, q),
            seasonal_order=(P, D, Q, s),
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        sarimax_model = model.fit(disp=False)
        st.success("Model training completed successfully.")

        # Display Model Summary (rendered explicitly as plain text)
        st.subheader("Model Summary")
        st.text(sarimax_model.summary().as_text())

        # Generate forecast for the test period to evaluate accuracy
        forecast = sarimax_model.get_forecast(steps=forecast_steps)
        forecast_df = forecast.summary_frame(alpha=0.05)  # 95% confidence intervals

        # Calculate accuracy (MAPE)
        forecasted_values = forecast_df['mean']
        actual_values = test_data['#Passengers']
        mape = mean_absolute_percentage_error(actual_values, forecasted_values) * 100  # MAPE in percentage

        # Display the accuracy
        st.subheader("Forecast Accuracy")
        st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        st.write(f"Accuracy: {100 - mape:.2f}%")

        # Display forecast data
        st.subheader("Forecast Data")
        st.write(forecasted_values)

        # Display confidence intervals
        lower_conf_int = forecast_df['mean_ci_lower'].rename('lower #Passengers')
        upper_conf_int = forecast_df['mean_ci_upper'].rename('upper #Passengers')
        forecast_confidence_df = pd.concat([lower_conf_int, upper_conf_int], axis=1)
        st.subheader("Forecast Confidence Interval")
        st.write(forecast_confidence_df)

        # Plot forecasted values with confidence intervals
        fig, ax = plt.subplots()
        ax.plot(data.index, data['#Passengers'], label='Observed', color='blue')
        ax.plot(forecasted_values.index, forecasted_values, label='Forecast', color='red')
        ax.fill_between(
            forecasted_values.index,
            lower_conf_int,
            upper_conf_int,
            color='pink',
            alpha=0.3,
            label='95% Confidence Interval',  # so the shaded band appears in the legend
        )
        ax.set_xlabel("Date")
        ax.set_ylabel("Number of Passengers")
        ax.set_title("Air Passenger Forecast with Accuracy")
        ax.legend()
        st.pyplot(fig)
        # Close the figure so repeated button clicks/reruns do not accumulate
        # open figures in pyplot.
        plt.close(fig)
    except Exception as e:
        # UI boundary: report any fitting/forecasting failure in the page
        # instead of crashing the app.
        st.error(f"An error occurred during model training or forecasting: {e}")