Rahul-Crudcook's picture
Upload 3 files
094aaec verified
raw
history blame
4.34 kB
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_percentage_error
# --- Page chrome: wide layout, a title and a one-line description ---
st.set_page_config(layout="wide")
st.title("Air Passenger Data Analysis and Forecasting")
st.write("Time Series Analysis and Forecasting with SARIMAX")

# --- Dataset ---
# Relative path: the CSV is resolved against the process's working directory.
data_path = "AirPassengers.csv"
# The 'Month' column is parsed as dates and used as the frame's index.
data = pd.read_csv(data_path, index_col='Month', parse_dates=['Month'])
# Declare a month-start frequency on the index up front; per the original
# author's note this is done to avoid frequency warnings downstream.
data.index.freq = 'MS'
# Show the first rows so the user can sanity-check what was loaded.
preview_rows = data.head()
st.write("Data Preview", preview_rows)
# --- Sidebar: SARIMAX orders and evaluation horizon ---
st.sidebar.header("SARIMAX Model Parameters")

# Non-seasonal orders (p, d, q); each slider starts at 1.
p = st.sidebar.slider("AR order (p)", min_value=0, max_value=5, value=1)
d = st.sidebar.slider("Difference order (d)", min_value=0, max_value=2, value=1)
q = st.sidebar.slider("MA order (q)", min_value=0, max_value=5, value=1)

# Seasonal orders (P, D, Q), with the same ranges and defaults as above.
P = st.sidebar.slider("Seasonal AR order (P)", min_value=0, max_value=5, value=1)
D = st.sidebar.slider("Seasonal Difference order (D)", min_value=0, max_value=2, value=1)
Q = st.sidebar.slider("Seasonal MA order (Q)", min_value=0, max_value=5, value=1)

# Only a 12-step (monthly) seasonal period is offered.
s = st.sidebar.selectbox("Seasonal period (s)", options=[12])

# Number of months used both as the hold-out size and as the forecast length
# in the training section of this script.
forecast_steps = st.sidebar.slider("Forecast Steps (Months)", 1, 36, 12)
# Plot the full observed series so the user can inspect it before modelling.
st.subheader("Air Passenger Traffic Over Time")
fig, ax = plt.subplots()
try:
    ax.plot(data.index, data['#Passengers'], color='blue', label="Observed")
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of Passengers")
    ax.set_title("Monthly Air Passenger Data")
    ax.legend()
    st.pyplot(fig)
finally:
    # Figures created through pyplot stay registered until explicitly closed.
    # This script is re-executed on every widget interaction, so without the
    # close each rerun would leave another open figure behind.
    plt.close(fig)
# Train on everything except the last `forecast_steps` months, forecast that
# held-out window, and report accuracy, the forecast table, its confidence
# band and a comparison plot. Runs only when the button is pressed.
if st.button("Train Model and Forecast"):
    try:
        # The hold-out takes the last `forecast_steps` rows, so at least one
        # row must be left over for training. Fail with a clear message rather
        # than letting the model choke on an empty training set.
        if forecast_steps >= len(data):
            raise ValueError(
                f"cannot hold out {forecast_steps} months from a dataset "
                f"with only {len(data)} rows"
            )

        # Split data into training and test sets.
        train_data = data[:-forecast_steps]
        test_data = data[-forecast_steps:]

        # Fit the SARIMAX model on the training data using the sidebar orders.
        model = SARIMAX(
            train_data['#Passengers'],
            order=(p, d, q),
            seasonal_order=(P, D, Q, s),
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        sarimax_model = model.fit(disp=False)
        st.success("Model training completed successfully.")

        # Display the fitted model's summary as preformatted text.
        st.subheader("Model Summary")
        st.text(sarimax_model.summary())

        # Forecast exactly the held-out window so it can be scored against
        # the actual values; alpha=0.05 gives 95% confidence intervals.
        forecast = sarimax_model.get_forecast(steps=forecast_steps)
        forecast_df = forecast.summary_frame(alpha=0.05)

        # Accuracy: MAPE is returned as a fraction and scaled to a percentage.
        forecasted_values = forecast_df['mean']
        actual_values = test_data['#Passengers']
        mape = mean_absolute_percentage_error(actual_values, forecasted_values) * 100

        st.subheader("Forecast Accuracy")
        st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        # Reported as 100 - MAPE; this goes negative if MAPE exceeds 100%.
        st.write(f"Accuracy: {100 - mape:.2f}%")

        # Display forecast data.
        st.subheader("Forecast Data")
        st.write(forecasted_values)

        # Display the lower/upper confidence bounds side by side.
        lower_conf_int = forecast_df['mean_ci_lower'].rename('lower #Passengers')
        upper_conf_int = forecast_df['mean_ci_upper'].rename('upper #Passengers')
        forecast_confidence_df = pd.concat([lower_conf_int, upper_conf_int], axis=1)
        st.subheader("Forecast Confidence Interval")
        st.write(forecast_confidence_df)

        # Plot the observed series, the forecast and its confidence band.
        fig, ax = plt.subplots()
        try:
            ax.plot(data.index, data['#Passengers'], label='Observed', color='blue')
            ax.plot(forecasted_values.index, forecasted_values, label='Forecast', color='red')
            ax.fill_between(forecasted_values.index, lower_conf_int, upper_conf_int,
                            color='pink', alpha=0.3)
            ax.set_xlabel("Date")
            ax.set_ylabel("Number of Passengers")
            ax.set_title("Air Passenger Forecast with Accuracy")
            ax.legend()
            st.pyplot(fig)
        finally:
            # Release the figure even if plotting fails; pyplot otherwise
            # keeps it registered and every button press would add another.
            plt.close(fig)
    except Exception as e:
        # UI boundary: show any failure in the page instead of a raw traceback.
        st.error(f"An error occurred during model training or forecasting: {e}")