File size: 4,335 Bytes
094aaec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_percentage_error

# Streamlit app layout: wide page, a title, and a one-line description.
st.set_page_config(layout="wide")
st.title("Air Passenger Data Analysis and Forecasting")
st.write("Time Series Analysis and Forecasting with SARIMAX")

# Load data
data_path = "AirPassengers.csv"  # Ensure this file is in the correct directory
try:
    # 'Month' is parsed as dates and becomes the index of the frame.
    data = pd.read_csv(data_path, parse_dates=['Month'], index_col='Month')
    # Set the frequency to Month Start to avoid warnings. pandas rejects the
    # assignment with ValueError when the index does not match that frequency.
    data.index.freq = 'MS'
except (OSError, ValueError) as e:
    # Everything below needs the dataset, so report the problem in the page
    # (same style as the training section) and end this script run instead of
    # surfacing a raw traceback. OSError covers a missing/unreadable file;
    # ValueError covers empty or unparsable CSVs and a missing 'Month' column.
    st.error(f"Could not load '{data_path}': {e}")
    st.stop()
st.write("Data Preview", data.head())

# Model configuration widgets. Everything created inside this context is
# rendered in the sidebar, in the order written.
with st.sidebar:
    st.header("SARIMAX Model Parameters")

    # Non-seasonal orders (p, d, q)
    p = st.slider("AR order (p)", min_value=0, max_value=5, value=1)
    d = st.slider("Difference order (d)", min_value=0, max_value=2, value=1)
    q = st.slider("MA order (q)", min_value=0, max_value=5, value=1)

    # Seasonal orders (P, D, Q)
    P = st.slider("Seasonal AR order (P)", min_value=0, max_value=5, value=1)
    D = st.slider("Seasonal Difference order (D)", min_value=0, max_value=2, value=1)
    Q = st.slider("Seasonal MA order (Q)", min_value=0, max_value=5, value=1)

    # Only a 12-month season is offered (monthly seasonality).
    s = st.selectbox("Seasonal period (s)", [12])

    # Number of months to forecast; the training section also uses this as
    # the size of the hold-out window.
    forecast_steps = st.slider("Forecast Steps (Months)", 1, 36, 12)

# Plot the full observed passenger series.
st.subheader("Air Passenger Traffic Over Time")
fig, ax = plt.subplots()
ax.plot(data.index, data['#Passengers'], color='blue', label="Observed")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Passengers")
ax.set_title("Monthly Air Passenger Data")
ax.legend()
st.pyplot(fig)
# Streamlit re-executes this script on every interaction within the same
# process, and pyplot keeps every figure it created alive until it is closed.
# Close the figure once it has been rendered so figures do not pile up.
plt.close(fig)

# Train the model and forecast.
# On button press: hold out the last `forecast_steps` months, fit SARIMAX on
# the rest, forecast the held-out window, report MAPE against the actual
# values, and plot the forecast with its 95% confidence band. Any failure is
# reported in the page via st.error instead of a traceback.
if st.button("Train Model and Forecast"):
    try:
        # Split data into training and test sets
        train_data = data[:-forecast_steps]  # Everything except the last 'forecast_steps' rows
        test_data = data[-forecast_steps:]   # The last 'forecast_steps' rows, used for evaluation

        # Fit the SARIMAX model on the training data
        model = SARIMAX(train_data['#Passengers'],
                        order=(p, d, q),
                        seasonal_order=(P, D, Q, s),
                        enforce_stationarity=False,
                        enforce_invertibility=False)
        sarimax_model = model.fit(disp=False)
        st.success("Model training completed successfully.")

        # Display Model Summary
        st.subheader("Model Summary")
        # Render the summary table explicitly as text rather than relying on
        # implicit string conversion of the Summary object.
        st.text(sarimax_model.summary().as_text())

        # Generate forecast for the test period to evaluate accuracy
        forecast = sarimax_model.get_forecast(steps=forecast_steps)
        forecast_df = forecast.summary_frame(alpha=0.05)  # 95% confidence intervals

        # Calculate accuracy (MAPE)
        forecasted_values = forecast_df['mean']
        actual_values = test_data['#Passengers']
        mape = mean_absolute_percentage_error(actual_values, forecasted_values) * 100  # MAPE in percentage

        # Display the accuracy
        st.subheader("Forecast Accuracy")
        st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        # MAPE is unbounded above, so 100 - MAPE can go negative for a poorly
        # specified model; floor the displayed accuracy at 0%.
        accuracy = max(0.0, 100 - mape)
        st.write(f"Accuracy: {accuracy:.2f}%")

        # Display forecast data
        st.subheader("Forecast Data")
        st.write(forecasted_values)

        # Display confidence intervals
        lower_conf_int = forecast_df['mean_ci_lower'].rename('lower #Passengers')
        upper_conf_int = forecast_df['mean_ci_upper'].rename('upper #Passengers')
        forecast_confidence_df = pd.concat([lower_conf_int, upper_conf_int], axis=1)
        st.subheader("Forecast Confidence Interval")
        st.write(forecast_confidence_df)

        # Plot forecasted values with confidence intervals
        fig, ax = plt.subplots()
        try:
            ax.plot(data.index, data['#Passengers'], label='Observed', color='blue')
            ax.plot(forecasted_values.index, forecasted_values, label='Forecast', color='red')
            ax.fill_between(forecasted_values.index, lower_conf_int, upper_conf_int,
                            color='pink', alpha=0.3, label='95% Confidence Interval')
            ax.set_xlabel("Date")
            ax.set_ylabel("Number of Passengers")
            ax.set_title("Air Passenger Forecast with Accuracy")
            ax.legend()
            st.pyplot(fig)
        finally:
            # pyplot keeps figures alive until they are closed and the script
            # reruns in the same process, so release the figure even when
            # plotting or rendering raised.
            plt.close(fig)

    except Exception as e:
        # Top-level UI boundary: report any fitting/forecasting failure in the page.
        st.error(f"An error occurred during model training or forecasting: {e}")