Spaces:
Sleeping
Sleeping
Rahul-Crudcook
committed on
Upload 3 files
Browse files
- AirPassengers.csv +145 -0
- app.py +97 -0
- requirements.txt +6 -0
AirPassengers.csv
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Month,#Passengers
|
2 |
+
1949-01,112
|
3 |
+
1949-02,118
|
4 |
+
1949-03,132
|
5 |
+
1949-04,129
|
6 |
+
1949-05,121
|
7 |
+
1949-06,135
|
8 |
+
1949-07,148
|
9 |
+
1949-08,148
|
10 |
+
1949-09,136
|
11 |
+
1949-10,119
|
12 |
+
1949-11,104
|
13 |
+
1949-12,118
|
14 |
+
1950-01,115
|
15 |
+
1950-02,126
|
16 |
+
1950-03,141
|
17 |
+
1950-04,135
|
18 |
+
1950-05,125
|
19 |
+
1950-06,149
|
20 |
+
1950-07,170
|
21 |
+
1950-08,170
|
22 |
+
1950-09,158
|
23 |
+
1950-10,133
|
24 |
+
1950-11,114
|
25 |
+
1950-12,140
|
26 |
+
1951-01,145
|
27 |
+
1951-02,150
|
28 |
+
1951-03,178
|
29 |
+
1951-04,163
|
30 |
+
1951-05,172
|
31 |
+
1951-06,178
|
32 |
+
1951-07,199
|
33 |
+
1951-08,199
|
34 |
+
1951-09,184
|
35 |
+
1951-10,162
|
36 |
+
1951-11,146
|
37 |
+
1951-12,166
|
38 |
+
1952-01,171
|
39 |
+
1952-02,180
|
40 |
+
1952-03,193
|
41 |
+
1952-04,181
|
42 |
+
1952-05,183
|
43 |
+
1952-06,218
|
44 |
+
1952-07,230
|
45 |
+
1952-08,242
|
46 |
+
1952-09,209
|
47 |
+
1952-10,191
|
48 |
+
1952-11,172
|
49 |
+
1952-12,194
|
50 |
+
1953-01,196
|
51 |
+
1953-02,196
|
52 |
+
1953-03,236
|
53 |
+
1953-04,235
|
54 |
+
1953-05,229
|
55 |
+
1953-06,243
|
56 |
+
1953-07,264
|
57 |
+
1953-08,272
|
58 |
+
1953-09,237
|
59 |
+
1953-10,211
|
60 |
+
1953-11,180
|
61 |
+
1953-12,201
|
62 |
+
1954-01,204
|
63 |
+
1954-02,188
|
64 |
+
1954-03,235
|
65 |
+
1954-04,227
|
66 |
+
1954-05,234
|
67 |
+
1954-06,264
|
68 |
+
1954-07,302
|
69 |
+
1954-08,293
|
70 |
+
1954-09,259
|
71 |
+
1954-10,229
|
72 |
+
1954-11,203
|
73 |
+
1954-12,229
|
74 |
+
1955-01,242
|
75 |
+
1955-02,233
|
76 |
+
1955-03,267
|
77 |
+
1955-04,269
|
78 |
+
1955-05,270
|
79 |
+
1955-06,315
|
80 |
+
1955-07,364
|
81 |
+
1955-08,347
|
82 |
+
1955-09,312
|
83 |
+
1955-10,274
|
84 |
+
1955-11,237
|
85 |
+
1955-12,278
|
86 |
+
1956-01,284
|
87 |
+
1956-02,277
|
88 |
+
1956-03,317
|
89 |
+
1956-04,313
|
90 |
+
1956-05,318
|
91 |
+
1956-06,374
|
92 |
+
1956-07,413
|
93 |
+
1956-08,405
|
94 |
+
1956-09,355
|
95 |
+
1956-10,306
|
96 |
+
1956-11,271
|
97 |
+
1956-12,306
|
98 |
+
1957-01,315
|
99 |
+
1957-02,301
|
100 |
+
1957-03,356
|
101 |
+
1957-04,348
|
102 |
+
1957-05,355
|
103 |
+
1957-06,422
|
104 |
+
1957-07,465
|
105 |
+
1957-08,467
|
106 |
+
1957-09,404
|
107 |
+
1957-10,347
|
108 |
+
1957-11,305
|
109 |
+
1957-12,336
|
110 |
+
1958-01,340
|
111 |
+
1958-02,318
|
112 |
+
1958-03,362
|
113 |
+
1958-04,348
|
114 |
+
1958-05,363
|
115 |
+
1958-06,435
|
116 |
+
1958-07,491
|
117 |
+
1958-08,505
|
118 |
+
1958-09,404
|
119 |
+
1958-10,359
|
120 |
+
1958-11,310
|
121 |
+
1958-12,337
|
122 |
+
1959-01,360
|
123 |
+
1959-02,342
|
124 |
+
1959-03,406
|
125 |
+
1959-04,396
|
126 |
+
1959-05,420
|
127 |
+
1959-06,472
|
128 |
+
1959-07,548
|
129 |
+
1959-08,559
|
130 |
+
1959-09,463
|
131 |
+
1959-10,407
|
132 |
+
1959-11,362
|
133 |
+
1959-12,405
|
134 |
+
1960-01,417
|
135 |
+
1960-02,391
|
136 |
+
1960-03,419
|
137 |
+
1960-04,461
|
138 |
+
1960-05,472
|
139 |
+
1960-06,535
|
140 |
+
1960-07,622
|
141 |
+
1960-08,606
|
142 |
+
1960-09,508
|
143 |
+
1960-10,461
|
144 |
+
1960-11,390
|
145 |
+
1960-12,432
|
app.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
6 |
+
from sklearn.metrics import mean_absolute_percentage_error
|
7 |
+
|
8 |
+
# Streamlit app: previews the AirPassengers monthly series, lets the user pick
# SARIMAX orders in the sidebar, then fits the model on everything except the
# last `forecast_steps` months and scores the forecast against those held-out
# months using MAPE.

# Streamlit app layout
st.set_page_config(layout="wide")
st.title("Air Passenger Data Analysis and Forecasting")
st.write("Time Series Analysis and Forecasting with SARIMAX")

# Name of the value column in the CSV; kept in one place because it is used
# for plotting, training and evaluation.
PASSENGER_COL = '#Passengers'

# Load data
data_path = "AirPassengers.csv"  # Ensure this file is in the correct directory
data = pd.read_csv(data_path, parse_dates=['Month'], index_col='Month')
data.index.freq = 'MS'  # Set the frequency to Month Start to avoid warnings
st.write("Data Preview", data.head())

# Sidebar for SARIMAX parameters
st.sidebar.header("SARIMAX Model Parameters")
p = st.sidebar.slider("AR order (p)", 0, 5, 1)
d = st.sidebar.slider("Difference order (d)", 0, 2, 1)
q = st.sidebar.slider("MA order (q)", 0, 5, 1)
P = st.sidebar.slider("Seasonal AR order (P)", 0, 5, 1)
D = st.sidebar.slider("Seasonal Difference order (D)", 0, 2, 1)
Q = st.sidebar.slider("Seasonal MA order (Q)", 0, 5, 1)
s = st.sidebar.selectbox("Seasonal period (s)", [12])  # Monthly seasonality
forecast_steps = st.sidebar.slider("Forecast Steps (Months)", min_value=1, max_value=36, value=12)

# Display the observed time series
st.subheader("Air Passenger Traffic Over Time")
fig, ax = plt.subplots()
ax.plot(data.index, data[PASSENGER_COL], color='blue', label="Observed")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Passengers")
ax.set_title("Monthly Air Passenger Data")
ax.legend()
st.pyplot(fig)
# pyplot keeps every figure it creates registered until it is closed; close it
# once rendered so figures do not pile up each time the script runs.
plt.close(fig)

# Train the model and forecast
if st.button("Train Model and Forecast"):
    try:
        # Hold out the last `forecast_steps` months for evaluation and train
        # on everything before them.
        train_data = data[:-forecast_steps]
        test_data = data[-forecast_steps:]

        # Fit the SARIMAX model on the training data
        model = SARIMAX(train_data[PASSENGER_COL],
                        order=(p, d, q),
                        seasonal_order=(P, D, Q, s),
                        enforce_stationarity=False,
                        enforce_invertibility=False)
        sarimax_model = model.fit(disp=False)
        st.success("Model training completed successfully.")

        # Display Model Summary; summary() returns a Summary object, so render
        # it explicitly as plain text instead of relying on implicit str().
        st.subheader("Model Summary")
        st.text(sarimax_model.summary().as_text())

        # Generate forecast for the held-out period to evaluate accuracy
        forecast = sarimax_model.get_forecast(steps=forecast_steps)
        forecast_df = forecast.summary_frame(alpha=0.05)  # 95% confidence intervals

        # Calculate accuracy (MAPE)
        forecasted_values = forecast_df['mean']
        actual_values = test_data[PASSENGER_COL]
        mape = mean_absolute_percentage_error(actual_values, forecasted_values) * 100  # MAPE in percentage

        # Display the accuracy
        st.subheader("Forecast Accuracy")
        st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        st.write(f"Accuracy: {100 - mape:.2f}%")

        # Display forecast data
        st.subheader("Forecast Data")
        st.write(forecasted_values)

        # Display confidence intervals
        lower_conf_int = forecast_df['mean_ci_lower'].rename('lower #Passengers')
        upper_conf_int = forecast_df['mean_ci_upper'].rename('upper #Passengers')
        forecast_confidence_df = pd.concat([lower_conf_int, upper_conf_int], axis=1)
        st.subheader("Forecast Confidence Interval")
        st.write(forecast_confidence_df)

        # Plot forecasted values with confidence intervals over the full
        # observed series (the forecast overlaps the held-out months).
        fig, ax = plt.subplots()
        ax.plot(data.index, data[PASSENGER_COL], label='Observed', color='blue')
        ax.plot(forecasted_values.index, forecasted_values, label='Forecast', color='red')
        ax.fill_between(forecasted_values.index, lower_conf_int, upper_conf_int, color='pink', alpha=0.3)
        ax.set_xlabel("Date")
        ax.set_ylabel("Number of Passengers")
        ax.set_title("Air Passenger Forecast with Accuracy")
        ax.legend()
        st.pyplot(fig)
        plt.close(fig)  # release the figure after rendering (see note above)

    except Exception as e:
        # Top-level UI boundary: surface any fitting/forecasting failure to
        # the user instead of crashing the app.
        st.error(f"An error occurred during model training or forecasting: {e}")
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
matplotlib
|
5 |
+
statsmodels
|
6 |
+
scikit-learn
|