Rahul-Crudcook
committed on
Upload 2 files
Browse files- app.py +136 -0
- nyc_energy_consumption.csv +0 -0
app.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.preprocessing import MinMaxScaler
|
5 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
|
6 |
+
from tensorflow.keras.models import Sequential
|
7 |
+
from tensorflow.keras.layers import LSTM, Dense, Dropout
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
from datetime import timedelta
|
10 |
+
|
11 |
+
# Load and preprocess data
|
12 |
+
@st.cache_data
def load_data():
    """Read the NYC energy CSV and return a clean, time-indexed DataFrame.

    The file "nyc_energy_consumption.csv" is read from the working
    directory, its columns are renamed to timeStamp / demand / precip /
    temp, the timestamp column is parsed to datetimes and promoted to the
    index, and every row containing a missing value is discarded.

    Returns:
        The cleaned DataFrame indexed by ``timeStamp``.
    """
    frame = pd.read_csv("nyc_energy_consumption.csv")
    # Positional rename: this raises unless the file has exactly four columns.
    frame.columns = ['timeStamp', 'demand', 'precip', 'temp']
    frame = frame.assign(timeStamp=pd.to_datetime(frame['timeStamp']))
    return frame.set_index('timeStamp').dropna()
|
20 |
+
|
21 |
+
# Load the cleaned, time-indexed dataset.
data = load_data()

# Rescale the three model inputs to the [0, 1] range. The fitted scaler is
# kept so predictions can be mapped back to the original units.
# NOTE(review): the scaler is fitted on the full series, including the rows
# that are later held out for testing.
feature_frame = data[['demand', 'precip', 'temp']]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(feature_frame)
|
26 |
+
|
27 |
+
# Create dataset function for LSTM
|
28 |
+
def create_dataset(dataset, look_back=60):
    """Turn a 2-D time series into supervised (window, next-demand) pairs.

    Each sample is the ``look_back`` consecutive rows preceding a time step,
    and its target is column 0 (demand) of that time step.

    Args:
        dataset: Array-like of shape (n_steps, n_features); column 0 is the
            value to predict.
        look_back: Number of past rows in each input window. Must be >= 1.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        (n_samples, look_back, n_features) and ``y`` has shape (n_samples,),
        with ``n_samples = max(n_steps - look_back, 0)``. When the series is
        too short to form a window, the arrays are empty but keep their
        window and feature dimensions, so shape lookups downstream still work.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    series = np.asarray(dataset)
    n_samples = max(len(series) - look_back, 0)

    if n_samples == 0:
        # Keep the trailing dimensions so X.shape[1] / X.shape[2] remain valid.
        empty_windows = np.empty((0, look_back) + series.shape[1:], dtype=series.dtype)
        empty_targets = np.empty((0,), dtype=series.dtype)
        return empty_windows, empty_targets

    windows = np.stack([series[start:start + look_back] for start in range(n_samples)])
    # Copy so the targets do not alias the caller's array.
    targets = series[look_back:, 0].copy()
    return windows, targets
|
34 |
+
|
35 |
+
# Number of past time steps fed to the model for each prediction
look_back = 60
X, y = create_dataset(scaled_data, look_back)

# Chronological hold-out: the first 80% of the windows train the model and
# the remaining 20% are kept for evaluation (no shuffling).
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]
|
44 |
+
|
45 |
+
# Build, compile and train the LSTM model
@st.cache_resource
def _train_model(_X_train, _y_train, _X_val, _y_val):
    """Build, compile and fit the demand-forecasting LSTM once per process.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the 20-epoch training would repeat each time the slider
    moves. The arguments carry a leading underscore so Streamlit does not
    hash the arrays; as a consequence the cached model is reused until the
    cache is cleared or the server restarts, even if the input data changes.

    Args:
        _X_train: Training windows, shape (n_samples, look_back, n_features).
        _y_train: Training targets, shape (n_samples,).
        _X_val: Validation windows passed to ``fit`` as ``validation_data``.
        _y_val: Validation targets passed to ``fit`` as ``validation_data``.

    Returns:
        A tuple ``(model, history)``: the fitted Keras model and the History
        object returned by ``fit``.
    """
    network = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(_X_train.shape[1], _X_train.shape[2])),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=25),
        Dense(units=1)
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')

    # NOTE(review): the held-out test windows double as the validation set.
    fit_history = network.fit(
        _X_train,
        _y_train,
        epochs=20,
        batch_size=64,
        validation_data=(_X_val, _y_val),
    )
    return network, fit_history


model, history = _train_model(X_train, y_train, X_test, y_test)
|
59 |
+
|
60 |
+
# Predict on both partitions (outputs are still in the scaled [0, 1] space)
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)


def _demand_to_original_units(scaled_demand):
    """Map scaled demand values back to original units.

    The scaler was fitted on three columns, so the demand values are padded
    with two zero columns before inverting and only column 0 is returned.
    """
    demand_column = np.asarray(scaled_demand).reshape(-1, 1)
    filler = np.zeros((demand_column.shape[0], 2))
    restored = scaler.inverse_transform(np.concatenate((demand_column, filler), axis=1))
    return restored[:, 0]


# Bring predictions and ground truth back to the original demand scale
train_predict = _demand_to_original_units(train_predict)
test_predict = _demand_to_original_units(test_predict)
y_train_inv = _demand_to_original_units(y_train)
y_test_inv = _demand_to_original_units(y_test)
|
69 |
+
|
70 |
+
# Evaluate the hold-out predictions in original demand units
test_mse = mean_squared_error(y_test_inv, test_predict)
rmse = np.sqrt(test_mse)
# MAPE is returned as a fraction; express it as a percentage.
mape = 100 * mean_absolute_percentage_error(y_test_inv, test_predict)
# "Accuracy" here is simply the complement of the percentage error.
accuracy = 100 - mape
|
74 |
+
|
75 |
+
# Page header and a quick look at the loaded data
st.title("NYC Energy Consumption Forecasting with LSTM")
st.subheader("Dataset Preview")
preview_rows = data.head()
st.write(preview_rows)

# Let the user choose how many steps ahead to forecast
st.subheader("Forecasting Options")
forecast_period = st.slider(
    "Select number of future hours to predict",
    min_value=1,
    max_value=365,
    value=30,
)
|
83 |
+
|
84 |
+
# Future prediction
# Seed the rolling window with the most recent `look_back` scaled observations.
n_features = scaled_data.shape[1]
future_X = scaled_data[-look_back:].reshape(1, look_back, n_features)

# No precip/temp forecast is available for future steps. Hold them at their
# last observed (scaled) values instead of 0: in scaled space 0 is the
# historical minimum, which would feed the model e.g. the coldest temperature
# on record for every forecast step.
last_exogenous = scaled_data[-1, 1:]

future_predictions = []
for _ in range(forecast_period):
    # verbose=0 avoids printing a progress bar for every single-step call.
    next_demand = model.predict(future_X, verbose=0)[0, 0]
    future_predictions.append(next_demand)

    # Slide the window: drop the oldest step and append the new prediction.
    next_step = np.concatenate(([next_demand], last_exogenous)).reshape(1, 1, n_features)
    future_X = np.concatenate((future_X[:, 1:, :], next_step), axis=1)

# Scale back future predictions. The scaler expects every fitted column, so
# pad with zeros and keep only the demand column of the result.
padded_predictions = np.zeros((forecast_period, n_features))
padded_predictions[:, 0] = future_predictions
future_predictions = scaler.inverse_transform(padded_predictions)[:, 0]
|
100 |
+
|
101 |
+
# Timestamp each forecast step, one hour apart, starting right after the
# last observation in the dataset
last_date = data.index[-1]
hour_offsets = range(1, forecast_period + 1)
future_dates = [last_date + timedelta(hours=offset) for offset in hour_offsets]

forecast_columns = {
    'DateTime': future_dates,
    'Predicted Demand': future_predictions,
}
future_predictions_df = pd.DataFrame(forecast_columns)
|
108 |
+
|
109 |
+
# Report the hold-out metrics, one line each
st.subheader("Forecasting and Model Evaluation")
metric_lines = (
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%",
    f"Model Accuracy: {accuracy:.2f}%",
)
for metric_line in metric_lines:
    st.write(metric_line)
|
114 |
+
|
115 |
+
# Plotting actual vs predicted
st.subheader("Actual vs Predicted Demand")
# Use an explicit Figure/Axes instead of pyplot's implicit global figure, and
# hand the Figure itself (not the pyplot module) to Streamlit.
fig_eval, ax_eval = plt.subplots(figsize=(14, 5))
ax_eval.plot(y_test_inv, color='blue', label='Actual Demand')
ax_eval.plot(test_predict, color='orange', linestyle='--', label='Predicted Demand')
ax_eval.legend()
ax_eval.set_xlabel('Time')
ax_eval.set_ylabel('Demand')
st.pyplot(fig_eval)
# The script reruns on every interaction; close the figure so open figures
# do not pile up in pyplot's registry.
plt.close(fig_eval)
|
124 |
+
|
125 |
+
# Display future predictions in a DataFrame
st.subheader("Future Predictions with Date and Time")
st.write(future_predictions_df)

# Plotting future predictions
st.subheader("Future Predictions Plot")
# Use an explicit Figure/Axes instead of pyplot's implicit global figure, and
# hand the Figure itself (not the pyplot module) to Streamlit.
fig_future, ax_future = plt.subplots(figsize=(14, 5))
# The x positions continue the index of the test-set plot.
future_positions = range(len(y_test_inv), len(y_test_inv) + forecast_period)
ax_future.plot(future_positions, future_predictions, color='green', linestyle='--', label='Future Prediction')
ax_future.legend()
ax_future.set_xlabel('Future Time')
ax_future.set_ylabel('Demand')
st.pyplot(fig_future)
# The script reruns on every interaction; close the figure so open figures
# do not pile up in pyplot's registry.
plt.close(fig_future)
|
nyc_energy_consumption.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|