Rahul-Crudcook committed on
Commit
b89b1ca
·
verified ·
1 Parent(s): 48db7e7

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +136 -0
  2. nyc_energy_consumption.csv +0 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import MinMaxScaler
5
+ from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.layers import LSTM, Dense, Dropout
8
+ import matplotlib.pyplot as plt
9
+ from datetime import timedelta
10
+
11
+ # Load and preprocess data
12
@st.cache_data
def load_data():
    """Read the NYC energy CSV and return a cleaned, time-indexed frame.

    The file's columns are renamed to ``timeStamp``, ``demand``, ``precip``
    and ``temp``; ``timeStamp`` is parsed to datetimes and becomes the index;
    rows containing any missing value are dropped.

    Returns:
        pandas.DataFrame indexed by ``timeStamp`` with the columns
        ``demand``, ``precip`` and ``temp``.
    """
    frame = pd.read_csv("nyc_energy_consumption.csv")
    # Positional rename: assumes the CSV has exactly these four columns in
    # this order -- TODO confirm against the data file.
    frame.columns = ['timeStamp', 'demand', 'precip', 'temp']
    frame['timeStamp'] = pd.to_datetime(frame['timeStamp'])
    return frame.set_index('timeStamp').dropna()
20
+
21
# Cleaned, time-indexed observations (load_data is cached by Streamlit).
data = load_data()

# Scale the data
# Column order fixes the feature order used downstream: demand comes first,
# so column 0 of the scaled array is the prediction target.
# NOTE(review): the scaler is fitted on every row, including rows that later
# end up in the test split -- consider fitting on the training portion only.
feature_frame = data[['demand', 'precip', 'temp']]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(feature_frame).transform(feature_frame)
26
+
27
+ # Create dataset function for LSTM
28
def create_dataset(dataset, look_back=60):
    """Build sliding-window samples for sequence-to-one training.

    Each sample is ``look_back`` consecutive rows of ``dataset``; its target
    is column 0 (demand) of the row that immediately follows the window.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_features). Plain nested
            lists are accepted as well as NumPy arrays.
        look_back: Number of rows per input window; must be at least 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        (n_windows, look_back, n_features) and ``y`` has shape (n_windows,),
        with ``n_windows = max(n_rows - look_back, 0)``.

    Raises:
        ValueError: If ``dataset`` is not 2-D or ``look_back`` is below 1.
    """
    values = np.asarray(dataset)
    if values.ndim != 2:
        raise ValueError(
            f"dataset must be 2-D (rows x features), got {values.ndim}-D"
        )
    if look_back < 1:
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    n_rows, n_features = values.shape
    windows = [
        values[start:start + look_back]
        for start in range(n_rows - look_back)
    ]
    # The explicit reshape keeps X three-dimensional even when there are no
    # windows, so callers can still read X.shape[1] and X.shape[2].
    X = np.array(windows, dtype=values.dtype).reshape(
        -1, look_back, n_features
    )
    y = values[look_back:, 0].copy()  # Predicting demand
    return X, y
34
+
35
# Window length: every sample is built from the previous 60 rows.
look_back = 60
X, y = create_dataset(scaled_data, look_back)

# Split the dataset into train and test sets.
# No shuffling: the first 80% of the windows train the model and the
# remaining 20% are held out.
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]
44
+
45
# Build, compile and train the LSTM model.
# Streamlit re-executes this script on every widget interaction, so training
# is wrapped in a cached function: the network is fitted once per server
# process and then reused, instead of retraining on every slider move.
@st.cache_resource
def train_model(_X_train, _y_train, _X_test, _y_test):
    """Fit the two-layer LSTM regressor and return it with its fit history.

    The leading underscores tell Streamlit not to hash the array arguments
    when building the cache key, so the cached model is reused for the
    lifetime of the process (or until this function's source changes).

    Args:
        _X_train: Training windows, shape (samples, look_back, features).
        _y_train: Scaled demand targets for the training windows.
        _X_test: Held-out windows, used as validation data during fitting.
        _y_test: Scaled demand targets for the held-out windows.

    Returns:
        Tuple ``(model, history)``: the fitted Keras model and the object
        returned by ``model.fit``.
    """
    network = Sequential([
        LSTM(units=50, return_sequences=True,
             input_shape=(_X_train.shape[1], _X_train.shape[2])),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=25),
        Dense(units=1),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model with validation
    fit_history = network.fit(
        _X_train,
        _y_train,
        epochs=20,
        batch_size=64,
        validation_data=(_X_test, _y_test),
    )
    return network, fit_history


model, history = train_model(X_train, y_train, X_test, y_test)
59
+
60
def _demand_in_original_units(scaled_demand):
    """Convert scaled demand values back to the original demand scale.

    The scaler was fitted on three columns, so the demand column is padded
    with two zero columns before inverting; only the demand column of the
    result is kept.
    """
    column = scaled_demand.reshape(-1, 1)
    filler = np.zeros((column.shape[0], 2))
    restored = scaler.inverse_transform(
        np.concatenate((column, filler), axis=1)
    )
    return restored[:, 0]


# Make predictions and bring them, and the true targets, back to the
# original scale.
train_predict = _demand_in_original_units(model.predict(X_train))
test_predict = _demand_in_original_units(model.predict(X_test))
y_train_inv = _demand_in_original_units(y_train)
y_test_inv = _demand_in_original_units(y_test)

# Calculate error metrics on the held-out split
rmse = np.sqrt(mean_squared_error(y_test_inv, test_predict))
mape = mean_absolute_percentage_error(y_test_inv, test_predict) * 100
accuracy = 100 - mape  # reported as "accuracy": the complement of MAPE
74
+
75
# Streamlit page header and a preview of the loaded data
st.title("NYC Energy Consumption Forecasting with LSTM")

st.subheader("Dataset Preview")
preview_rows = data.head()
st.write(preview_rows)

# Forecasting options: how many steps past the end of the data to predict
st.subheader("Forecasting Options")
forecast_period = st.slider(
    "Select number of future hours to predict",
    min_value=1,
    max_value=365,
    value=30,
)
83
+
84
# Future prediction
# Seed window: the most recent `look_back` scaled rows, as a batch of one.
n_features = scaled_data.shape[1]
future_X = scaled_data[-look_back:].reshape(1, look_back, n_features)

# No forecast of precipitation or temperature is available for future steps.
# Hold both at their last observed (scaled) values; filling them with 0 would
# feed the model the dataset minimum for both features at every future step.
last_exogenous = scaled_data[-1, 1:]

future_predictions = []
for _ in range(forecast_period):
    # verbose=0 keeps Keras from printing a progress bar on every step.
    next_demand = model.predict(future_X, verbose=0)[0, 0]
    future_predictions.append(next_demand)

    # Slide the window: drop the oldest row, append the newly predicted one.
    next_row = np.concatenate(([next_demand], last_exogenous)).reshape(
        1, 1, n_features
    )
    future_X = np.concatenate((future_X[:, 1:, :], next_row), axis=1)

# Scale back future predictions: pad to the scaler's column count, invert,
# and keep only the demand column.
future_predictions = scaler.inverse_transform(
    np.concatenate(
        (
            np.array(future_predictions).reshape(-1, 1),
            np.zeros((forecast_period, n_features - 1)),
        ),
        axis=1,
    )
)[:, 0]
100
+
101
# Generate dates for future predictions: one timestamp per forecast step,
# spaced one hour apart, starting one hour after the last observation.
last_date = data.index[-1]
one_hour = timedelta(hours=1)
future_dates = [
    last_date + step * one_hour
    for step in range(1, forecast_period + 1)
]

# Pair each forecast timestamp with its predicted demand.
future_predictions_df = pd.DataFrame(
    {
        'DateTime': future_dates,
        'Predicted Demand': future_predictions,
    }
)
108
+
109
# Display evaluation metrics computed on the held-out split
st.subheader("Forecasting and Model Evaluation")
metric_lines = (
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%",
    f"Model Accuracy: {accuracy:.2f}%",
)
for metric_line in metric_lines:
    st.write(metric_line)
114
+
115
# Plotting actual vs predicted demand on the held-out split.
# An explicit Figure is created and handed to st.pyplot (rather than the
# pyplot module itself), and it is closed afterwards so figures do not pile
# up in memory each time Streamlit reruns the script.
st.subheader("Actual vs Predicted Demand")
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test_inv, color='blue', label='Actual Demand')
ax.plot(test_predict, color='orange', linestyle='--', label='Predicted Demand')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Demand')
st.pyplot(fig)
plt.close(fig)
124
+
125
# Display future predictions in a DataFrame
st.subheader("Future Predictions with Date and Time")
st.write(future_predictions_df)

# Plotting future predictions against their forecast timestamps, so the
# x-axis shows real dates instead of an arbitrary sample offset.
# An explicit Figure is passed to st.pyplot and then closed so figures do not
# accumulate across Streamlit reruns.
st.subheader("Future Predictions Plot")
future_fig, future_ax = plt.subplots(figsize=(14, 5))
future_ax.plot(
    future_predictions_df['DateTime'],
    future_predictions_df['Predicted Demand'],
    color='green',
    linestyle='--',
    label='Future Prediction',
)
future_ax.legend()
future_ax.set_xlabel('Future Time')
future_ax.set_ylabel('Demand')
future_fig.autofmt_xdate()  # tilt the date labels so they do not overlap
st.pyplot(future_fig)
plt.close(future_fig)
nyc_energy_consumption.csv ADDED
The diff for this file is too large to render. See raw diff