import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
from datetime import timedelta
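# Assumed dependencies: streamlit, pandas, numpy, scikit-learn, tensorflow,
# matplotlib. Launch the app with: streamlit run <this_file>.py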

# Load and preprocess data
@st.cache_data
def load_data():
    data = pd.read_csv("nyc_energy_consumption.csv")
    data.columns = ['timeStamp', 'demand', 'precip', 'temp']
    data['timeStamp'] = pd.to_datetime(data['timeStamp'])
    data.set_index('timeStamp', inplace=True)
    data = data.dropna()  # Drop any missing values
    return data
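# The CSV is assumed to contain four columns in this order:
# timeStamp, demand, precip, temp (hourly observations); adjust the column
# list above if the file layout differs.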

data = load_data()

# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['demand', 'precip', 'temp']])
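# Note: fitting the scaler on the full series leaks test-period statistics
# into training; a stricter setup would fit on the training split only and
# reuse that transform for the test split.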

# Create dataset function for LSTM
def create_dataset(dataset, look_back=60):
    X, y = [], []
    for i in range(look_back, len(dataset)):
        X.append(dataset[i-look_back:i])
        y.append(dataset[i, 0])  # Predicting demand
    return np.array(X), np.array(y)
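# X has shape (n_samples, look_back, 3); y is the scaled demand immediately
# following each window.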

# Set look-back period
look_back = 60
X, y = create_dataset(scaled_data, look_back)

# Split the dataset into train and test sets
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
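# The split is chronological (no shuffling), so the model is evaluated only
# on data that comes after its training period.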

# Build and compile LSTM model
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=25),
    Dense(units=1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model with validation
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))

# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform back to the original scale. The scaler was fit on three
# columns (demand, precip, temp), so pad with zeros and keep only demand.
def invert_demand(values):
    padded = np.concatenate((values.reshape(-1, 1), np.zeros((len(values), 2))), axis=1)
    return scaler.inverse_transform(padded)[:, 0]

train_predict = invert_demand(train_predict)
test_predict = invert_demand(test_predict)
y_train_inv = invert_demand(y_train)
y_test_inv = invert_demand(y_test)

# Calculate error metrics
rmse = np.sqrt(mean_squared_error(y_test_inv, test_predict))
mape = mean_absolute_percentage_error(y_test_inv, test_predict) * 100
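# "Accuracy" below is the informal 100 - MAPE, not a classification accuracy.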
accuracy = 100 - mape

# Streamlit App with filter for future prediction periods
st.title("NYC Energy Consumption Forecasting with LSTM")
st.subheader("Dataset Preview")
st.write(data.head())

# Forecasting options
st.subheader("Forecasting Options")
forecast_period = st.slider("Select number of future hours to predict", min_value=1, max_value=365, value=30)

# Recursive multi-step forecast: start from the last look_back observations
# and feed each prediction back in as the next input step.
future_X = scaled_data[-look_back:]
future_X = np.reshape(future_X, (1, look_back, scaled_data.shape[1]))

future_predictions = []
for _ in range(forecast_period):
    future_pred = model.predict(future_X, verbose=0)
    future_predictions.append(future_pred[0, 0])

    # Roll the window: drop the oldest step, append the new prediction.
    # Future precip and temp are unknown, so they are filled with 0 in scaled
    # space (each column's minimum), a simplification that can bias the forecast.
    next_step = np.array([[[future_pred[0, 0], 0.0, 0.0]]])
    future_X = np.append(future_X[:, 1:, :], next_step, axis=1)

# Scale future predictions back to original demand units
future_predictions = invert_demand(np.array(future_predictions))

# Generate dates for future predictions
last_date = data.index[-1]
future_dates = [last_date + timedelta(hours=i) for i in range(1, forecast_period + 1)]
future_predictions_df = pd.DataFrame({
    'DateTime': future_dates,
    'Predicted Demand': future_predictions
})

# Display evaluation metrics
st.subheader("Forecasting and Model Evaluation")
st.write(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
st.write(f"Model Accuracy: {accuracy:.2f}%")

# Plot actual vs predicted demand on the test set
st.subheader("Actual vs Predicted Demand")
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test_inv, color='blue', label='Actual Demand')
ax.plot(test_predict, color='orange', linestyle='--', label='Predicted Demand')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Demand')
st.pyplot(fig)

# Display future predictions in a DataFrame
st.subheader("Future Predictions with Date and Time")
st.write(future_predictions_df)

# Plot future predictions against their timestamps
st.subheader("Future Predictions Plot")
fig2, ax2 = plt.subplots(figsize=(14, 5))
ax2.plot(future_predictions_df['DateTime'], future_predictions_df['Predicted Demand'],
         color='green', linestyle='--', label='Future Prediction')
ax2.legend()
ax2.set_xlabel('Future Time')
ax2.set_ylabel('Demand')
fig2.autofmt_xdate()
st.pyplot(fig2)