# File size: 5,166 Bytes | b89b1ca
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
from datetime import timedelta
# Load and preprocess data
@st.cache_data
def load_data():
    """Read the NYC energy CSV and return it indexed by timestamp.

    The four columns of ``nyc_energy_consumption.csv`` are renamed to
    ``timeStamp``, ``demand``, ``precip`` and ``temp``. ``timeStamp`` is
    parsed to datetimes and becomes the index, and every row containing a
    missing value is discarded. The function is wrapped in
    ``st.cache_data``.

    Returns:
        pandas.DataFrame indexed by ``timeStamp`` with the columns
        ``demand``, ``precip`` and ``temp``.
    """
    frame = pd.read_csv("nyc_energy_consumption.csv")
    frame.columns = ['timeStamp', 'demand', 'precip', 'temp']
    frame['timeStamp'] = pd.to_datetime(frame['timeStamp'])
    # Index by time first, then drop incomplete rows.
    return frame.set_index('timeStamp').dropna()
# Cleaned, timestamp-indexed frame returned by load_data().
data = load_data()

# Scale the data
# All three model inputs are min-max scaled into [0, 1]. The fitted scaler
# stays at module level because it is reused later to map scaled demand
# values back to their original units.
# NOTE(review): the scaler is fit on every row, before the train/test split
# performed further down, so held-out rows influence the scaling. Consider
# fitting on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
feature_frame = data[['demand', 'precip', 'temp']]
scaled_data = scaler.fit(feature_frame).transform(feature_frame)
# Create dataset function for LSTM
def create_dataset(dataset, look_back=60):
    """Build sliding-window samples for supervised sequence learning.

    Sample ``X[k]`` holds the ``look_back`` consecutive rows
    ``dataset[k:k + look_back]`` and ``y[k]`` is column 0 of the row that
    immediately follows that window (the value being predicted).

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_features)``. Anything
            accepted by ``np.asarray`` works (ndarray, nested list, ...).
        look_back: Number of past rows in each input window; must be >= 1.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape
        ``(n_samples, look_back, n_features)`` and ``y`` has shape
        ``(n_samples,)``, where ``n_samples = max(n_rows - look_back, 0)``.
        When the input is too short for a single window, ``X`` is still
        3-D (with zero samples) so callers can read ``X.shape[1:]`` safely.

    Raises:
        ValueError: If ``dataset`` is not 2-D or ``look_back`` is below 1.
    """
    dataset = np.asarray(dataset)
    if dataset.ndim != 2:
        raise ValueError(
            f"dataset must be 2-D (rows, features), got {dataset.ndim} dimension(s)"
        )
    if look_back < 1:
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    n_samples = max(dataset.shape[0] - look_back, 0)

    # Row indices of every window, shape (n_samples, look_back): window k
    # covers rows k .. k + look_back - 1. Integer-array indexing gathers all
    # windows in one step and returns a fresh array.
    window_rows = np.arange(n_samples)[:, None] + np.arange(look_back)[None, :]
    X = dataset[window_rows]

    # Targets are column 0 of the row right after each window.
    y = dataset[look_back:, 0].copy()
    return X, y
# Set look-back period
look_back = 60
X, y = create_dataset(scaled_data, look_back)

# Split the dataset into train and test sets.
# The split is positional (no shuffling): the first 80% of the windows are
# used for training and the remaining windows for testing.
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]


@st.cache_resource
def _train_and_predict(_X_train, _y_train, _X_test, _y_test, epochs=20, batch_size=64):
    """Build, train and run the LSTM once; later reruns reuse the result.

    Streamlit re-executes the script on every widget interaction, so
    training at module level would refit the network each time the forecast
    slider moves. Wrapping the work in ``st.cache_resource`` keeps the
    fitted model and its predictions in memory between reruns.

    The array parameters are prefixed with an underscore so Streamlit does
    not hash them; the cache is therefore keyed on ``epochs`` and
    ``batch_size`` only.

    Args:
        _X_train: Training windows, indexed as (samples, steps, features).
        _y_train: Scaled training targets.
        _X_test: Test windows, also passed to ``fit`` as validation data.
        _y_test: Scaled test targets.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.

    Returns:
        Tuple ``(net, fit_history, train_scaled, test_scaled)``: the fitted
        model, the object returned by ``fit``, and the model's scaled
        predictions for the training and test windows.
    """
    net = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(_X_train.shape[1], _X_train.shape[2])),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=25),
        Dense(units=1)
    ])
    net.compile(optimizer='adam', loss='mean_squared_error')
    # NOTE(review): the test split doubles as validation data here, so the
    # per-epoch validation loss is measured on the same rows as the final
    # metrics.
    fit_history = net.fit(
        _X_train,
        _y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(_X_test, _y_test),
    )
    return net, fit_history, net.predict(_X_train), net.predict(_X_test)


def _to_demand_units(scaled_values):
    """Map scaled demand values back to the original demand scale.

    The scaler was fit on all feature columns, so the single demand column
    is padded with zero columns to the full width before inverting, and only
    column 0 of the result is kept.
    """
    column = np.asarray(scaled_values).reshape(-1, 1)
    padding = np.zeros((column.shape[0], scaled_data.shape[1] - 1))
    return scaler.inverse_transform(np.concatenate((column, padding), axis=1))[:, 0]


# Build, train and run the model (cached across Streamlit reruns)
model, history, train_predict_scaled, test_predict_scaled = _train_and_predict(
    X_train, y_train, X_test, y_test
)

# Inverse transform predictions and targets to original scale
train_predict = _to_demand_units(train_predict_scaled)
test_predict = _to_demand_units(test_predict_scaled)
y_train_inv = _to_demand_units(y_train)
y_test_inv = _to_demand_units(y_test)

# Calculate error metrics on the test split, in original demand units
rmse = np.sqrt(mean_squared_error(y_test_inv, test_predict))
mape = mean_absolute_percentage_error(y_test_inv, test_predict) * 100
# "Accuracy" is reported as the complement of MAPE, in percent.
accuracy = 100 - mape
# Streamlit App with filter for future prediction periods
st.title("NYC Energy Consumption Forecasting with LSTM")
st.subheader("Dataset Preview")
st.write(data.head())

# Forecasting options
st.subheader("Forecasting Options")
forecast_period = st.slider("Select number of future hours to predict", min_value=1, max_value=365, value=30)

# Future prediction: roll the model forward one step at a time, feeding each
# predicted demand back in as the newest row of the input window.
n_features = scaled_data.shape[1]
future_X = np.reshape(scaled_data[-look_back:], (1, look_back, n_features))
# Future precip/temp are unknown. Hold the last observed (scaled) values
# instead of feeding 0, which in min-max space is each column's historical
# minimum.
# NOTE(review): this is a naive persistence assumption; replace it with real
# weather forecasts if they become available.
last_exog = scaled_data[-1, 1:]
future_scaled = np.empty(forecast_period)
for step in range(forecast_period):
    # verbose=0 avoids printing a progress bar for every single-step call.
    next_demand = float(model.predict(future_X, verbose=0)[0, 0])
    future_scaled[step] = next_demand
    # Drop the oldest row of the window and append the newly predicted one.
    next_row = np.concatenate(([next_demand], last_exog)).reshape(1, 1, n_features)
    future_X = np.concatenate((future_X[:, 1:, :], next_row), axis=1)

# Scale back future predictions. The scaler expects every feature column, so
# the demand column is padded with zeros and only column 0 is kept.
future_padded = np.concatenate(
    (future_scaled.reshape(-1, 1), np.zeros((forecast_period, n_features - 1))), axis=1)
future_predictions = scaler.inverse_transform(future_padded)[:, 0]

# Generate dates for future predictions: one-hour steps after the last row
last_date = data.index[-1]
future_dates = [last_date + timedelta(hours=i) for i in range(1, forecast_period + 1)]
future_predictions_df = pd.DataFrame({
    'DateTime': future_dates,
    'Predicted Demand': future_predictions
})

# Display evaluation metrics
st.subheader("Forecasting and Model Evaluation")
st.write(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
st.write(f"Model Accuracy: {accuracy:.2f}%")

# Plotting actual vs predicted.
# An explicit Figure is passed to Streamlit and closed afterwards so that
# figures do not pile up in matplotlib's global state across reruns.
st.subheader("Actual vs Predicted Demand")
eval_fig, eval_ax = plt.subplots(figsize=(14, 5))
eval_ax.plot(y_test_inv, color='blue', label='Actual Demand')
eval_ax.plot(test_predict, color='orange', linestyle='--', label='Predicted Demand')
eval_ax.legend()
eval_ax.set_xlabel('Time')
eval_ax.set_ylabel('Demand')
st.pyplot(eval_fig)
plt.close(eval_fig)

# Display future predictions in a DataFrame
st.subheader("Future Predictions with Date and Time")
st.write(future_predictions_df)

# Plotting future predictions against their timestamps
st.subheader("Future Predictions Plot")
future_fig, future_ax = plt.subplots(figsize=(14, 5))
future_ax.plot(future_dates, future_predictions, color='green', linestyle='--', label='Future Prediction')
future_ax.legend()
future_ax.set_xlabel('Future Time')
future_ax.set_ylabel('Demand')
future_fig.autofmt_xdate()
st.pyplot(future_fig)
plt.close(future_fig)
|