"""Streamlit app: train an LSTM on NYC energy data and forecast demand.

The script reads ``nyc_energy_consumption.csv``, scales the ``demand``,
``precip`` and ``temp`` columns, trains a two-layer LSTM on sliding windows
of ``look_back`` rows, reports test-set error metrics, and produces a
recursive multi-step forecast whose length is chosen with a slider.
"""

from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

# Column order matters: column 0 (demand) is the prediction target.
FEATURE_COLUMNS = ['demand', 'precip', 'temp']


@st.cache_data
def load_data():
    """Load the consumption CSV and return it indexed by timestamp.

    The file must have exactly four columns; they are renamed to
    ``timeStamp``, ``demand``, ``precip`` and ``temp`` in that order.
    Rows containing any missing value are dropped.

    Returns:
        pandas.DataFrame indexed by the parsed ``timeStamp`` column.
    """
    data = pd.read_csv("nyc_energy_consumption.csv")
    data.columns = ['timeStamp', 'demand', 'precip', 'temp']
    data['timeStamp'] = pd.to_datetime(data['timeStamp'])
    data.set_index('timeStamp', inplace=True)
    return data.dropna()


def create_dataset(dataset, look_back=60):
    """Build sliding-window samples for the LSTM.

    Args:
        dataset: 2-D array of shape (rows, features); column 0 is the target.
        look_back: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` where ``X[k]`` is rows ``k .. k + look_back - 1`` of
        ``dataset`` and ``y[k]`` is column 0 of row ``k + look_back``.
    """
    X, y = [], []
    for i in range(look_back, len(dataset)):
        X.append(dataset[i - look_back:i])
        y.append(dataset[i, 0])
    return np.array(X), np.array(y)


def inverse_demand(scaler, scaled_demand):
    """Convert scaled demand values back to the original demand units.

    The scaler was fitted on all feature columns, so the demand values are
    padded with zero columns before ``inverse_transform``; only column 0 of
    the result is kept. MinMaxScaler scales each column independently, so
    the padding does not affect the demand column.
    """
    column = np.asarray(scaled_demand, dtype=float).reshape(-1, 1)
    padding = np.zeros((column.shape[0], len(scaler.scale_) - 1))
    return scaler.inverse_transform(np.hstack((column, padding)))[:, 0]


@st.cache_resource
def train_model(X_train, y_train, X_test, y_test):
    """Build, train and evaluate the LSTM; cached across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction.
    Caching here keeps a slider change from retraining the network.

    Args:
        X_train: Training windows, shape (samples, look_back, features).
        y_train: Scaled training targets.
        X_test: Held-out windows, also used as Keras validation data.
        y_test: Scaled held-out targets.

    Returns:
        Tuple ``(model, test_predict)`` where ``test_predict`` is the model's
        scaled prediction for ``X_test`` with shape (samples, 1).
    """
    model = Sequential([
        LSTM(units=50, return_sequences=True,
             input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=25),
        Dense(units=1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=20, batch_size=64,
              validation_data=(X_test, y_test))
    return model, model.predict(X_test)


def forecast_demand(model, scaled_data, look_back, steps):
    """Recursively forecast ``steps`` scaled demand values.

    Each prediction is appended to the input window and the oldest row is
    dropped. Future precip/temp are unknown, so the last observed scaled
    values are carried forward. Feeding zeros instead would mean the minimum
    value seen when the scaler was fitted.

    Returns:
        1-D numpy array of ``steps`` scaled demand predictions.
    """
    window = np.array(scaled_data[-look_back:], dtype=float)
    last_exogenous = window[-1, 1:].copy()
    predictions = []
    for _ in range(steps):
        next_demand = float(model.predict(window[np.newaxis, ...], verbose=0)[0, 0])
        predictions.append(next_demand)
        next_row = np.concatenate(([next_demand], last_exogenous))
        window = np.vstack((window[1:], next_row))
    return np.array(predictions)


data = load_data()
feature_values = data[FEATURE_COLUMNS].to_numpy()

# Window length and train/test split
look_back = 60
split_ratio = 0.8
window_count = len(feature_values) - look_back
split_index = int(window_count * split_ratio)
if split_index < 1:
    # Fewer than two windows: no way to have both a train and a test sample.
    st.error("Not enough rows in the dataset to train and evaluate the model.")
    st.stop()

# Fit the scaler only on rows that training windows can see (inputs and
# targets), so test-set minima/maxima do not leak into training.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(feature_values[:split_index + look_back])
scaled_data = scaler.transform(feature_values)

X, y = create_dataset(scaled_data, look_back)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

model, test_predict_scaled = train_model(X_train, y_train, X_test, y_test)

# Back to original demand units for metrics and plots
test_predict = inverse_demand(scaler, test_predict_scaled)
y_test_inv = inverse_demand(scaler, y_test)

# Error metrics on the held-out windows
rmse = np.sqrt(mean_squared_error(y_test_inv, test_predict))
mape = mean_absolute_percentage_error(y_test_inv, test_predict) * 100
accuracy = 100 - mape

# Streamlit App with filter for future prediction periods
st.title("NYC Energy Consumption Forecasting with LSTM")
st.subheader("Dataset Preview")
st.write(data.head())

# Forecasting options
st.subheader("Forecasting Options")
forecast_period = st.slider("Select number of future hours to predict",
                            min_value=1, max_value=365, value=30)

# Future prediction, converted back to original demand units
future_predictions = inverse_demand(
    scaler, forecast_demand(model, scaled_data, look_back, forecast_period))

# One timestamp per forecast step, spaced one hour apart after the last row
# NOTE(review): assumes the CSV rows are hourly - confirm against the data.
last_date = data.index[-1]
future_dates = [last_date + timedelta(hours=i)
                for i in range(1, forecast_period + 1)]
future_predictions_df = pd.DataFrame({
    'DateTime': future_dates,
    'Predicted Demand': future_predictions
})

# Display evaluation metrics
st.subheader("Forecasting and Model Evaluation")
st.write(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
st.write(f"Model Accuracy: {accuracy:.2f}%")

# Plotting actual vs predicted
st.subheader("Actual vs Predicted Demand")
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test_inv, color='blue', label='Actual Demand')
ax.plot(test_predict, color='orange', linestyle='--', label='Predicted Demand')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Demand')
st.pyplot(fig)
plt.close(fig)  # avoid accumulating open figures across reruns

# Display future predictions in a DataFrame
st.subheader("Future Predictions with Date and Time")
st.write(future_predictions_df)

# Plotting future predictions
st.subheader("Future Predictions Plot")
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(range(len(y_test_inv), len(y_test_inv) + forecast_period),
        future_predictions, color='green', linestyle='--',
        label='Future Prediction')
ax.legend()
ax.set_xlabel('Future Time')
ax.set_ylabel('Demand')
st.pyplot(fig)
plt.close(fig)