File size: 4,010 Bytes
064a25d e4fbfab 064a25d e4fbfab 7edb958 e4fbfab 7edb958 e4fbfab 7edb958 e4fbfab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import streamlit as st
import hopsworks
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
# Constants
# Relative directory in which the cached CSV and the timestamp file are stored.
DATA_DIR = "data"
# Name of the file (inside DATA_DIR) recording when data was last saved locally.
TIMESTAMP_FILE = "last_download_time.txt"
# Initialize Hopsworks connection
def connect_to_hopsworks():
    """Open a Hopsworks connection and return the air-quality project.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable
    and passed on as-is (``None`` when the variable is unset).

    Returns:
        Whatever ``get_project("id2223AirQuality")`` returns on the
        connection object.
    """
    st.write("Connecting to Hopsworks...")
    connection = hopsworks.connection(
        api_key_value=os.getenv("HOPSWORKS_API_KEY"),
    )
    return connection.get_project("id2223AirQuality")
# Fetch data from Hopsworks feature group
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read one version of a feature group from the project's feature store.

    Args:
        project: Object exposing ``get_feature_store()`` (presumably the
            value returned by ``connect_to_hopsworks`` - confirm at caller).
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        The result of calling ``read()`` on the feature group.
    """
    return (
        project.get_feature_store()
        .get_feature_group(name=feature_group_name, version=version)
        .read()
    )
# Save data locally
def save_data_locally(data, filename):
    """Write ``data`` as CSV under ``DATA_DIR`` and record the save time.

    Args:
        data: Object with a ``to_csv(path, index=False)`` method
            (presumably a pandas DataFrame).
        filename: File name, joined onto ``DATA_DIR``.

    Returns:
        The path the CSV was written to.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    csv_path = os.path.join(DATA_DIR, filename)
    data.to_csv(csv_path, index=False)

    # Record when the download happened; is_local_data_valid() reads this
    # back to decide whether the cache is still fresh.
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(stamp_path, "w") as stamp_file:
        # isoformat(sep=" ") produces the same text as str(datetime).
        stamp_file.write(datetime.now().isoformat(sep=" "))

    return csv_path
# Load local data
def load_local_data(filename):
    """Load a cached CSV from ``DATA_DIR``.

    Args:
        filename: File name, joined onto ``DATA_DIR``.

    Returns:
        The DataFrame parsed by ``pd.read_csv``, or ``None`` when the file
        does not exist.
    """
    csv_path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(csv_path):
        return None
    return pd.read_csv(csv_path)
# Check if local data is valid
def is_local_data_valid():
    """Report whether the recorded download time is at most one day old.

    Only the timestamp file is inspected; the data file itself is not
    checked here.

    Returns:
        ``True`` when the timestamp file exists, parses as an ISO datetime,
        and is no more than one day in the past. ``False`` otherwise; a
        Streamlit warning is shown if reading or parsing fails.
    """
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(stamp_path):
        return False

    try:
        with open(stamp_path, "r") as stamp_file:
            raw_stamp = stamp_file.read().strip()
        downloaded_at = datetime.fromisoformat(raw_stamp)
        # Fresh means the download happened within the last day.
        return datetime.now() - downloaded_at <= timedelta(days=1)
    except Exception as e:
        st.warning(f"Error reading timestamp: {e}")
        return False
# Plot graphs
def plot_graphs(data):
    """Render a preview of ``data`` in the Streamlit page.

    Args:
        data: Object with a ``head()`` method (presumably a pandas
            DataFrame); the result of ``head()`` is displayed.
    """
    st.write("### Data Preview")
    preview = data.head()
    st.dataframe(preview)

    # Disabled for now: histogram of a user-selected column.
    # st.write("### Histogram")
    # column = st.selectbox("Select column for histogram", data.columns)
    # fig, ax = plt.subplots()
    # sns.histplot(data[column], kde=True, ax=ax)
    # st.pyplot(fig)

    # Disabled for now: correlation heatmap.
    # st.write("### Correlation Matrix")
    # fig, ax = plt.subplots()
    # sns.heatmap(data.corr(), annot=True, cmap="coolwarm", ax=ax)
    # st.pyplot(fig)
# Streamlit UI
def main(feature_group_name="predictions", version=1, filename="data.csv"):
    """Run the Streamlit page: load cached data or fetch it, then display it.

    The cached CSV is used when the timestamp is fresh and the file exists.
    Otherwise the feature group is fetched from Hopsworks, saved under the
    same ``filename``, and displayed.

    Args:
        feature_group_name: Feature group to fetch when the cache is unusable.
        version: Version of that feature group.
        filename: Name of the cache CSV inside the data directory. The same
            name is used for saving and loading so the cache can be read back.
    """
    st.title("Hopsworks Feature Group Explorer")

    # Initialize session state
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    # NOTE: the sidebar inputs for feature group / version / filename are
    # currently disabled; the function arguments supply those values instead.

    # A fresh timestamp alone is not enough: the CSV itself may be missing,
    # in which case load_local_data() returns None and we must re-fetch.
    cached = load_local_data(filename) if is_local_data_valid() else None

    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
    else:
        # Fetch data if local data is invalid or absent
        if st.session_state.hopsworks_project is None:
            st.write("Initializing Hopsworks connection...")
            st.session_state.hopsworks_project = connect_to_hopsworks()
            st.success("Connected to Hopsworks!")
        project = st.session_state.hopsworks_project
        data = fetch_data_from_feature_group(project, feature_group_name, version)
        filepath = save_data_locally(data, filename)
        st.session_state.data = data
        st.success(f"Data fetched and saved locally at {filepath}")

    # Display data and graphs
    if st.session_state.data is not None:
        plot_graphs(st.session_state.data)
# Run the app only when executed as a script (including `streamlit run`),
# not when this module is imported.
if __name__ == "__main__":
    main()
|