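# Spaces: Sleeping
# (Status banner captured from the hosting page together with the source;
# it is not part of the program.)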
import streamlit as st | |
import hopsworks | |
import pandas as pd | |
import os | |
import time | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from datetime import datetime, timedelta | |
# Constants
# Directory that holds both the cached CSV and the download-timestamp file.
DATA_DIR = "data"
# File (inside DATA_DIR) recording when the data was last saved.
TIMESTAMP_FILE = "last_download_time.txt"
# Initialize Hopsworks connection
def connect_to_hopsworks():
    """Connect to Hopsworks and return the "id2223AirQuality" project.

    The API key is taken from the ``HOPSWORKS_API_KEY`` environment variable;
    when the variable is unset, ``None`` is passed to the connection as-is.
    A progress message is written to the Streamlit page before connecting.

    Returns:
        Whatever ``get_project("id2223AirQuality")`` returns on the
        connection object.
    """
    st.write("Connecting to Hopsworks...")
    connection = hopsworks.connection(
        api_key_value=os.getenv("HOPSWORKS_API_KEY"),
    )
    return connection.get_project("id2223AirQuality")
# Fetch data from Hopsworks feature group
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read the full contents of one feature group.

    Args:
        project: Object exposing ``get_feature_store()`` (as returned by
            ``connect_to_hopsworks``).
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        The result of calling ``read()`` on the selected feature group.
    """
    store = project.get_feature_store()
    return store.get_feature_group(
        name=feature_group_name,
        version=version,
    ).read()
# Save data locally
def save_data_locally(data, filename):
    """Write *data* as CSV under ``DATA_DIR`` and record the save time.

    ``DATA_DIR`` is created if it does not exist. After the CSV is written
    (without the index column), the current local time is stored as text in
    ``DATA_DIR/TIMESTAMP_FILE`` so that ``is_local_data_valid`` can later
    judge the age of the cache.

    Args:
        data: Object providing ``to_csv(path, index=False)``.
        filename: File name, joined onto ``DATA_DIR``.

    Returns:
        The path the CSV was written to.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    csv_path = os.path.join(DATA_DIR, filename)
    data.to_csv(csv_path, index=False)

    # Save timestamp
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(stamp_path, "w") as stamp_file:
        stamp_file.write(f"{datetime.now()}")

    return csv_path
# Load local data
def load_local_data(filename):
    """Load a CSV from ``DATA_DIR`` if it exists.

    Args:
        filename: File name, joined onto ``DATA_DIR``.

    Returns:
        The ``pandas.read_csv`` result for the file, or ``None`` when no
        file exists at that path.
    """
    csv_path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(csv_path):
        return None
    return pd.read_csv(csv_path)
# Check if local data is valid
def is_local_data_valid():
    """Report whether the recorded download time is at most one day old.

    Only the timestamp file ``DATA_DIR/TIMESTAMP_FILE`` is inspected; the
    presence of the data file itself is not checked here.

    Returns:
        ``True`` when the timestamp file exists, parses as an ISO-format
        datetime and is no more than one day in the past. ``False`` when the
        file is missing, the timestamp is older than a day, or any error
        occurs while reading/parsing it (a Streamlit warning is shown in
        that last case).
    """
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if os.path.exists(stamp_path):
        try:
            with open(stamp_path, "r") as stamp_file:
                raw_stamp = stamp_file.read()
            downloaded_at = datetime.fromisoformat(raw_stamp.strip())
            # The cache counts as fresh for up to (and including) one day.
            return datetime.now() - downloaded_at <= timedelta(days=1)
        except Exception as e:
            st.warning(f"Error reading timestamp: {e}")
    return False
# Plot graphs
def plot_graphs(data):
    """Show a preview of *data* on the Streamlit page.

    Currently only a "Data Preview" heading and ``data.head()`` are
    rendered; the histogram and correlation-matrix plots below are disabled.

    Args:
        data: Object providing ``head()`` whose result ``st.dataframe`` can
            display.
    """
    st.write("### Data Preview")
    preview = data.head()
    st.dataframe(preview)

    # Disabled plots, kept for reference:
    # st.write("### Histogram")
    # column = st.selectbox("Select column for histogram", data.columns)
    # fig, ax = plt.subplots()
    # sns.histplot(data[column], kde=True, ax=ax)
    # st.pyplot(fig)
    # st.write("### Correlation Matrix")
    # fig, ax = plt.subplots()
    # sns.heatmap(data.corr(), annot=True, cmap="coolwarm", ax=ax)
    # st.pyplot(fig)
# Streamlit UI
def main():
    """Render the explorer page, using the local cache when it is fresh.

    Flow:
      1. Make sure the ``hopsworks_project`` and ``data`` session-state
         slots exist.
      2. If the download timestamp is fresh *and* the cached CSV can be
         loaded, use the cached data.
      3. Otherwise connect to Hopsworks (once per session), read the
         feature group, save it locally and use the fetched data.
      4. Display whatever data ended up in session state.
    """
    st.title("Hopsworks Feature Group Explorer")

    # Initialize session state
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    # Data settings. The sidebar inputs below are disabled, so the values
    # are fixed here. Previously ``filename`` was only assigned inside a
    # string literal, which made the cached-data branch raise NameError.
    # st.sidebar.title("Data Settings")
    # feature_group_name = st.sidebar.text_input("Feature Group Name", value="predictions")
    # version = st.sidebar.number_input("Feature Group Version", value=1, min_value=1)
    # filename = st.sidebar.text_input("Local Filename", value="data.csv")
    feature_group_name = "predictions"
    version = 1
    filename = "data.csv"

    # Check for valid local data. A fresh timestamp alone is not enough:
    # the CSV itself may be missing, in which case load_local_data returns
    # None and we fall through to a fresh download.
    cached_data = load_local_data(filename) if is_local_data_valid() else None

    if cached_data is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached_data
    else:
        # Fetch data if local data is invalid or missing
        if st.session_state.hopsworks_project is None:
            st.write("Initializing Hopsworks connection...")
            st.session_state.hopsworks_project = connect_to_hopsworks()
            st.success("Connected to Hopsworks!")
        project = st.session_state.hopsworks_project
        data = fetch_data_from_feature_group(project, feature_group_name, version)
        # Save under the same file name that the cached branch loads from,
        # so the next run can actually find the cache.
        filepath = save_data_locally(data, filename)
        st.session_state.data = data
        st.success(f"Data fetched and saved locally at {filepath}")

    # Display data and graphs
    if st.session_state.data is not None:
        plot_graphs(st.session_state.data)
# Run the app only when executed as a script (Streamlit executes the file as
# ``__main__``); importing this module no longer triggers the UI and network
# calls as a side effect.
if __name__ == "__main__":
    main()