# elli-teu
# Merge branch 'main' of https://huggingface.co/spaces/WheelyFunTimesTeam/WheelyFunTimes
# 27699f5
import streamlit as st | |
import hopsworks | |
import pandas as pd | |
import os | |
import time | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from datetime import datetime, timedelta | |
import altair as alt | |
import api | |
# Constants
# Directory (relative to the working directory) where the downloaded CSV and
# its timestamp file are stored.
DATA_DIR = "data"
# Name of the file inside DATA_DIR that records when the data was last saved.
TIMESTAMP_FILE = "last_download_time.txt"
# Initialize Hopsworks connection
def connect_to_hopsworks():
    """Log in to Hopsworks and return the project handle.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable
    and handed straight to ``hopsworks.login``. It is intentionally never
    printed or written to the page, so the secret cannot end up in logs.

    Returns:
        The object returned by ``hopsworks.login`` for the
        ``id2223AirQuality`` project.
    """
    st.write("Connecting to Hopsworks...")
    project_name = "id2223AirQuality"
    api_key = os.getenv("HOPSWORKS_API_KEY")
    project = hopsworks.login(project=project_name, api_key_value=api_key)
    return project
# Fetch data from Hopsworks feature group
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read one feature group of *project* and return its contents.

    Args:
        project: Object exposing ``get_feature_store()``.
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        Whatever the feature group's ``read()`` method returns.
    """
    return (
        project.get_feature_store()
        .get_feature_group(name=feature_group_name, version=version)
        .read()
    )
# Save data locally
def save_data_locally(data, filename):
    """Write *data* as CSV into DATA_DIR and record the time of the save.

    Args:
        data: Object with a ``to_csv(path, index=False)`` method.
        filename: File name (inside DATA_DIR) for the CSV.

    Returns:
        The path of the CSV file that was written.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    csv_path = os.path.join(DATA_DIR, filename)
    data.to_csv(csv_path, index=False)

    # The timestamp file is what is_local_data_valid() reads back later.
    with open(os.path.join(DATA_DIR, TIMESTAMP_FILE), "w") as stamp_file:
        stamp_file.write(str(datetime.now()))

    return csv_path
# Load local data
def load_local_data(filename):
    """Return the CSV *filename* from DATA_DIR as a DataFrame, or None if absent."""
    candidate = os.path.join(DATA_DIR, filename)
    if not os.path.exists(candidate):
        return None
    return pd.read_csv(candidate)
# Check if local data is valid
def is_local_data_valid():
    """Tell whether the locally saved data is at most one day old.

    Returns:
        True when the timestamp file exists, parses as an ISO datetime and is
        no more than one day in the past. False when the file is missing, too
        old, or unreadable (in which case a Streamlit warning is shown).
    """
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(stamp_path):
        return False

    try:
        with open(stamp_path, "r") as stamp_file:
            downloaded_at = datetime.fromisoformat(stamp_file.read().strip())
        # Anything older than a day counts as stale.
        age = datetime.now() - downloaded_at
        return not age > timedelta(days=1)
    except Exception as e:
        st.warning(f"Error reading timestamp: {e}")
        return False
def get_buses():
    """Summarise the buses present in ``st.session_state.data``.

    Returns:
        A tuple ``(bus_df, bus_list, short_bus_list)``:

        * ``bus_df`` - unique (trip_id, route_long_name, route_short_name) rows.
        * ``bus_list`` - unique (route_long_name, route_short_name) rows.
        * ``short_bus_list`` - list of the unique route_short_name values.
    """
    bus_df = st.session_state.data[
        ["trip_id", "route_long_name", "route_short_name"]
    ].drop_duplicates()
    bus_list = bus_df[["route_long_name", "route_short_name"]].drop_duplicates()
    short_bus_list = list(pd.unique(bus_df["route_short_name"]))
    return bus_df, bus_list, short_bus_list


def remove_near_duplicates(data):
    """Drop rows that follow a kept row of the same route/stop within 3 minutes.

    This logic previously sat after the ``return`` of ``get_buses`` where it
    could never run; it now lives in its own function, named after the
    (commented-out) call site in ``main``.

    Rows are grouped by ``route_id`` and ``stop_name`` and visited in their
    existing order. The first row of each group is kept; a later row is kept
    only when its ``datetime`` is more than three minutes after the last row
    that was kept.
    NOTE(review): comparing against the last *kept* row (rather than the
    immediately preceding row) is an assumption - confirm the intended rule.

    Args:
        data: DataFrame with at least ``route_id``, ``stop_name`` and
            ``datetime`` columns. It is not modified.

    Returns:
        A new DataFrame containing only the kept rows (empty when nothing
        is kept).
    """
    # Work on a copy so the caller's frame keeps its original datetime column.
    frame = data.copy()
    frame["datetime"] = pd.to_datetime(frame["datetime"])
    min_gap = pd.Timedelta(minutes=3)

    kept_rows = []
    for _, group in frame.groupby(["route_id", "stop_name"]):
        last_kept = None
        for _, row in group.iterrows():
            if last_kept is None or row["datetime"] - last_kept["datetime"] > min_gap:
                kept_rows.append(row)
                last_kept = row

    if not kept_rows:
        # Preserve the column layout instead of returning a column-less frame.
        return frame.iloc[0:0]
    return pd.DataFrame(kept_rows)
def plot_graph_title(plot_df, stop, time):
    """Draw a step chart of occupancy per stop for one set of rows.

    Args:
        plot_df: DataFrame with ``datetime``, ``vehicle_occupancystatus``,
            ``stop_name`` and ``route_id`` columns.
        stop: Stop name shown in the chart title.
        time: Arrival time text shown in the chart title.
    """
    # TODO: should the chart show only the stops the user wants, or all of them?
    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }

    # Copy so that adding the label column never writes into the caller's frame.
    plot_df = (
        plot_df[["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]]
        .sort_values("datetime")
        .copy()
    )
    plot_df["Occupancy"] = plot_df["vehicle_occupancystatus"].map(categories)

    # Y axis order: 'Full' first ... 'Empty' last.
    category_order = list(reversed(list(categories.values())))

    # X axis order: stops in time order. Altair expects a plain list of
    # values for a custom sort, so de-duplicate while keeping first occurrence.
    stop_order = list(dict.fromkeys(plot_df["stop_name"]))

    chart = alt.Chart(plot_df).mark_line(point=True, interpolate="step-after").encode(
        x=alt.X('stop_name:N', title="Stop name", sort=stop_order),
        y=alt.Y(
            'Occupancy:N',
            title="Vehicle Occupancy Status (Categories)",
            sort=category_order,
            scale=alt.Scale(domain=category_order),
        ),
        tooltip=["datetime", 'stop_name', 'Occupancy'],
    ).properties(
        title=f"Vehicle Occupancy For Bus arriving at {stop} at {time}"
    )
    st.altair_chart(chart, use_container_width=True)
def visualize(filtered_data, stop_name):
    """Show a map of the stops in *filtered_data* next to an occupancy legend.

    Each row becomes one marker. The marker for *stop_name* is black; other
    markers are coloured by ``vehicle_occupancystatus`` (0-5). Rows whose
    status is outside 0-5 get the default marker and an "Unknown" label
    instead of raising ``KeyError``.

    Args:
        filtered_data: DataFrame with ``stop_lat``, ``stop_lon``,
            ``stop_name`` and ``vehicle_occupancystatus`` columns.
        stop_name: Name of the stop to highlight.
    """
    import folium
    from streamlit_folium import st_folium

    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }
    # Marker colour per occupancy status; matches the legend below.
    status_colors = {
        0: "green",
        1: "blue",
        2: "purple",
        3: "pink",
        4: "orange",
        5: "red",
    }
    # Kept flush-left: the text is rendered through st.markdown.
    legend_html = '''
<style>
.legend-box {
position: relative;
width: 250px;
background-color: white;
padding: 10px;
border: 2px solid grey;
border-radius: 5px;
font-size: 14px;
}
.legend-box div {
margin-bottom: 5px;
}
.legend-box i {
width: 20px;
height: 10px;
display: inline-block;
margin-right: 5px;
}
</style>
<div class="legend-box">
<b>Occupancy Legend:</b><br>
<div><i style="background: green;"></i>Empty</div>
<div><i style="background: blue;"></i>Many seats available</div>
<div><i style="background: purple;"></i>Few seats available</div>
<div><i style="background: pink;"></i>Standing room only</div>
<div><i style="background: orange;"></i>Crushed standing room</div>
<div><i style="background: red;"></i>Full</div>
<div><i style="background: black;"></i>Your stop</div>
</div>
'''

    # Nothing to centre the map on; bail out instead of failing on iloc[0].
    if filtered_data.empty:
        st.write("No stops to display on the map.")
        return

    # Create a folium map centred on the first row, then zoom to fit all stops.
    first_stop = filtered_data.iloc[0]
    m = folium.Map(location=[first_stop["stop_lat"], first_stop["stop_lon"]], zoom_start=12)
    coords = filtered_data[['stop_lat', 'stop_lon']]
    sw = coords.min().values.tolist()
    ne = coords.max().values.tolist()
    m.fit_bounds([sw, ne])

    # Add one marker per row.
    for _, row in filtered_data.iterrows():
        status = row['vehicle_occupancystatus']
        occupancy_label = categories.get(status, "Unknown")

        if row["stop_name"] == stop_name:
            icon = folium.Icon(color="black", icon="bus-simple", prefix="fa")
        elif status in status_colors:
            icon = folium.Icon(color=status_colors[status], icon="bus-simple", prefix="fa")
        else:
            icon = folium.Icon(icon="bus-simple", prefix="fa")

        folium.Marker(
            [row['stop_lat'], row['stop_lon']],
            popup=f"Bus stop: {row['stop_name']} Bus occupancy: {occupancy_label}",
            icon=icon,
        ).add_to(m)

    # Layout: map in a wide column, legend in a narrow one (3:1).
    col1, col2 = st.columns([3, 1])
    with col1:
        st_folium(m, width=700, height=500)
    with col2:
        st.markdown(legend_html, unsafe_allow_html=True)
def drop_the_duplicates(df):
    """Keep rows whose ``datetime`` is over 3 minutes after the preceding row.

    Rows with a repeated ``datetime`` are dropped first. Each remaining row is
    then compared with the row directly before it; the first row (which has
    no predecessor) is always kept. The returned frame carries an extra
    ``previous`` column holding the predecessor's ``datetime``.
    """
    deduped = df.drop_duplicates("datetime")
    deduped = deduped.assign(previous=deduped["datetime"].shift(1))

    no_predecessor = deduped["previous"].isna()
    long_gap = (deduped["datetime"] - deduped["previous"]) > timedelta(minutes=3)
    return deduped[long_gap | no_predecessor]
# Streamlit UI
def _ensure_data_loaded():
    """Fill ``st.session_state.data`` from the local cache or from Hopsworks."""
    # A valid timestamp is not enough: the CSV itself may be missing, in which
    # case load_local_data() returns None and we fall back to a fresh fetch.
    cached = load_local_data("data.csv") if is_local_data_valid() else None
    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
        if "first" not in st.session_state:
            st.session_state.first = True
        return

    if st.session_state.hopsworks_project is None:
        st.write("Initializing Hopsworks connection...")
        st.session_state.hopsworks_project = connect_to_hopsworks()
        st.success("Connected to Hopsworks!")
    project = st.session_state.hopsworks_project
    data = fetch_data_from_feature_group(project, "predictions", 1)
    filepath = save_data_locally(data, "data.csv")
    st.session_state.data = data
    st.success(f"Data fetched and saved locally at {filepath}")


def main():
    """Render the bus occupancy explorer page.

    Loads the prediction data (local cache first, Hopsworks otherwise), lets
    the user pick a bus, direction, stop, day and time window in the sidebar,
    and then draws one occupancy chart and one map per matching trip.
    """
    st.title("Wheely Fun Times - Bus Occupancy Explorer")

    # Initialize session state
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    _ensure_data_loaded()

    _, bus_list, short_bus = get_buses()
    short_bus = sorted(short_bus)

    # Sidebar section for searching buses
    st.sidebar.title("Search for your desired bus")
    search = st.sidebar.selectbox(
        "Search for your bus number:",
        options=short_bus,
        help="Select one bus to view details."
    )

    if not search:
        st.write("No buses selected. Please search in the sidebar.")
        return

    # Resolve the selected short name to one long route name.
    route = bus_list[bus_list["route_short_name"] == search]
    long_names = list(pd.unique(route["route_long_name"]))
    if len(long_names) == 1:
        bus = long_names[0]
    else:
        bus = st.sidebar.selectbox(
            "Pick bus route:",
            options=long_names,
            help="Select one bus to view details."
        )
    st.write(f"### Selected Bus: {search} {bus}")

    # Direction is kept in session state and flipped by the sidebar button.
    if "direction" not in st.session_state:
        st.session_state.direction = False
    if st.sidebar.button('Change Direction'):
        st.session_state.direction = not st.session_state.direction

    # All rows for the chosen route. Copy so the column rewrites below do not
    # touch the frame stored in session state.
    all_rows = st.session_state.data
    bus_trips = all_rows[all_rows["route_long_name"] == bus].copy()
    bus_trips["datetime"] = pd.to_datetime(bus_trips["datetime"])
    # Strip the timezone only when there is one; tz_convert raises on
    # tz-naive values.
    if bus_trips["datetime"].dt.tz is not None:
        bus_trips["datetime"] = bus_trips["datetime"].dt.tz_convert(None)

    stops = list(pd.unique(bus_trips["stop_name"]))
    stop_choice = st.sidebar.selectbox(
        "Select your bus stop:",
        options=stops,
        help="Select one bus stop to se occupancy."
    )

    today = datetime.now().date()
    tomorrow = today + timedelta(days=1)
    date_options = {
        today.strftime("%d %B %Y"): today,
        tomorrow.strftime("%d %B %Y"): tomorrow,
    }
    day_choice = st.sidebar.radio("Select the day:", options=list(date_options.keys()))

    # Time window inputs; both stay None until the user picks a value.
    start_time = st.sidebar.time_input("Select a start time", value=None)
    end_time = st.sidebar.time_input("Select an end time", value=None)

    if start_time is None or end_time is None:
        return

    st.write(f"Displaying buses between {start_time.strftime('%H:%M')} and {end_time.strftime('%H:%M')} the {day_choice}")

    day = date_options[day_choice]
    window_start = datetime.combine(day, start_time)
    window_end = datetime.combine(day, end_time)
    # The direction flag is a bool; direction_id is compared against 0/1.
    direction_id = int(st.session_state.direction)
    selected_trips = bus_trips[
        (bus_trips["datetime"] >= window_start)
        & (bus_trips["datetime"] <= window_end)
        & (bus_trips["direction_id"] == direction_id)
        & (bus_trips["stop_name"] == stop_choice)
    ]

    # TODO: drop stop_name here; possibly keep the stop choice in session state.
    choice = selected_trips[["trip_id", "stop_name", "datetime"]]
    choice = choice.sort_values(by=["datetime"])
    choice = drop_the_duplicates(choice)
    st.write(f"{choice['trip_id'].nunique()} buses available")

    # One chart and one map per remaining arrival, using every row of that trip.
    for _, row in choice.iterrows():
        trip_rows = all_rows[all_rows["trip_id"] == row["trip_id"]]
        plot_graph_title(trip_rows, row["stop_name"], row['datetime'].strftime('%H:%M'))
        visualize(trip_rows, stop_choice)
# Run the app only when executed as a script (Streamlit runs the file as
# ``__main__``), so importing this module does not start the UI.
if __name__ == "__main__":
    main()
# Show all bus lines? Search?
# How to show the direction?
# Filter by bus line and direction
# Filter by time
# Should the user enter a time?
# See all unique trip ids
# Map position to stop
# Show some kind of graph for all buses within that time
# Should it cover all stops, or only the ones the user said they will travel between?