"""Streamlit app "Wheely Fun Times - Bus Occupancy Explorer".

Reads bus occupancy predictions from the Hopsworks feature group
``predictions`` (version 1), caches them as a CSV file under ``data/`` for up
to one day, and lets the user pick a bus route, stop, day and time window in
the sidebar.  For every matching bus an Altair step chart and a Folium map of
the trip's stops are rendered.
"""
import os
import time  # noqa: F401  (unused here; kept because it was imported originally)
from datetime import datetime, timedelta

import altair as alt
import hopsworks
import matplotlib.pyplot as plt  # noqa: F401  (unused here; kept from original)
import pandas as pd
import seaborn as sns  # noqa: F401  (unused here; kept from original)
import streamlit as st

import api  # noqa: F401  (project module; unused here, kept from original)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
DATA_DIR = "data"
TIMESTAMP_FILE = "last_download_time.txt"
DATA_FILENAME = "data.csv"

HOPSWORKS_PROJECT = "id2223AirQuality"
FEATURE_GROUP_NAME = "predictions"
FEATURE_GROUP_VERSION = 1

# Cached data older than this is re-downloaded.
MAX_CACHE_AGE = timedelta(days=1)
# Rows closer together in time than this are treated as the same arrival.
MIN_ARRIVAL_GAP = timedelta(minutes=3)

# Human-readable label for each `vehicle_occupancystatus` code.
OCCUPANCY_LABELS = {
    0: 'Empty',
    1: 'Many seats available',
    2: 'Few seats available',
    3: 'Standing room only',
    4: 'Crushed standing room',
    5: 'Full',
}
# Label shown when a status code is missing from OCCUPANCY_LABELS.
UNKNOWN_OCCUPANCY_LABEL = "Unknown"

# Folium marker colour for each `vehicle_occupancystatus` code.
OCCUPANCY_COLORS = {
    0: "green",
    1: "blue",
    2: "purple",
    3: "pink",
    4: "orange",
    5: "red",
}
# Marker colour for the stop the user selected.
SELECTED_STOP_COLOR = "black"

# Text rendered next to the map.
# NOTE(review): this contains no markup, so the marker colours are not shown
# in the legend -- confirm whether HTML swatches were intended here.
LEGEND_HTML = '''
Occupancy Legend:
Empty
Many seats available
Few seats available
Standing room only
Crushed standing room
Full
Your stop
'''


# ---------------------------------------------------------------------------
# Hopsworks access
# ---------------------------------------------------------------------------
def connect_to_hopsworks():
    """Log in to the Hopsworks project and return the project handle.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable.
    It is deliberately never printed or logged.

    Returns:
        The object returned by ``hopsworks.login``.
    """
    st.write("Connecting to Hopsworks...")
    return hopsworks.login(
        project=HOPSWORKS_PROJECT,
        api_key_value=os.getenv("HOPSWORKS_API_KEY"),
    )


def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read a whole feature group from the project's feature store.

    Args:
        project: Project handle as returned by `connect_to_hopsworks`.
        feature_group_name: Name of the feature group to read.
        version: Version of the feature group to read.

    Returns:
        Whatever ``feature_group.read()`` returns (the rest of this file
        treats it as a pandas DataFrame).
    """
    feature_store = project.get_feature_store()
    feature_group = feature_store.get_feature_group(
        name=feature_group_name, version=version
    )
    return feature_group.read()


# ---------------------------------------------------------------------------
# Local cache
# ---------------------------------------------------------------------------
def save_data_locally(data, filename):
    """Write `data` as CSV under DATA_DIR and record the download time.

    Args:
        data: DataFrame to store.
        filename: File name (inside DATA_DIR) for the CSV.

    Returns:
        Path of the written CSV file.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    filepath = os.path.join(DATA_DIR, filename)
    data.to_csv(filepath, index=False)

    # The timestamp is what `is_local_data_valid` reads back to judge the
    # age of the cache.
    timestamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(timestamp_path, "w", encoding="utf-8") as f:
        f.write(str(datetime.now()))
    return filepath


def load_local_data(filename):
    """Load a cached CSV from DATA_DIR.

    Args:
        filename: File name (inside DATA_DIR) of the CSV.

    Returns:
        The parsed DataFrame, or ``None`` when the file does not exist.
    """
    filepath = os.path.join(DATA_DIR, filename)
    try:
        return pd.read_csv(filepath)
    except FileNotFoundError:
        return None


def is_local_data_valid():
    """Tell whether the cache timestamp exists and is at most one day old.

    A missing timestamp file silently yields ``False``; an unreadable or
    malformed one yields ``False`` after showing a Streamlit warning.

    Returns:
        ``True`` when the recorded download time is within MAX_CACHE_AGE.
    """
    timestamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    try:
        with open(timestamp_path, "r", encoding="utf-8") as f:
            last_download_time = datetime.fromisoformat(f.read().strip())
        age = datetime.now() - last_download_time
    except FileNotFoundError:
        return False
    except (OSError, ValueError, TypeError) as e:
        # OSError: unreadable file; ValueError: malformed timestamp;
        # TypeError: timezone-aware timestamp compared with naive now().
        st.warning(f"Error reading timestamp: {e}")
        return False
    return age <= MAX_CACHE_AGE


# ---------------------------------------------------------------------------
# Data helpers
# ---------------------------------------------------------------------------
def get_buses():
    """Summarise the buses present in ``st.session_state.data``.

    Returns:
        A tuple ``(bus_df, bus_list, short_bus_list)``:
        unique (trip_id, route_long_name, route_short_name) rows,
        unique (route_long_name, route_short_name) rows, and
        the list of unique route short names.
    """
    bus_df = st.session_state.data[
        ["trip_id", "route_long_name", "route_short_name"]
    ].drop_duplicates()
    bus_list = bus_df[["route_long_name", "route_short_name"]].drop_duplicates()
    short_bus_list = list(pd.unique(bus_df["route_short_name"]))
    return bus_df, bus_list, short_bus_list


def remove_near_duplicates(data):
    """Drop rows that follow a kept row of the same route/stop within 3 minutes.

    This logic previously sat unreachable after the ``return`` in `get_buses`
    and referenced an undefined name; it is restored here under the name used
    by the (disabled) call site in the data-loading step.  It is currently
    not called anywhere in this file.

    NOTE(review): rows are compared against the last *kept* row of their
    (route_id, stop_name) group, in the group's existing row order -- confirm
    that this is the intended rule before enabling it.

    Args:
        data: DataFrame with at least ``route_id``, ``stop_name`` and
            ``datetime`` columns.  It is not modified.

    Returns:
        A new DataFrame containing only the kept rows.
    """
    data = data.assign(datetime=pd.to_datetime(data["datetime"]))
    kept_rows = []
    for _, group in data.groupby(['route_id', 'stop_name']):
        last_kept_time = None
        for _, row in group.iterrows():
            is_first = last_kept_time is None
            if is_first or row['datetime'] - last_kept_time > MIN_ARRIVAL_GAP:
                kept_rows.append(row)
                last_kept_time = row['datetime']
    return pd.DataFrame(kept_rows, columns=data.columns)


def drop_the_duplicates(df):
    """Collapse rows whose timestamps are within 3 minutes of the previous row.

    Exact duplicate timestamps are removed first; then a row is kept only if
    it is the first row or lies more than MIN_ARRIVAL_GAP after the row
    immediately before it.  The caller is expected to pass `df` sorted by
    ``datetime``.

    Args:
        df: DataFrame with a ``datetime`` column.  It is not modified.

    Returns:
        The filtered DataFrame, with an extra ``previous`` column holding the
        preceding row's timestamp.
    """
    df = df.drop_duplicates("datetime")
    df = df.assign(previous=df["datetime"].shift(1))
    gap = df["datetime"] - df["previous"]
    return df[(gap > MIN_ARRIVAL_GAP) | df["previous"].isna()]


def _to_naive_utc(values):
    """Parse `values` as datetimes and return them as timezone-naive UTC.

    Timezone-aware input is converted to UTC; timezone-naive input is taken
    to already be UTC, so it no longer raises the way a bare
    ``.dt.tz_convert(None)`` does.
    """
    return pd.to_datetime(values, utc=True).dt.tz_convert(None)


# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
def plot_graph_title(plot_df, stop, time):
    """Render a step chart of occupancy per stop for one bus trip.

    Args:
        plot_df: Rows of a single trip; must contain ``datetime``,
            ``vehicle_occupancystatus``, ``stop_name`` and ``route_id``.
        stop: Stop name shown in the chart title.
        time: Arrival time (already formatted) shown in the chart title.
    """
    # TODO: should the chart show only the stops the user wants, or all?
    plot_df = plot_df[
        ["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]
    ].sort_values("datetime")
    plot_df = plot_df.assign(
        Occupancy=plot_df["vehicle_occupancystatus"].map(OCCUPANCY_LABELS)
    )

    # Fix the y-axis order explicitly, listed from 'Full' down to 'Empty'.
    category_order = list(OCCUPANCY_LABELS.values())[::-1]
    # Stops on the x-axis follow the chronological order of the trip; a plain
    # de-duplicated list keeps the chart spec small and JSON-friendly.
    stop_order = list(pd.unique(plot_df["stop_name"]))

    chart = alt.Chart(plot_df).mark_line(
        point=True, interpolate="step-after"
    ).encode(
        x=alt.X('stop_name:N', title="Stop name", sort=stop_order),
        # Occupancy is categorical, not numeric.
        y=alt.Y(
            'Occupancy:N',
            title="Vehicle Occupancy Status (Categories)",
            sort=category_order,
            scale=alt.Scale(domain=category_order),
        ),
        tooltip=["datetime", 'stop_name', 'Occupancy'],
    ).properties(
        title=f"Vehicle Occupancy For Bus arriving at {stop} at {time}"
    )
    st.altair_chart(chart, use_container_width=True)


def visualize(filtered_data, stop_name):
    """Render a Folium map of a trip's stops, coloured by occupancy.

    The stop equal to `stop_name` gets a black marker; other stops are
    coloured via OCCUPANCY_COLORS.  Stops with a status code outside that
    table get Folium's default marker colour and the label "Unknown"
    (previously this case raised ``KeyError``).

    Args:
        filtered_data: Rows of a single trip; must contain ``stop_lat``,
            ``stop_lon``, ``stop_name`` and ``vehicle_occupancystatus``.
        stop_name: Name of the stop selected by the user.
    """
    import folium
    from streamlit_folium import st_folium

    if filtered_data.empty:
        # Nothing to centre the map on.
        st.info("No stops to show on the map.")
        return

    first_stop = filtered_data.iloc[0]
    m = folium.Map(
        location=[first_stop["stop_lat"], first_stop["stop_lon"]],
        zoom_start=12,
    )
    # Zoom so that every stop of the trip is visible.
    coords = filtered_data[['stop_lat', 'stop_lon']]
    sw = coords.min().values.tolist()
    ne = coords.max().values.tolist()
    m.fit_bounds([sw, ne])

    for _, row in filtered_data.iterrows():
        status = row['vehicle_occupancystatus']
        label = OCCUPANCY_LABELS.get(status, UNKNOWN_OCCUPANCY_LABEL)
        if row["stop_name"] == stop_name:
            color = SELECTED_STOP_COLOR
        else:
            color = OCCUPANCY_COLORS.get(status)

        icon_kwargs = {"icon": "bus-simple", "prefix": "fa"}
        if color is not None:
            # Leaving the colour out keeps Folium's default for unknown codes.
            icon_kwargs["color"] = color

        folium.Marker(
            [row['stop_lat'], row['stop_lon']],
            popup=f"Bus stop: {row['stop_name']} Bus occupancy: {label}",
            icon=folium.Icon(**icon_kwargs),
        ).add_to(m)

    # Map on the left (3/4 of the width), legend on the right.
    col1, col2 = st.columns([3, 1])
    with col1:
        st_folium(m, width=700, height=500)
    with col2:
        st.markdown(LEGEND_HTML, unsafe_allow_html=True)


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
def _ensure_data_loaded():
    """Populate ``st.session_state.data`` from the cache or from Hopsworks.

    The cache is used only when its timestamp is valid *and* the CSV can
    actually be loaded; otherwise the data is fetched and the cache rewritten.
    """
    cached = load_local_data(DATA_FILENAME) if is_local_data_valid() else None
    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
        if "first" not in st.session_state:
            st.session_state.first = True
            # TODO: optionally apply remove_near_duplicates() to the data here.
        return

    if st.session_state.hopsworks_project is None:
        st.write("Initializing Hopsworks connection...")
        st.session_state.hopsworks_project = connect_to_hopsworks()
        st.success("Connected to Hopsworks!")

    data = fetch_data_from_feature_group(
        st.session_state.hopsworks_project,
        FEATURE_GROUP_NAME,
        FEATURE_GROUP_VERSION,
    )
    filepath = save_data_locally(data, DATA_FILENAME)
    st.session_state.data = data
    st.success(f"Data fetched and saved locally at {filepath}")


def main():
    """Build the page: load data, collect sidebar choices, render results."""
    st.title("Wheely Fun Times - Bus Occupancy Explorer")

    # Initialize session state.
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    _ensure_data_loaded()

    _, bus_list, short_bus = get_buses()
    short_bus = sorted(short_bus)

    # Sidebar: pick the bus number.
    st.sidebar.title("Search for your desired bus")
    search = st.sidebar.selectbox(
        "Search for your bus number:",
        options=short_bus,
        help="Select one bus to view details."
    )
    if search is None:
        st.write("No buses selected. Please search in the sidebar.")
        return

    # A short name can map to several long route names; ask only if needed.
    route = bus_list[bus_list["route_short_name"] == search]
    long_names = list(pd.unique(route["route_long_name"]))
    if len(long_names) == 1:
        bus = long_names[0]
    else:
        bus = st.sidebar.selectbox(
            "Pick bus route:",
            options=long_names,
            help="Select one bus to view details."
        )
    st.write(f"### Selected Bus: {search} {bus}")

    # The direction is a boolean flag compared against `direction_id` below;
    # the button flips it.
    if "direction" not in st.session_state:
        st.session_state.direction = False
    if st.sidebar.button('Change Direction'):
        st.session_state.direction = not st.session_state.direction

    # All rows of the chosen route, with timestamps normalised so they can be
    # compared with the naive datetimes built from the sidebar inputs.
    all_data = st.session_state.data
    bus_trips = all_data[all_data["route_long_name"] == bus].copy()
    bus_trips["datetime"] = _to_naive_utc(bus_trips["datetime"])

    stops = list(pd.unique(bus_trips["stop_name"]))
    stop_choice = st.sidebar.selectbox(
        "Select your bus stop:",
        options=stops,
        help="Select one bus stop to se occupancy."
    )

    today = datetime.now().date()
    tomorrow = today + timedelta(days=1)
    date_options = {
        today.strftime("%d %B %Y"): today,
        tomorrow.strftime("%d %B %Y"): tomorrow,
    }
    day_choice = st.sidebar.radio(
        "Select the day:", options=list(date_options.keys())
    )

    start_time = st.sidebar.time_input("Select a start time", value=None)
    end_time = st.sidebar.time_input("Select an end time", value=None)
    if start_time is None or end_time is None:
        # Nothing to show until both ends of the time window are chosen.
        return

    st.write(f"Displaying buses between {start_time.strftime('%H:%M')} and {end_time.strftime('%H:%M')} the {day_choice}")

    # TODO: revisit how the time filter should work.
    day = date_options[day_choice]
    window_start = datetime.combine(day, start_time)
    window_end = datetime.combine(day, end_time)
    selected_trips = bus_trips[
        (bus_trips["datetime"] >= window_start)
        & (bus_trips["datetime"] <= window_end)
        & (bus_trips["direction_id"] == st.session_state.direction)
        & (bus_trips["stop_name"] == stop_choice)
    ]

    # TODO: stop_name could be dropped here; possibly keep the stop choice in
    # session_state instead.
    arrivals = selected_trips[["trip_id", "stop_name", "datetime"]]
    arrivals = drop_the_duplicates(arrivals.sort_values(by=["datetime"]))
    st.write(f"{arrivals['trip_id'].nunique()} buses available")

    for _, arrival in arrivals.iterrows():
        trip_data = all_data[all_data["trip_id"] == arrival["trip_id"]]
        plot_graph_title(
            trip_data,
            arrival["stop_name"],
            arrival['datetime'].strftime('%H:%M'),
        )
        visualize(trip_data, stop_choice)


# `streamlit run` executes this script with __name__ == "__main__", so the app
# still starts normally while the module stays importable without side effects.
if __name__ == "__main__":
    main()

# Open questions / notes:
# Show all bus lines? Search?
# How to see the direction?
# Filter on bus line and direction
# Filter on time
# Should the user specify the time
# See all unique trip ids
# Map position to stop
# Show some kind of graph for all buses within that time
# Should it be for all stops or only those the user said they will travel to