|
import streamlit as st |
|
import hopsworks |
|
import pandas as pd |
|
import os |
|
import time |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from datetime import datetime, timedelta |
|
import altair as alt |
|
|
|
import api |
|
|
|
|
|
# Directory (relative to the working directory) holding the cached download.
DATA_DIR = "data"

# File inside DATA_DIR that records when the cache was last refreshed.
TIMESTAMP_FILE = "last_download_time.txt"
|
|
|
|
|
def connect_to_hopsworks():
    """Log in to Hopsworks and return the project handle.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable
    and passed to ``hopsworks.login``. The key is deliberately never printed
    or logged: it is a credential.

    Returns:
        Whatever ``hopsworks.login`` returns for the ``id2223AirQuality``
        project.
    """
    st.write("Connecting to Hopsworks...")

    project_name = "id2223AirQuality"
    # May be None when the variable is unset; hopsworks.login receives it as-is.
    api_key = os.getenv("HOPSWORKS_API_KEY")

    return hopsworks.login(project=project_name, api_key_value=api_key)
|
|
|
|
|
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read one feature group from the project's feature store.

    Args:
        project: Object exposing ``get_feature_store()`` (in this app, the
            value returned by ``connect_to_hopsworks``).
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        The result of calling ``read()`` on the feature group.
    """
    feature_store = project.get_feature_store()
    feature_group = feature_store.get_feature_group(
        name=feature_group_name,
        version=version,
    )
    return feature_group.read()
|
|
|
|
|
def save_data_locally(data, filename):
    """Write *data* as CSV under ``DATA_DIR`` and record the download time.

    Args:
        data: Object with a ``to_csv(path, index=False)`` method (a pandas
            DataFrame in this app).
        filename: File name to create inside ``DATA_DIR``.

    Returns:
        The path of the CSV file that was written.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    filepath = os.path.join(DATA_DIR, filename)
    data.to_csv(filepath, index=False)

    # The timestamp is parsed back with datetime.fromisoformat, so write it
    # in explicit ISO-8601 form.
    timestamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(timestamp_path, "w", encoding="utf-8") as f:
        f.write(datetime.now().isoformat())

    return filepath
|
|
|
|
|
def load_local_data(filename):
    """Load a cached CSV from ``DATA_DIR``.

    Args:
        filename: File name inside ``DATA_DIR``.

    Returns:
        A DataFrame read with ``pd.read_csv``, or ``None`` when the file does
        not exist.
    """
    filepath = os.path.join(DATA_DIR, filename)
    if not os.path.exists(filepath):
        return None
    return pd.read_csv(filepath)
|
|
|
|
|
def is_local_data_valid():
    """Report whether the local cache was refreshed within the last day.

    Reads the ISO-formatted timestamp stored in ``TIMESTAMP_FILE`` under
    ``DATA_DIR``. Only the timestamp is inspected; the cached CSV itself is
    not checked here.

    Returns:
        ``True`` when the recorded download time is at most one day old.
        ``False`` when the timestamp file is missing, unreadable or
        unparsable (a Streamlit warning is shown in the latter two cases),
        or when it is older than one day.
    """
    timestamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(timestamp_path):
        return False

    try:
        with open(timestamp_path, "r", encoding="utf-8") as f:
            last_download_time = datetime.fromisoformat(f.read().strip())
    except (OSError, ValueError) as e:
        # OSError: the file could not be read; ValueError: not an ISO timestamp.
        st.warning(f"Error reading timestamp: {e}")
        return False

    # Use the stored value's tzinfo (None for naive values) so a naive and an
    # offset-aware datetime are never subtracted from each other.
    age = datetime.now(last_download_time.tzinfo) - last_download_time
    return age <= timedelta(days=1)
|
|
|
def get_buses():
    """Summarise the buses present in ``st.session_state.data``.

    Returns:
        A 3-tuple ``(bus_df, bus_list, short_bus_list)``:

        * ``bus_df`` - distinct ``trip_id`` / ``route_long_name`` /
          ``route_short_name`` rows.
        * ``bus_list`` - distinct ``route_long_name`` / ``route_short_name``
          pairs.
        * ``short_bus_list`` - list of the distinct ``route_short_name``
          values, in order of first appearance.
    """
    trip_columns = ["trip_id", "route_long_name", "route_short_name"]
    bus_df = st.session_state.data[trip_columns].drop_duplicates()

    bus_list = bus_df[["route_long_name", "route_short_name"]].drop_duplicates()
    short_bus_list = list(pd.unique(bus_df["route_short_name"]))

    return bus_df, bus_list, short_bus_list
|
|
|
|
|
def remove_near_duplicates(data):
    """Thin out rows that repeat the same route/stop within three minutes.

    Rows are grouped by ``route_id`` and ``stop_name``. Inside each group the
    rows are visited in ``datetime`` order; a row is kept when it is the
    first of its group or when it is more than three minutes after the last
    row that was kept.

    The input frame is not modified. Column dtypes are preserved, and an
    empty input yields an empty frame with the same columns.

    Args:
        data: DataFrame with at least ``route_id``, ``stop_name`` and
            ``datetime`` columns. ``datetime`` may hold anything
            ``pd.to_datetime`` accepts.

    Returns:
        A DataFrame holding the kept rows (original index labels), with
        ``datetime`` converted by ``pd.to_datetime``. Groups appear in
        sorted key order and rows within a group in time order.
    """
    frame = data.copy()
    frame["datetime"] = pd.to_datetime(frame["datetime"])

    # Work on positional labels so duplicate index labels in the input cannot
    # cause rows to be selected twice.
    positional = frame.reset_index(drop=True)
    min_gap = pd.Timedelta(minutes=3)

    kept_positions = []
    for _, group in positional.groupby(["route_id", "stop_name"]):
        last_kept = None
        ordered = group["datetime"].sort_values(kind="mergesort")
        for position, moment in ordered.items():
            if last_kept is None or moment - last_kept > min_gap:
                kept_positions.append(position)
                last_kept = moment

    return frame.iloc[kept_positions]
|
|
|
def remove_duplicate_trips(df, route_id_col="route_id", trip_id_col="trip_id", datetime_col="datetime", time_window='3min'):
    """
    Removes duplicate trips based on route_id and starting time proximity within a time window.

    A trip's start is the earliest ``datetime_col`` value among its rows.
    Within each route, trips are ordered by start; a trip is treated as a
    duplicate, and all of its rows are dropped, when it starts no more than
    ``time_window`` after the trip before it. The first trip of every route
    is always kept.

    The input frame is not modified.

    Parameters:
        df (pd.DataFrame): Input DataFrame containing trip data.
        route_id_col (str): Column name for route IDs.
        trip_id_col (str): Column name for trip IDs.
        datetime_col (str): Column name for departure times.
        time_window (str): Time window for considering trips as duplicates (e.g., '3min').

    Returns:
        pd.DataFrame: Rows of the kept trips, sorted by route and time, with
        ``datetime_col`` converted by ``pd.to_datetime``.
    """
    frame = df.copy()
    frame[datetime_col] = pd.to_datetime(frame[datetime_col])
    window = pd.to_timedelta(time_window)

    # One row per (route, trip) holding the trip's first timestamp.
    starts = (
        frame.groupby([route_id_col, trip_id_col], as_index=False)[datetime_col]
        .min()
        .sort_values([route_id_col, datetime_col], kind="mergesort")
    )

    # Gap to the previous trip on the same route. It is missing for the first
    # trip of a route, which must be kept rather than counted as a zero gap.
    gap = starts.groupby(route_id_col)[datetime_col].diff()
    kept_trips = starts.loc[gap.isna() | (gap > window), [route_id_col, trip_id_col]]

    # Match on (route, trip) pairs so a trip id reused on another route is
    # not kept by accident.
    row_keys = pd.MultiIndex.from_frame(frame[[route_id_col, trip_id_col]])
    kept_keys = pd.MultiIndex.from_frame(kept_trips)
    result = frame[row_keys.isin(kept_keys)]

    return result.sort_values(by=[route_id_col, datetime_col])
|
|
|
def plot_graph(plot_df):
    """Render a step chart of occupancy per stop, plus head/tail previews.

    Args:
        plot_df: DataFrame with ``datetime``, ``vehicle_occupancystatus``,
            ``stop_name`` and ``route_id`` columns. The caller's frame is not
            modified.
    """
    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }

    columns = ["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]
    plot_df = plot_df[columns].sort_values("datetime")
    st.write(plot_df.head())
    st.write(plot_df.tail())

    # assign() builds a new frame, so no column is written into a slice of
    # the caller's data.
    plot_df = plot_df.assign(
        Occupancy=plot_df["vehicle_occupancystatus"].map(categories)
    )

    # Fullest category first on the y axis.
    category_order = list(categories.values())[::-1]

    # Without an explicit sort the nominal x axis is ordered alphabetically,
    # which discards the datetime ordering established above.
    stop_order = list(dict.fromkeys(plot_df["stop_name"]))

    chart = alt.Chart(plot_df).mark_line(point=True, interpolate="step-after").encode(
        x=alt.X('stop_name:N', title="Stop name", sort=stop_order),
        y=alt.Y(
            'Occupancy:N',
            title="Vehicle Occupancy Status (Categories)",
            sort=category_order,
            scale=alt.Scale(domain=category_order),
        ),
        tooltip=["datetime", 'stop_name', 'Occupancy'],
    ).properties(
        title="Vehicle Occupancy Status Over Time"
    )
    st.altair_chart(chart, use_container_width=True)
|
|
|
def plot_graph_title(plot_df, stop, time):
    """Render a step chart of one bus's occupancy along its stops.

    Args:
        plot_df: DataFrame with ``datetime``, ``vehicle_occupancystatus``,
            ``stop_name`` and ``route_id`` columns. The caller's frame is not
            modified.
        stop: Stop name interpolated into the chart title.
        time: Arrival time text interpolated into the chart title.
    """
    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }

    columns = ["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]
    plot_df = plot_df[columns].sort_values("datetime")

    # assign() builds a new frame, so no column is written into a slice of
    # the caller's data.
    plot_df = plot_df.assign(
        Occupancy=plot_df["vehicle_occupancystatus"].map(categories)
    )

    # Fullest category first on the y axis.
    category_order = list(categories.values())[::-1]

    # Altair's custom sort takes a plain list of values; give it each stop
    # once, in the order the bus reaches them.
    stop_order = list(dict.fromkeys(plot_df["stop_name"]))

    chart = alt.Chart(plot_df).mark_line(point=True, interpolate="step-after").encode(
        x=alt.X('stop_name:N', title="Stop name", sort=stop_order),
        y=alt.Y(
            'Occupancy:N',
            title="Vehicle Occupancy Status (Categories)",
            sort=category_order,
            scale=alt.Scale(domain=category_order),
        ),
        tooltip=["datetime", 'stop_name', 'Occupancy'],
    ).properties(
        title=f"Vehicle Occupancy For Bus arriving at {stop} at {time}"
    )
    st.altair_chart(chart, use_container_width=True)
|
|
|
|
|
def visualize(filtered_data, stop_name):
    """Draw the stops of one trip on a folium map with an occupancy legend.

    Every row of *filtered_data* becomes a marker. The marker for
    *stop_name* is black; other markers are coloured by
    ``vehicle_occupancystatus``. A status outside 0-5 (or missing) gets
    folium's default icon colour and the popup text "Unknown" instead of
    raising ``KeyError``.

    Args:
        filtered_data: DataFrame with ``stop_lat``, ``stop_lon``,
            ``stop_name`` and ``vehicle_occupancystatus`` columns.
        stop_name: Name of the stop to highlight as the user's stop.
    """
    import folium
    from streamlit_folium import st_folium

    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }

    # Marker colour per occupancy status; mirrors the legend below.
    status_colours = {
        0: "green",
        1: "blue",
        2: "purple",
        3: "pink",
        4: "orange",
        5: "red",
    }

    # The opening <style> and <div> tags stay at column 0 so the Markdown
    # renderer treats them as HTML blocks rather than indented code.
    legend_html = '''
<style>
  .legend-box {
    position: relative;
    width: 250px;
    background-color: white;
    padding: 10px;
    border: 2px solid grey;
    border-radius: 5px;
    font-size: 14px;
  }
  .legend-box div {
    margin-bottom: 5px;
  }
  .legend-box i {
    width: 20px;
    height: 10px;
    display: inline-block;
    margin-right: 5px;
  }
</style>
<div class="legend-box">
  <b>Occupancy Legend:</b><br>
  <div><i style="background: green;"></i>Empty</div>
  <div><i style="background: blue;"></i>Many seats available</div>
  <div><i style="background: purple;"></i>Few seats available</div>
  <div><i style="background: pink;"></i>Standing room only</div>
  <div><i style="background: orange;"></i>Crushed standing room</div>
  <div><i style="background: red;"></i>Full</div>
  <div><i style="background: black;"></i>Your stop</div>
</div>
'''

    # Nothing to centre the map on; iloc[0] below would raise IndexError.
    if filtered_data.empty:
        st.warning("No stop data available to draw on the map.")
        return

    first_stop = filtered_data.iloc[0]
    m = folium.Map(
        location=[first_stop["stop_lat"], first_stop["stop_lon"]],
        zoom_start=12,
    )

    # Zoom so that every stop of the trip is visible.
    coordinates = filtered_data[['stop_lat', 'stop_lon']]
    sw = coordinates.min().values.tolist()
    ne = coordinates.max().values.tolist()
    m.fit_bounds([sw, ne])

    for _, row in filtered_data.iterrows():
        status = row['vehicle_occupancystatus']
        occupancy = categories.get(status, "Unknown")

        if row["stop_name"] == stop_name:
            colour = "black"
        else:
            colour = status_colours.get(status)

        icon_options = {"icon": "bus-simple", "prefix": "fa"}
        if colour is not None:
            icon_options["color"] = colour

        folium.Marker(
            [row['stop_lat'], row['stop_lon']],
            popup=f"Bus stop: {row['stop_name']} Bus occupancy: {occupancy}",
            icon=folium.Icon(**icon_options),
        ).add_to(m)

    col1, col2 = st.columns([3, 1])

    with col1:
        st_folium(m, width=700, height=500)

    with col2:
        st.markdown(legend_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
def drop_the_duplicates(df):
    """Drop rows whose ``datetime`` repeats or closely follows the row before.

    First, rows with a ``datetime`` value already seen are removed (the first
    occurrence wins). Then a row is kept only when it is the first remaining
    row or is more than three minutes after the remaining row directly before
    it. Rows are compared in the order given, so the caller is expected to
    pass them sorted by ``datetime``.

    Args:
        df: DataFrame with a datetime-typed ``datetime`` column. It is not
            modified.

    Returns:
        The kept rows, with the same columns as *df*.
    """
    unique_times = df.drop_duplicates("datetime")

    # Kept as a standalone Series so no helper column leaks into the result.
    previous = unique_times["datetime"].shift(1)
    far_enough = (unique_times["datetime"] - previous) > timedelta(minutes=3)

    return unique_times[far_enough | previous.isna()]
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the Streamlit page: load data, collect sidebar choices, plot trips.

    Flow:
        1. Use the local CSV cache when its timestamp is fresh and the file
           can be loaded; otherwise fetch the ``predictions`` feature group
           (version 1) from Hopsworks and cache it.
        2. Let the user pick a bus, direction, stop, day and time window in
           the sidebar.
        3. For every bus passing the chosen stop inside the window, draw the
           occupancy chart and the stop map.
    """
    st.title("Wheely Fun Times - Bus Occupancy Explorer")

    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    # A fresh timestamp does not guarantee the CSV is still there, so fall
    # back to downloading when the cache cannot be loaded.
    cached = load_local_data("data.csv") if is_local_data_valid() else None

    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
        if "first" not in st.session_state:
            st.session_state.first = True
    else:
        if st.session_state.hopsworks_project is None:
            st.write("Initializing Hopsworks connection...")
            st.session_state.hopsworks_project = connect_to_hopsworks()
            st.success("Connected to Hopsworks!")

        project = st.session_state.hopsworks_project
        data = fetch_data_from_feature_group(project, "predictions", 1)

        filepath = save_data_locally(data, "data.csv")
        st.session_state.data = data
        st.success(f"Data fetched and saved locally at {filepath}")

    _, bus_list, short_bus = get_buses()
    short_bus = sorted(short_bus)

    st.sidebar.title("Search for your desired bus")

    search = st.sidebar.selectbox(
        "Search for your bus number:",
        options=short_bus,
        help="Select one bus to view details."
    )

    if not search:
        st.write("No buses selected. Please search in the sidebar.")
        return

    # A short name can map to several long names; ask only when ambiguous.
    route = bus_list[bus_list["route_short_name"] == search]
    long_names = list(pd.unique(route["route_long_name"]))
    if len(long_names) == 1:
        bus = long_names[0]
    else:
        bus = st.sidebar.selectbox(
            "Pick bus route:",
            options=long_names,
            help="Select one bus to view details."
        )
    st.write(f"### Selected Bus: {search} {bus}")

    if "direction" not in st.session_state:
        st.session_state.direction = False

    if st.sidebar.button('Change Direction'):
        st.session_state.direction = not st.session_state.direction

    all_rows = st.session_state.data

    # Copy before adding/replacing a column so the session data is untouched.
    bus_trips = all_rows[all_rows["route_long_name"] == bus].copy()
    # utc=True accepts both naive and offset-aware input; the result is then
    # made naive so it can be compared with datetime.combine() below.
    # NOTE(review): the comparison values below are built from local wall
    # time, while these are UTC - confirm that this is intended.
    bus_trips["datetime"] = pd.to_datetime(
        bus_trips["datetime"], utc=True
    ).dt.tz_convert(None)

    stops = list(pd.unique(bus_trips["stop_name"]))
    stop_choice = st.sidebar.selectbox(
        "Select your bus stop:",
        options=stops,
        help="Select one bus stop to se occupancy."
    )

    today = datetime.now().date()
    tomorrow = today + timedelta(days=1)

    date_options = {
        today.strftime("%d %B %Y"): today,
        tomorrow.strftime("%d %B %Y"): tomorrow,
    }

    day_choice = st.sidebar.radio("Select the day:", options=list(date_options.keys()))

    start_time = st.sidebar.time_input("Select a start time", value=None)
    end_time = st.sidebar.time_input("Select an end time", value=None)

    # Both ends of the window are required before anything is listed.
    if start_time is None or end_time is None:
        return

    st.write(f"Displaying buses between {start_time.strftime('%H:%M')} and {end_time.strftime('%H:%M')} the {day_choice}")

    chosen_day = date_options[day_choice]
    window_start = datetime.combine(chosen_day, start_time)
    window_end = datetime.combine(chosen_day, end_time)

    selected_trips = bus_trips[
        (bus_trips["datetime"] >= window_start)
        & (bus_trips["datetime"] <= window_end)
        & (bus_trips["direction_id"] == st.session_state.direction)
        & (bus_trips["stop_name"] == stop_choice)
    ]

    arrivals = selected_trips[["trip_id", "stop_name", "datetime"]]
    arrivals = arrivals.sort_values(by=["datetime"])
    arrivals = drop_the_duplicates(arrivals)

    st.write(f"{arrivals['trip_id'].nunique()} buses available")

    for _, arrival in arrivals.iterrows():
        # Every stop of this trip, not only the chosen one.
        trip_rows = all_rows[all_rows["trip_id"] == arrival["trip_id"]]
        plot_graph_title(
            trip_rows,
            arrival["stop_name"],
            arrival['datetime'].strftime('%H:%M'),
        )
        visualize(trip_rows, stop_choice)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the page only when this file is executed as a script, so importing the
# module does not start the app.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|