File size: 16,651 Bytes
064a25d
e4fbfab
 
 
 
 
 
 
2bd6eac
064a25d
effa819
 
e4fbfab
 
 
 
 
 
 
 
6f35e8c
8aa409a
7030e08
e4fbfab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
effa819
 
 
 
 
 
 
 
e4fbfab
8769306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0212e2c
8769306
 
 
 
 
 
 
 
cc58789
7762bc9
 
 
 
 
 
 
 
 
 
d18dba3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7762bc9
 
 
 
 
 
 
 
 
 
cc58789
 
 
96b4442
cc58789
d18dba3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc58789
 
 
 
 
 
 
d18dba3
 
 
 
 
 
 
7762bc9
d18dba3
 
 
7762bc9
d18dba3
7762bc9
96b4442
 
 
 
 
 
7fbd224
 
96b4442
 
 
e4fbfab
 
7762bc9
e4fbfab
8dd2873
e4fbfab
 
 
8dd2873
e4fbfab
 
618cd91
 
 
 
 
e4fbfab
8dd2873
e4fbfab
618cd91
5184bfe
 
8769306
e4fbfab
 
 
 
 
 
 
 
7edb958
effa819
618cd91
e4fbfab
 
 
effa819
 
2d3ae25
 
effa819
 
 
 
 
 
fbcd22d
effa819
 
 
 
 
 
 
 
 
96b4442
effa819
 
 
 
 
 
96b4442
39a6a86
7762bc9
 
 
 
 
 
 
 
 
 
8769306
 
 
 
 
 
 
 
 
 
 
 
7762bc9
 
39a6a86
 
 
 
 
 
 
 
 
effa819
39a6a86
effa819
39a6a86
 
 
effa819
7762bc9
39a6a86
 
 
 
 
 
 
96b4442
7762bc9
 
8769306
 
39a6a86
96b4442
8769306
96b4442
cc58789
8769306
 
 
 
96b4442
 
 
8769306
 
96b4442
8769306
96b4442
2bd6eac
effa819
 
 
 
 
 
 
e4fbfab
96b4442
39a6a86
96b4442
e4fbfab
 
d5c7130
 
 
 
 
 
 
 
 
8dd2873
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
import streamlit as st
import hopsworks
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import altair as alt

import api

# Constants
DATA_DIR = "data"
TIMESTAMP_FILE = "last_download_time.txt"

# Initialize Hopsworks connection
def connect_to_hopsworks():
    """Log in to Hopsworks and return the project handle.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable
    and passed straight to ``hopsworks.login``. The key is a secret and is
    deliberately never printed or logged.

    Returns:
        The object returned by ``hopsworks.login`` for the
        ``id2223AirQuality`` project.
    """
    st.write("Connecting to Hopsworks...")
    project_name = "id2223AirQuality"
    api_key = os.getenv("HOPSWORKS_API_KEY")
    return hopsworks.login(project=project_name, api_key_value=api_key)

# Fetch data from Hopsworks feature group
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read the full contents of one feature group.

    Args:
        project: Object exposing ``get_feature_store()`` (the value returned
            by ``connect_to_hopsworks``).
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        Whatever the feature group's ``read()`` returns.
    """
    store = project.get_feature_store()
    return store.get_feature_group(
        name=feature_group_name,
        version=version,
    ).read()

# Save data locally
def save_data_locally(data, filename):
    """Write ``data`` as CSV under ``DATA_DIR`` and record the download time.

    Args:
        data: Object with a ``to_csv(path, index=False)`` method.
        filename: File name (relative to ``DATA_DIR``) for the CSV.

    Returns:
        The path of the CSV file that was written.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    csv_path = os.path.join(DATA_DIR, filename)
    data.to_csv(csv_path, index=False)

    # The timestamp file is what is_local_data_valid() later reads to decide
    # whether the cached CSV is still fresh.
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(stamp_path, "w") as stamp_file:
        stamp_file.write(str(datetime.now()))

    return csv_path

# Load local data
def load_local_data(filename):
    """Load a cached CSV from ``DATA_DIR``.

    Args:
        filename: File name (relative to ``DATA_DIR``) of the CSV.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the file does not
        exist.
    """
    csv_path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(csv_path):
        return None
    return pd.read_csv(csv_path)

# Check if local data is valid
def is_local_data_valid():
    """Tell whether the locally cached data is at most one day old.

    Only the timestamp file is inspected; the CSV itself is not checked.

    Returns:
        ``True`` when the timestamp file exists, parses as an ISO datetime
        and is no more than one day in the past. ``False`` otherwise,
        including when reading or parsing fails (a Streamlit warning is
        shown in that case).
    """
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(stamp_path):
        return False

    try:
        with open(stamp_path, "r") as stamp_file:
            raw_stamp = stamp_file.read().strip()
        downloaded_at = datetime.fromisoformat(raw_stamp)
        age = datetime.now() - downloaded_at
        # Anything older than a day counts as stale.
        return age <= timedelta(days=1)
    except Exception as e:
        st.warning(f"Error reading timestamp: {e}")
        return False
    
def get_buses():
    """Summarise the buses present in ``st.session_state.data``.

    Returns:
        A 3-tuple ``(bus_df, bus_list, short_bus_list)``:

        * ``bus_df`` - unique (trip_id, route_long_name, route_short_name) rows.
        * ``bus_list`` - unique (route_long_name, route_short_name) rows.
        * ``short_bus_list`` - list of the unique ``route_short_name`` values.
    """
    bus_df = st.session_state.data[
        ["trip_id", "route_long_name", "route_short_name"]
    ].drop_duplicates()
    bus_list = bus_df[["route_long_name", "route_short_name"]].drop_duplicates()
    short_bus_list = list(pd.unique(bus_df["route_short_name"]))
    return bus_df, bus_list, short_bus_list


def remove_near_duplicates(data):
    """Drop rows that repeat a (route_id, stop_name) within three minutes.

    These statements previously sat, unreachable, after the ``return`` of
    ``get_buses`` and referred to an undefined ``data``; they are restored
    here as a function of their own. Nothing in this file calls it at the
    moment.

    Within each (route_id, stop_name) group the rows are visited in their
    existing order. A row is kept when it is the first of its group or when
    its ``datetime`` is more than three minutes after the last *kept* row.
    NOTE(review): this presumes each group is already in ascending time
    order - confirm before relying on it.

    Args:
        data: DataFrame with at least ``route_id``, ``stop_name`` and
            ``datetime`` columns. It is not modified.

    Returns:
        A new DataFrame holding the kept rows, with the same columns as
        ``data`` (``datetime`` converted with ``pd.to_datetime``).
    """
    # Work on a copy so converting the datetime column does not leak back
    # into the caller's frame.
    data = data.copy()
    data["datetime"] = pd.to_datetime(data["datetime"])
    min_gap = pd.Timedelta(minutes=3)

    kept_rows = []
    for _, group in data.groupby(["route_id", "stop_name"]):
        last_kept_time = None
        for _, row in group.iterrows():
            if last_kept_time is None or row["datetime"] - last_kept_time > min_gap:
                kept_rows.append(row)
                last_kept_time = row["datetime"]

    # Passing the columns keeps the schema intact even when nothing is kept.
    return pd.DataFrame(kept_rows, columns=data.columns)

def plot_graph_title(plot_df, stop, time):
    """Draw a step chart of occupancy per stop for one set of rows.

    Args:
        plot_df: DataFrame with ``datetime``, ``vehicle_occupancystatus``,
            ``stop_name`` and ``route_id`` columns.
        stop: Stop name, used only in the chart title.
        time: Arrival time text, used only in the chart title. Note that this
            parameter shadows the imported ``time`` module inside the function.
    """
    # TODO: should the chart show only the stops the user asked for, or all?
    occupancy_labels = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }
    # Explicit category order for the Y axis, from 'Full' down to 'Empty'.
    label_order = [occupancy_labels[code] for code in sorted(occupancy_labels, reverse=True)]

    wanted_columns = ["datetime", "vehicle_occupancystatus", "stop_name", "route_id"]
    trip_df = plot_df[wanted_columns].sort_values("datetime")
    trip_df["Occupancy"] = trip_df["vehicle_occupancystatus"].map(occupancy_labels)

    # X is sorted by the stop names in time order; Y is treated as categorical.
    x_axis = alt.X('stop_name:N', title="Stop name", sort=trip_df["stop_name"])
    y_axis = alt.Y(
        'Occupancy:N',
        title="Vehicle Occupancy Status (Categories)",
        sort=label_order,
        scale=alt.Scale(domain=label_order),
    )

    chart = (
        alt.Chart(trip_df)
        .mark_line(point=True, interpolate="step-after")
        .encode(
            x=x_axis,
            y=y_axis,
            tooltip=["datetime", 'stop_name', 'Occupancy'],  # hover details
        )
        .properties(title=f"Vehicle Occupancy For Bus arriving at {stop} at {time}")
    )
    st.altair_chart(chart, use_container_width=True)


# Human-readable label for each vehicle_occupancystatus code.
_OCCUPANCY_LABELS = {
    0: 'Empty',
    1: 'Many seats available',
    2: 'Few seats available',
    3: 'Standing room only',
    4: 'Crushed standing room',
    5: 'Full',
}

# Marker colour for each occupancy code; mirrors the legend shown beside the map.
_OCCUPANCY_COLORS = {
    0: "green",
    1: "blue",
    2: "purple",
    3: "pink",
    4: "orange",
    5: "red",
}


def occupancy_marker_style(status, is_selected_stop):
    """Return ``(color, label)`` for one stop marker.

    Args:
        status: Occupancy code of the row (0-5 are known codes).
        is_selected_stop: Whether the row is the stop the user picked.

    Returns:
        ``color`` is ``"black"`` for the selected stop, the colour mapped to
        ``status`` otherwise, or ``None`` when the code is unknown (the caller
        then leaves the icon colour at folium's default). ``label`` is the
        text for ``status``, or ``"Unknown"`` for an unrecognised or missing
        code - previously such rows raised ``KeyError``.
    """
    label = _OCCUPANCY_LABELS.get(status, "Unknown")
    color = "black" if is_selected_stop else _OCCUPANCY_COLORS.get(status)
    return color, label


def visualize(filtered_data, stop_name):
    """Show the stops of ``filtered_data`` on a folium map next to a legend.

    One marker is drawn per row, coloured by ``vehicle_occupancystatus``; the
    row(s) whose ``stop_name`` equals ``stop_name`` are drawn in black.

    Args:
        filtered_data: DataFrame with ``stop_lat``, ``stop_lon``,
            ``stop_name`` and ``vehicle_occupancystatus`` columns.
        stop_name: Name of the stop the user selected.
    """
    import folium
    from streamlit_folium import st_folium

    # Nothing to centre the map on; bail out instead of failing on iloc[0].
    if filtered_data.empty:
        st.warning("No stop data available to display on the map.")
        return

    legend_html = '''
        <style>
        .legend-box {
            position: relative;
            width: 250px;
            background-color: white;
            padding: 10px;
            border: 2px solid grey;
            border-radius: 5px;
            font-size: 14px;
        }
        .legend-box div {
            margin-bottom: 5px;
        }
        .legend-box i {
            width: 20px;
            height: 10px;
            display: inline-block;
            margin-right: 5px;
        }
        </style>
        <div class="legend-box">
            <b>Occupancy Legend:</b><br>
            <div><i style="background: green;"></i>Empty</div>
            <div><i style="background: blue;"></i>Many seats available</div>
            <div><i style="background: purple;"></i>Few seats available</div>
            <div><i style="background: pink;"></i>Standing room only</div>
            <div><i style="background: orange;"></i>Crushed standing room</div>
            <div><i style="background: red;"></i>Full</div>
            <div><i style="background: black;"></i>Your stop</div>
        </div>
        '''

    # Centre the map on the first row, then zoom to the box containing all stops.
    first_row = filtered_data.iloc[0]
    m = folium.Map(location=[first_row["stop_lat"], first_row["stop_lon"]], zoom_start=12)

    sw = filtered_data[['stop_lat', 'stop_lon']].min().values.tolist()
    ne = filtered_data[['stop_lat', 'stop_lon']].max().values.tolist()
    m.fit_bounds([sw, ne])

    # Add one bus-stop marker per row.
    for _, row in filtered_data.iterrows():
        color, label = occupancy_marker_style(
            row['vehicle_occupancystatus'],
            row["stop_name"] == stop_name,
        )
        icon_kwargs = {"icon": "bus-simple", "prefix": "fa"}
        if color is not None:
            icon_kwargs["color"] = color
        folium.Marker(
            [row['stop_lat'], row['stop_lon']],
            popup=f"Bus stop: {row['stop_name']} Bus occupancy: {label}",
            icon=folium.Icon(**icon_kwargs),
        ).add_to(m)

    # Layout: map in a wide left column, legend in a narrow right column (3:1).
    col1, col2 = st.columns([3, 1])

    with col1:
        st_folium(m, width=700, height=500)

    with col2:
        st.markdown(legend_html, unsafe_allow_html=True)

def drop_the_duplicates(df):
    """Thin out rows whose ``datetime`` is close to the preceding row's.

    Rows with an identical ``datetime`` are reduced to their first
    occurrence. Each remaining row is then compared with the row directly
    before it (not with the last row that was kept): it survives when it is
    the first row or lies more than three minutes after its predecessor.
    Rows are taken in their existing order.

    Args:
        df: DataFrame with a ``datetime`` column. It is not modified.

    Returns:
        A new DataFrame with the surviving rows and an extra ``previous``
        column holding the predecessor's ``datetime``.
    """
    unique_times = df.drop_duplicates("datetime")
    with_previous = unique_times.assign(previous=unique_times["datetime"].shift(1))

    is_first_row = with_previous["previous"].isna()
    gap_to_previous = with_previous["datetime"] - with_previous["previous"]
    far_enough = gap_to_previous > timedelta(minutes=3)

    return with_previous[far_enough | is_first_row]


# Streamlit UI
def main():
    """Render the bus occupancy explorer page.

    Flow:
        1. Load the data: use the local CSV cache when its timestamp is fresh
           and the file can be read, otherwise download the ``predictions``
           feature group (version 1) from Hopsworks and cache it.
        2. Let the user pick a bus number, route, direction, stop, day and a
           time window in the sidebar.
        3. For every bus reaching the chosen stop inside the window, draw the
           occupancy chart and the stop map for that bus's trip.
    """
    st.title("Wheely Fun Times - Bus Occupancy Explorer")

    # Initialize session state
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    # A fresh timestamp does not guarantee the CSV is still there:
    # load_local_data returns None for a missing file, in which case we fall
    # through to a new download instead of continuing without data.
    cached = load_local_data("data.csv") if is_local_data_valid() else None
    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
        if "first" not in st.session_state:
            st.session_state.first = True
    else:
        if st.session_state.hopsworks_project is None:
            st.write("Initializing Hopsworks connection...")
            st.session_state.hopsworks_project = connect_to_hopsworks()
            st.success("Connected to Hopsworks!")

        project = st.session_state.hopsworks_project
        data = fetch_data_from_feature_group(project, "predictions", 1)
        filepath = save_data_locally(data, "data.csv")
        st.session_state.data = data
        st.success(f"Data fetched and saved locally at {filepath}")

    all_data = st.session_state.data
    _, bus_list, short_bus = get_buses()
    short_bus = sorted(short_bus)

    # Sidebar section for searching buses
    st.sidebar.title("Search for your desired bus")
    search = st.sidebar.selectbox(
        "Search for your bus number:",
        options=short_bus,
        help="Select one bus to view details."
    )

    if not search:
        st.write("No buses selected. Please search in the sidebar.")
        return

    # A short name can map to several long route names; ask only if ambiguous.
    route = bus_list[bus_list["route_short_name"] == search]
    long_names = list(pd.unique(route["route_long_name"]))
    if len(long_names) == 1:
        bus = long_names[0]
    else:
        bus = st.sidebar.selectbox(
            "Pick bus route:",
            options=long_names,
            help="Select one bus to view details."
        )
    st.write(f"### Selected Bus: {search} {bus}")

    # The direction is a boolean kept in session state and flipped by a button.
    if "direction" not in st.session_state:
        st.session_state.direction = False
    if st.sidebar.button('Change Direction'):
        st.session_state.direction = not st.session_state.direction

    # Copy before adding/changing columns so the assignment targets our own
    # frame rather than a slice of the session data.
    bus_trips = all_data[all_data["route_long_name"] == bus].copy()
    bus_trips["datetime"] = pd.to_datetime(bus_trips["datetime"])
    # tz_convert raises on tz-naive data, so only strip the zone when present.
    if bus_trips["datetime"].dt.tz is not None:
        bus_trips["datetime"] = bus_trips["datetime"].dt.tz_convert(None)

    stops = list(pd.unique(bus_trips["stop_name"]))
    stop_choice = st.sidebar.selectbox(
        "Select your bus stop:",
        options=stops,
        help="Select one bus stop to se occupancy."
    )

    today = datetime.now().date()
    tomorrow = today + timedelta(days=1)
    date_options = {
        today.strftime("%d %B %Y"): today,
        tomorrow.strftime("%d %B %Y"): tomorrow,
    }
    day_choice = st.sidebar.radio("Select the day:", options=list(date_options.keys()))

    # Both time inputs start out empty (None) until the user picks a value.
    start_time = st.sidebar.time_input("Select a start time", value=None)
    end_time = st.sidebar.time_input("Select an end time", value=None)
    if start_time is None or end_time is None:
        return

    st.write(f"Displaying buses between {start_time.strftime('%H:%M')} and {end_time.strftime('%H:%M')} the {day_choice}")

    day = date_options[day_choice]
    window_start = datetime.combine(day, start_time)
    window_end = datetime.combine(day, end_time)
    at_stop = bus_trips[
        (bus_trips["datetime"] >= window_start)
        & (bus_trips["datetime"] <= window_end)
        & (bus_trips["direction_id"] == st.session_state.direction)
        & (bus_trips["stop_name"] == stop_choice)
    ]

    # One row per bus reaching the stop; near-simultaneous rows are collapsed.
    arrivals = at_stop[["trip_id", "stop_name", "datetime"]].sort_values(by=["datetime"])
    arrivals = drop_the_duplicates(arrivals)

    st.write(f"{arrivals['trip_id'].nunique()} buses available")

    for _, row in arrivals.iterrows():
        # Chart and map use every stop of the trip, not just the chosen one.
        trip_rows = all_data[all_data["trip_id"] == row["trip_id"]]
        plot_graph_title(trip_rows, row["stop_name"], row["datetime"].strftime("%H:%M"))
        visualize(trip_rows, stop_choice)


# Streamlit executes the script as __main__, so the app still starts while
# importing this module no longer renders the page as a side effect.
if __name__ == "__main__":
    main()

# Show all bus lines? Search?
    # How to show the direction?
# Filter on bus line and direction
# Filter on time
    # Should the user enter the time?
# See all unique trip ids
# Map position to stop
# Show some kind of graph for all buses within that time
    # Should it cover all stops, or only the ones the user said they will travel between?