gwf-spectrum / app.py
ehsk's picture
Duplicate from gwf-uwaterloo/scicatter2d
780a0a5
raw
history blame
2.42 kB
import os
import re
import pandas as pd
import plotly.express as px
import streamlit as st
# Page-wide setting: request Streamlit's "wide" layout.
st.set_page_config(layout="wide")

# Path of the JSON records file that is handed to load_df.
DATA_FILE = "data/anthology-2020-23_specter2_base.json"

# For each column selectable under "Color", the colour-scale name that is
# passed to px.scatter as color_continuous_scale.
THEMES = dict(
    cluster="fall",
    year="mint",
    source="phase",
)
def load_df(data_file: os.PathLike) -> pd.DataFrame:
    """Read paper records from a JSON file and flatten their 2-D coordinates.

    The file is parsed with ``orient="records"``.  Every record must carry a
    ``point2d`` entry; its first two items become the ``x`` and ``y`` columns.
    When a ``publication_type`` column is present it is exposed under the
    shorter name ``type``.

    Args:
        data_file: Path of the JSON file to read.

    Returns:
        The loaded frame with ``x`` and ``y`` (and, when available, ``type``)
        appended and the ``point2d`` / ``publication_type`` columns removed.

    Raises:
        KeyError: If the data has no ``point2d`` column.
    """
    df = pd.read_json(data_file, orient="records")

    # Split the coordinate pair into two plain columns.
    points = df["point2d"]
    df["x"] = points.apply(lambda point: point[0])
    df["y"] = points.apply(lambda point: point[1])

    # Columns that have been superseded by the ones derived above.
    stale_columns = ["point2d"]
    if "publication_type" in df.columns:
        df["type"] = df["publication_type"]
        stale_columns.append("publication_type")

    return df.drop(columns=stale_columns)
@st.cache_data
def load_dataframe() -> pd.DataFrame:
    """Load the dataset stored at DATA_FILE through load_df.

    The function is wrapped in ``st.cache_data`` so that Streamlit can reuse
    the loaded frame instead of re-reading the file.

    Returns:
        Whatever ``load_df(DATA_FILE)`` returns.
    """
    return load_df(DATA_FILE)
# Full dataset; every filter below narrows a view of it.
DF = load_dataframe()

# Venues offered in the sidebar; all of them are pre-selected.
venue_options = ["ACL", "EMNLP", "NAACL", "TACL"]

# NOTE(review): the four input widgets are assumed to live in the sidebar --
# confirm against the intended layout.
with st.sidebar:
    venues = st.multiselect("Venues", venue_options, venue_options)
    start_year, end_year = st.select_slider(
        "Publication year",
        options=("2020", "2021", "2022", "2023"),
        value=("2020", "2023"),
    )
    author_names = st.text_input("Author names (separated by comma)")
    title = st.text_input("Title")

# The slider options are strings; the "year" column is compared as integers.
start_year = int(start_year)
end_year = int(end_year)
df = DF[(DF["year"] >= start_year) & (DF["year"] <= end_year)]

if not venues:
    # Nothing selected: warn, and leave the venue dimension unfiltered.
    st.write(":red[Please select a venue]")
elif len(venues) < len(venue_options):
    # Only a strict subset needs filtering; "source" holds lower-case names.
    selected_venues = [v.lower() for v in venues]
    df = df[df["source"].isin(selected_venues)]

if author_names:
    # Drop empty fragments (e.g. from a trailing comma); they match anything.
    authors = [a.strip() for a in author_names.split(",")]
    authors = [a for a in authors if a]
    # Escape the typed text so characters such as "(" or "*" are matched
    # literally instead of raising re.error or acting as regex operators.
    author_patterns = [re.compile(re.escape(a), re.IGNORECASE) for a in authors]
    # A paper is kept when every requested name occurs in at least one of
    # its author strings.
    author_mask = df["authors"].apply(
        lambda row: all(any(p.search(x) for x in row) for p in author_patterns)
    )
    # Cast to bool: on an already-empty frame apply() may return a
    # non-boolean mask, which df[...] would not treat as a row filter.
    df = df[author_mask.astype(bool)]

if title:
    # Case-insensitive substring match on the paper title.
    title_query = title.lower()
    title_mask = df["title"].apply(lambda text: title_query in text.lower())
    df = df[title_mask.astype(bool)]

st.write(f"Number of points: {df.shape[0]}")
# Column used to colour the points.
color = st.selectbox("Color", ("cluster", "year", "source"))

# Request only hover columns that exist in the frame: "type" is present only
# when the loaded data had a "publication_type" column, and px.scatter
# rejects unknown column names.
hover_columns = [
    column
    for column in ("title", "authors", "year", "source", "type")
    if column in df.columns
]

# Scatter plot of the filtered papers at their 2-D coordinates.
fig = px.scatter(
    df,
    x="x",
    y="y",
    color=color,
    width=1000,
    height=800,
    hover_data=hover_columns,
    color_continuous_scale=THEMES[color],
)
fig.update_layout(
    # margin=dict(l=10, r=10, t=10, b=10),
    showlegend=False,
    font=dict(
        family="Times New Roman",
        size=30,
    ),
)
# Blank out the axis titles.
fig.update_xaxes(title="")
fig.update_yaxes(title="")

st.plotly_chart(fig, use_container_width=True)