Spaces:
Sleeping
Sleeping
update
Browse files- app.py +40 -2
- benchmark_data.csv +11 -0
app.py
CHANGED
@@ -1,4 +1,42 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from fuzzywuzzy import process
|
4 |
|
5 |
+
|
6 |
+
def load_data(path="benchmark_data.csv"):
    """Load the benchmark results table from a CSV file.

    Args:
        path: Location of the CSV file (string or path-like). Defaults to
            ``"benchmark_data.csv"``; a relative path is resolved against
            the current working directory.

    Returns:
        pandas.DataFrame: The file contents as parsed by ``pandas.read_csv``.
    """
    return pd.read_csv(path)
|
8 |
+
|
9 |
+
|
10 |
+
def fuzzy_search(data, query, column, limit=10, score_cutoff=60):
    """Filter ``data`` to rows whose ``column`` value fuzzily matches ``query``.

    Args:
        data: pandas.DataFrame to filter.
        query: Search text. An empty/falsy query disables filtering.
        column: Name of the column whose values are matched against ``query``.
        limit: Maximum number of distinct column values to keep as matches.
        score_cutoff: Minimum fuzzywuzzy score (0-100) a candidate must reach
            to count as a match.

    Returns:
        pandas.DataFrame: ``data`` itself when ``query`` is falsy, otherwise
        the subset of rows whose ``column`` value is among the accepted
        matches (possibly empty).
    """
    if not query:
        return data

    # Missing values are not meaningful search candidates.
    choices = data[column].dropna().unique()
    matches = process.extract(query, choices, limit=limit)

    # process.extract always returns the top `limit` candidates regardless of
    # how poorly they score; without a threshold, a column with at most
    # `limit` distinct values would never be filtered at all.
    selected = [match[0] for match in matches if match[1] >= score_cutoff]
    return data[data[column].isin(selected)]
|
17 |
+
|
18 |
+
|
19 |
+
def main():
    """Render the Streamlit page for the Multihop-RAG benchmark.

    Loads the benchmark table, offers two sidebar text inputs whose
    non-empty values are passed to ``fuzzy_search`` (framework first, then
    model), shows the resulting table, and optionally draws a bar chart of
    the MRR@10 and Hit@10 columns.
    """
    st.title("Multihop-RAG Benchmark Space")

    results = load_data()

    sidebar = st.sidebar
    sidebar.header("Search Options")
    # Widgets are created in display order: framework box, then model box.
    search_filters = (
        (sidebar.text_input("Search by Framework"), "framework"),
        (sidebar.text_input("Search by Model"), "model"),
    )
    for search_text, column_name in search_filters:
        if search_text:
            results = fuzzy_search(results, search_text, column_name)

    st.header("Benchmark Results")
    st.write("Displaying results for MRR@10 and Hit@10 across different frameworks, models, and chunk sizes.")
    st.dataframe(results)

    show_distribution = sidebar.checkbox("Show Metrics Distribution")
    if show_distribution:
        st.subheader("Metrics Distribution")
        metric_columns = ["MRR@10", "Hit@10"]
        st.bar_chart(results[metric_columns])
|
40 |
+
|
41 |
+
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
benchmark_data.csv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
framework,model,chunk size,MRR@10,Hit@10
|
2 |
+
Transformers,BERT-base,128,0.32,0.45
|
3 |
+
Transformers,BERT-large,256,0.37,0.50
|
4 |
+
Fairseq,ROBERTA-base,128,0.35,0.48
|
5 |
+
Fairseq,ROBERTA-large,256,0.40,0.55
|
6 |
+
HuggingFace,GPT-2,128,0.30,0.44
|
7 |
+
HuggingFace,GPT-3,256,0.42,0.57
|
8 |
+
TensorFlow,T5-small,128,0.31,0.46
|
9 |
+
TensorFlow,T5-large,256,0.39,0.51
|
10 |
+
PyTorch,BART-base,128,0.33,0.47
|
11 |
+
PyTorch,BART-large,256,0.38,0.53
|