ArenaTester / app.py
atrytone's picture
Update app.py
2db50b5
raw
history blame
5.23 kB
import gradio as gr
import csv
import random
import uuid
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
USER_ID = uuid.uuid4()
INDEXES = ["miread_large", "miread_contrastive", "scibert_contrastive"]
MODELS = [
"biodatlab/MIReAD-Neuro-Large",
"biodatlab/MIReAD-Neuro-Contrastive",
"biodatlab/SciBERT-Neuro-Contrastive",
]
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedders = [HuggingFaceEmbeddings(
model_name=name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs) for name in MODELS]
vecdbs = [FAISS.load_local(index_name, faiss_embedder)
for index_name, faiss_embedder in zip(INDEXES, faiss_embedders)]
def get_matchup():
choices = INDEXES
left, right = random.sample(choices,2)
return left, right
def get_comp(prompt):
left, right = get_matchup()
left_output = inference(prompt,left)
right_output = inference(prompt,right)
return left_output, right_output
def get_article(db_name="miread_contrastive"):
db = vecdbs[index_names.index(db_name)]
return db[0]
def send_result(l_output, r_output, prompt, pick):
with csv.open('results.csv','a') as res_file:
writer = csv.writer(res_file)
row = [USER_ID,left,right,prompt,pick]
writer.writerow(row)
def get_matches(query, db_name="miread_contrastive"):
"""
Wrapper to call the similarity search on the required index
"""
matches = vecdbs[index_names.index(
db_name)].similarity_search_with_score(query, k=60)
return matches
def inference(query, model="miread_contrastive"):
"""
This function processes information retrieved by the get_matches() function
Returns - Gradio update commands for the authors, abstracts and journals tablular output
"""
matches = get_matches(query, model)
auth_counts = {}
n_table = []
scores = [round(match[1].item(), 3) for match in matches]
min_score = min(scores)
max_score = max(scores)
def normaliser(x): return round(1 - (x-min_score)/max_score, 3)
for i, match in enumerate(matches):
doc = match[0]
score = round(normaliser(round(match[1].item(), 3)), 3)
title = doc.metadata['title']
author = doc.metadata['authors'][0].title()
date = doc.metadata.get('date', 'None')
link = doc.metadata.get('link', 'None')
# For authors
record = [i+1,
score,
author,
title,
link,
date]
if auth_counts.get(author, 0) < 2:
n_table.append(record)
if auth_counts.get(author, 0) == 0:
auth_counts[author] = 1
else:
auth_counts[author] += 1
n_output = gr.Dataframe.update(value=n_table, visible=True)
return n_output
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# NBDT Recommendation Engine Arena")
gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click on the appropriate \"Find Matches\" button.\
Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly for an up-to-date publications.")
abst = gr.Textbox(label="Abstract", lines=10)
models = gr.State(value=get_matchup())
prompt = gr.State(value=get_prompt())
action_btn = gr.Button(value="Get comparison")
with gr.Row().style(equal_height=True):
with gr.Column(scale=1):
l_output = gr.Dataframe(
headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
datatype=['number', 'number', 'str', 'str', 'str', 'str'],
col_count=(6, "fixed"),
wrap=True,
visible=True,
label='Model A',
show_label = True
scale=1
)
l_btn = gr.Button(value="Model A is better",scale=1)
with gr.Column(scale=1):
r_output = gr.Dataframe(
headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
datatype=['number', 'number', 'str', 'str', 'str', 'str'],
col_count=(6, "fixed"),
wrap=True,
visible=True,
label='Model B',
show_label = True
scale=1
)
r_btn = gr.Button(value="Model B is better",scale=1)
action_btn.click(fn=get_comp,
inputs=[prompt,],
outputs=[l_output, r_output],
api_name="arena")
l_btn.click(fn=lambda x,y,z: send_result(x,y,z,'left'),
inputs=[l_output,r_output,prompt],
api_name="feedleft")
l_btn.click(fn=lambda x,y,z: send_result(x,y,z,'right'),
inputs=[l_output,r_output,prompt],
api_name="feedright")
demo.launch(debug=True)