# retrieve / app.py
# Sean MacAvaney -- initial commit (70f85dc), 1.78 kB
# (page links from the repository view: raw, history, blame)
import pandas as pd
import gradio as gr
import pyterrier as pt
pt.init()
from pyterrier_gradio import Demo, MarkdownFile, interface, df2code, code2md, EX_Q
# Module-level retriever built once from the 'terrier_stemmed' variant of the
# 'msmarco_passage' dataset; predict() overwrites its controls on each call.
retr = pt.TerrierRetrieve.from_dataset(
    'msmarco_passage',
    'terrier_stemmed',
)
# Notebook file name that predict() passes to code2md.
COLAB_NAME = 'pyterrier_retrieve.ipynb'

# Install command that predict() passes to code2md alongside the snippet.
COLAB_INSTALL = '!pip install -q python-terrier'
def predict(input, _, wmodel, num_results, pipe_text):
    """Run the queries through the shared retriever and describe how to reproduce it.

    Args:
        input: Queries handed unchanged to the pipeline and to ``df2code``
            (presumably a pandas DataFrame of queries -- confirm against Demo).
        _: Ignored positional value (presumably the read-only "Index"
            dropdown -- confirm against the control order given to Demo).
        wmodel: Weighting model name written to the ``wmodel`` control.
        num_results: Requested number of results; coerced to ``int``.
        pipe_text: When truthy, a ``pt.text.get_text`` stage is appended to
            the pipeline and to the generated snippet.

    Returns:
        A ``(results, markdown)`` tuple: the pipeline output with its ``score``
        column rounded to two decimals, and whatever ``code2md`` produces for
        the equivalent stand-alone snippet.
    """
    # The slider feeding this value is declared with a float step (1.), so it
    # may arrive as e.g. 5.0. It is rendered as text both into the "end"
    # control and into the snippet, so normalise it once up front.
    num_results = int(num_results)

    # ``retr`` is a module-level object shared by every call; its controls are
    # simply overwritten each time.
    retr.controls["wmodel"] = wmodel
    # NOTE(review): "end" looks like a zero-based last rank, hence the -1 -- confirm.
    retr.controls["end"] = str(num_results - 1)

    code = (
        "import pandas as pd\n"
        "import pyterrier as pt ; pt.init()\n"
        "retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed', "
        f"wmodel={wmodel!r}, num_results={num_results})\n"
    )

    if pipe_text:
        pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')
        code += (
            "\n"
            "pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')\n"
            f"pipeline({df2code(input)})"
        )
    else:
        pipeline = retr
        code += f"\nretr({df2code(input)})"

    res = pipeline(input)
    # Built-in round() per value, kept as in the original so displayed scores
    # are unchanged.
    res['score'] = res['score'].map(lambda score: round(score, 2))
    return res, code2md(code, COLAB_INSTALL, COLAB_NAME)
# Page assembly. Objects are created in the same order as the arguments of the
# original nested call: README section, demo inputs, Demo, wrap-up section.
intro_section = MarkdownFile('README.md')

# Every EX_Q entry except the one keyed 'antique/train'.
example_queries = {
    name: queries
    for name, queries in EX_Q.items()
    if name != 'antique/train'
}

# Controls handed to Demo as a list, in this order.
index_dropdown = gr.Dropdown(
    choices=['msmarco-passage stemmed'],
    value='msmarco-passage stemmed',
    label='Index',
    interactive=False,
)
wmodel_dropdown = gr.Dropdown(
    choices=['TF_IDF', 'BM25', 'PL2', 'DPH'],
    value='BM25',
    label='Retrieval Model',
)
num_results_slider = gr.Slider(
    minimum=1,
    maximum=10,
    value=5,
    step=1.,
    label='# Results'
)
get_text_checkbox = gr.Checkbox(
    value=True,
    label="Include get_text in pipeline",
)

retrieval_demo = Demo(
    predict,
    example_queries,
    [index_dropdown, wmodel_dropdown, num_results_slider, get_text_checkbox],
    scale=2/3
)

wrapup_section = MarkdownFile('wrapup.md')

interface(intro_section, retrieval_demo, wrapup_section).launch(share=True)