# Spaces: Sleeping
import pandas as pd | |
import gradio as gr | |
import pyterrier as pt | |
pt.init() | |
from pyterrier_gradio import Demo, MarkdownFile, interface, df2code, code2md, EX_Q | |
# Module-level retriever built from the 'terrier_stemmed' variant of the
# 'msmarco_passage' dataset; predict() rewrites its controls on every call.
retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed')

# Notebook file name and install cell handed to code2md together with the
# generated snippet.
COLAB_NAME = 'pyterrier_retrieve.ipynb'
COLAB_INSTALL = '!pip install -q python-terrier'
def predict(input, _, wmodel, num_results, pipe_text):
    """Run the queries through the retriever and build an equivalent code snippet.

    Args:
        input: Query frame; passed to the pipeline and rendered with ``df2code``.
        _: Ignored. Presumably the value of the fixed "Index" dropdown, which
            is the first extra input in the demo definition -- confirm against
            ``Demo``.
        wmodel: Weighting model name written to the retriever's ``wmodel``
            control and echoed in the snippet.
        num_results: Number of results requested; coerced to ``int``.
        pipe_text: When truthy, ``pt.text.get_text`` is appended to the
            pipeline (and to the snippet).

    Returns:
        A ``(results, markdown)`` tuple: the pipeline output with ``score``
        rounded to two decimals, and the result of ``code2md`` for the snippet.
    """
    # The slider is declared with a float step, so the value may arrive as a
    # float. Normalise once so the control is "4" rather than "4.0" and the
    # snippet shows num_results=5 rather than num_results=5.0.
    num_results = int(num_results)

    # NOTE(review): retr is shared module state; these writes persist between calls.
    retr.controls["wmodel"] = wmodel
    # "end" is set to num_results - 1 -- presumably a zero-based inclusive rank.
    retr.controls["end"] = str(num_results - 1)

    code = f'''import pandas as pd
import pyterrier as pt ; pt.init()
retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed', wmodel={repr(wmodel)}, num_results={num_results})
'''
    pipeline = retr
    if pipe_text:
        pipeline = pipeline >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')
        code += f'''
pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')
pipeline({df2code(input)})'''
    else:
        code += f'''
retr({df2code(input)})'''

    res = pipeline(input)
    # Round scores for display only; ranking has already been decided.
    res['score'] = res['score'].map(lambda x: round(x, 2))
    return (res, code2md(code, COLAB_INSTALL, COLAB_NAME))
# Assemble the page top to bottom: README text, the interactive demo, wrap-up text.
# Objects are created in the same order as the original nested call.
readme_panel = MarkdownFile('README.md')

# Example queries, leaving out the 'antique/train' entry.
example_queries = {
    name: queries
    for name, queries in EX_Q.items()
    if name != 'antique/train'
}

# Extra inputs, in the order predict() receives them after the query input.
demo_controls = [
    gr.Dropdown(
        choices=['msmarco-passage stemmed'],
        value='msmarco-passage stemmed',
        label='Index',
        interactive=False,
    ),
    gr.Dropdown(
        choices=['TF_IDF', 'BM25', 'PL2', 'DPH'],
        value='BM25',
        label='Retrieval Model',
    ),
    gr.Slider(
        minimum=1,
        maximum=10,
        value=5,
        step=1.,
        label='# Results',
    ),
    gr.Checkbox(
        value=True,
        label="Include get_text in pipeline",
    ),
]

retrieval_demo = Demo(
    predict,
    example_queries,
    demo_controls,
    scale=2/3,
)

wrapup_panel = MarkdownFile('wrapup.md')

interface(
    readme_panel,
    retrieval_demo,
    wrapup_panel,
).launch(share=False)