"""Gradio demo of PyTerrier retrieval over the MS MARCO passage index.

The page lets a user pick a weighting model and a result count, runs the
query frame through a Terrier retriever (optionally followed by a
``get_text`` stage), and shows both the results and a code snippet that
reproduces the run.
"""
import pandas as pd
import gradio as gr
import pyterrier as pt

# Kept ahead of the pyterrier_gradio import, exactly as in the original script.
# NOTE(review): presumably PyTerrier must be initialised first -- confirm.
pt.init()
from pyterrier_gradio import Demo, MarkdownFile, interface, df2code, code2md, EX_Q

# A single retriever shared by every request; predict() reconfigures it
# through its ``controls`` mapping before each run.
retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed')

# Values handed to code2md() together with the generated snippet.
COLAB_NAME = 'pyterrier_retrieve.ipynb'
COLAB_INSTALL = '''
!pip install -q python-terrier
'''.strip()


def predict(input, _, wmodel, num_results, pipe_text):
    """Run the retrieval pipeline and build the matching code snippet.

    Args:
        input: Query frame passed straight to the pipeline and to df2code().
            NOTE(review): presumably a pandas DataFrame of queries supplied
            by ``Demo`` -- confirm against pyterrier_gradio.
        _: Value of the read-only "Index" dropdown; ignored.
        wmodel: Name of the weighting model chosen in the dropdown.
        num_results: Number of results requested via the slider.
        pipe_text: When truthy, append a ``get_text`` stage to the pipeline.

    Returns:
        A ``(results, markdown)`` tuple: the pipeline output with ``score``
        rounded to two decimals, and the result of ``code2md`` for the
        generated snippet.
    """
    # Gradio documents slider values as floats. Coerce once so the control
    # string is '4' rather than '4.0' and the snippet shows an integer.
    num_results = int(num_results)

    retr.controls["wmodel"] = wmodel
    retr.controls["end"] = str(num_results - 1)

    code = f'''import pandas as pd
import pyterrier as pt ; pt.init()

retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed', wmodel={wmodel!r}, num_results={num_results})
'''

    pipeline = retr
    if pipe_text:
        pipeline = pipeline >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')
        code += f'''
pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')

pipeline({df2code(input)})'''
    else:
        code += f'''
retr({df2code(input)})'''

    res = pipeline(input)
    # Python's round() is applied per value so displayed scores keep the
    # same rounding as before.
    res['score'] = res['score'].map(lambda x: round(x, 2))
    return (res, code2md(code, COLAB_INSTALL, COLAB_NAME))


# Input components are listed in the same order as predict()'s parameters
# after ``input``: index (ignored), weighting model, result count, get_text.
interface(
    MarkdownFile('README.md'),
    Demo(
        predict,
        # Every example query set except 'antique/train'.
        {k: v for k, v in EX_Q.items() if k != 'antique/train'},
        [
            gr.Dropdown(
                choices=['msmarco-passage stemmed'],
                value='msmarco-passage stemmed',
                label='Index',
                interactive=False,
            ),
            gr.Dropdown(
                choices=['TF_IDF', 'BM25', 'PL2', 'DPH'],
                value='BM25',
                label='Retrieval Model',
            ),
            gr.Slider(
                minimum=1,
                maximum=10,
                value=5,
                step=1.,
                label='# Results',
            ),
            gr.Checkbox(
                value=True,
                label="Include get_text in pipeline",
            ),
        ],
        scale=2/3,
    ),
    MarkdownFile('wrapup.md'),
).launch(share=False)