Sean MacAvaney committed on
Commit
70f85dc
·
1 Parent(s): 0160adc

initial commit

Browse files
Files changed (5) hide show
  1. README.md +19 -6
  2. app.py +64 -0
  3. packages.txt +5 -0
  4. requirements.txt +5 -0
  5. wrapup.md +3 -0
README.md CHANGED
@@ -1,12 +1,25 @@
1
  ---
2
- title: Retrieve
3
- emoji: 📚
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.8.2
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: PyTerrier Retrieve
3
+ emoji: 🐕
4
+ colorFrom: green
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.7
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # 🐕 PyTerrier: Retrieve
13
+
14
+ This is a demonstration of [PyTerrier's TerrierRetrieve transformer](https://pyterrier.readthedocs.io/en/latest/terrier-retrieval.html).
15
+
16
+ TerrierRetrieve functions as a `Q→R` (retrieval, query-to-result) transformer and can be used in pipelines accordingly. For example, you can
17
+ pipe the results to a transformer such as `get_text` to load the text associated with the document:
18
+
19
+ <div class="pipeline">
20
+ <div class="df" title="Query Frame">Q</div>
21
+ <div class="transformer attn" title="TerrierRetrieve Transformer">TerrierRetrieve</div>
22
+ <div class="df" title="Result Frame">R</div>
23
+ <div class="transformer" title="get_text Transformer">get_text</div>
24
+ <div class="df" title="Result Frame">R</div>
25
+ </div>
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import pyterrier as pt
4
+ pt.init()
5
+ from pyterrier_gradio import Demo, MarkdownFile, interface, df2code, code2md, EX_Q
6
+
7
# Shared retriever over the stemmed Terrier index of MS MARCO passage.
# It is created once at import time and reconfigured per request by predict().
retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed')

# Notebook file name and install cell handed to code2md() alongside the
# generated example code.
COLAB_NAME = 'pyterrier_retrieve.ipynb'
COLAB_INSTALL = '!pip install -q python-terrier'
13
+
14
def predict(input, _, wmodel, num_results, pipe_text):
    """Run the configured retrieval pipeline and build the matching code sample.

    Args:
        input: Query frame passed straight to the pipeline and to ``df2code``.
        _: Unused; presumably the value of the read-only "Index" dropdown
            (verify against the Demo input order).
        wmodel: Name of the weighting model, stored in ``retr.controls``.
        num_results: Number of results to retrieve per query.
        pipe_text: When truthy, append a ``get_text`` stage to the pipeline.

    Returns:
        A ``(results, markdown)`` tuple: the pipeline output with the
        ``score`` column rounded to two decimals, and the result of
        ``code2md`` for the generated code.
    """
    # NOTE(review): `retr` is a module-level object mutated on every call, so
    # overlapping requests could observe each other's settings - confirm how
    # the hosting framework schedules calls.
    retr.controls["wmodel"] = wmodel
    # "end" is set to num_results - 1; presumably an inclusive, zero-based
    # index of the last result - confirm against the Terrier controls docs.
    retr.controls["end"] = str(num_results - 1)

    snippet = (
        "import pandas as pd\n"
        "import pyterrier as pt ; pt.init()\n"
        "\n"
        f"retr = pt.TerrierRetrieve.from_dataset('msmarco_passage', 'terrier_stemmed', wmodel={wmodel!r}, num_results={num_results})\n"
    )

    if pipe_text:
        pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')
        snippet += (
            "\n"
            "pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:msmarco-passage'), 'text')\n"
            "\n"
            f"pipeline({df2code(input)})"
        )
    else:
        pipeline = retr
        snippet += f"\nretr({df2code(input)})"

    results = pipeline(input)
    # Round scores for display only.
    results['score'] = results['score'].map(lambda score: round(score, 2))
    return results, code2md(snippet, COLAB_INSTALL, COLAB_NAME)
35
+
36
# Page layout, top to bottom: README intro, interactive demo, references.
intro = MarkdownFile('README.md')

# Every example query set except 'antique/train' is offered in the demo.
examples = {name: queries for name, queries in EX_Q.items() if name != 'antique/train'}

# Single fixed choice, shown non-interactively.
index_dropdown = gr.Dropdown(
    choices=['msmarco-passage stemmed'],
    value='msmarco-passage stemmed',
    label='Index',
    interactive=False,
)
wmodel_dropdown = gr.Dropdown(
    choices=['TF_IDF', 'BM25', 'PL2', 'DPH'],
    value='BM25',
    label='Retrieval Model',
)
num_results_slider = gr.Slider(
    minimum=1,
    maximum=10,
    value=5,
    step=1.,
    label='# Results'
)
get_text_checkbox = gr.Checkbox(
    value=True,
    label="Include get_text in pipeline",
)

# Component order mirrors predict()'s parameters after the query frame.
demo = Demo(
    predict,
    examples,
    [index_dropdown, wmodel_dropdown, num_results_slider, get_text_checkbox],
    scale=2/3
)

references = MarkdownFile('wrapup.md')

interface(intro, demo, references).launch(share=True)
packages.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openjdk-11-jdk
2
+ openjdk-11-jre-headless
3
+ openjdk-11-jre
4
+ openjdk-11-jre-headless
5
+ debianutils
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ git+https://github.com/seanmacavaney/[email protected]
2
+ git+https://github.com/terrier-org/pyterrier
3
+ pyterrier-pisa
4
+ ir_datasets
5
+ ir_measures
wrapup.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ### References & Credits
2
+
3
+ - Craig Macdonald, Nicola Tonellotto, Sean MacAvaney, Iadh Ounis. [PyTerrier: Declarative Experimentation in Python from BM25 to Dense Retrieval](https://dl.acm.org/doi/abs/10.1145/3459637.3482013). CIKM 2021.