Sean MacAvaney committed on
Commit
cf494b2
·
1 Parent(s): 9889763
Files changed (2) hide show
  1. README.md +55 -0
  2. app.py +11 -7
README.md CHANGED
@@ -11,7 +11,62 @@ models:
11
  - macavaney/doc2query-t5-base-msmarco
12
  ---
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  This is a demonstration of [PyTerrier's Doc2Query package](https://github.com/terrierteam/pyterrier_doc2query). Doc2Query generates
15
  queries for a document, which can then be appended to a document's text before indexing to boost important terms and add missing terms.
16
 
 
 
 
 
 
 
 
 
17
  Try it below!
 
11
  - macavaney/doc2query-t5-base-msmarco
12
  ---
13
 
14
+ <style>
15
+ .transformer {
16
+ display: inline-block;
17
+ background: #8facdb;
18
+ position: relative;
19
+ height: 60px;
20
+ line-height: 60px;
21
+ padding: 0 24px;
22
+ margin: 0 18px;
23
+ color: #333;
24
+ }
25
+ .transformer::before {
26
+ content: "";
27
+ position: absolute;
28
+ bottom: 0;
29
+ top: 0;
30
+ left: -15px;
31
+ border-top: 30px solid #8facdb;
32
+ border-bottom: 30px solid #8facdb;
33
+ border-left: 15px solid transparent;
34
+ }
35
+ .transformer::after {
36
+ content: "";
37
+ position: absolute;
38
+ bottom: 0;
39
+ top: 0;
40
+ right: -15px;
41
+ border-top: 30px solid transparent;
42
+ border-bottom: 30px solid transparent;
43
+ border-left: 15px solid #8facdb;
44
+ }
45
+ .df {
46
+ width: 24px;
47
+ line-height: 24px;
48
+ text-align: center;
49
+ border: 3px double #888;
50
+ background-color: #eee;
51
+ color: #333;
52
+ border-radius: 4px;
53
+ display: inline-block;
54
+ box-sizing: content-box;
55
+ }
56
+ .pipeline {
57
+ text-align: center;
58
+ }
59
+ </style>
60
+
61
  This is a demonstration of [PyTerrier's Doc2Query package](https://github.com/terrierteam/pyterrier_doc2query). Doc2Query generates
62
  queries for a document, which can then be appended to a document's text before indexing to boost important terms and add missing terms.
63
 
64
+ Doc2Query functions as a `D→D` (document-to-document) transformer and can be used in pipelines accordingly.
65
+
66
+ <div class="pipeline">
67
+ <div class="df" title="Document Frame">D</div>
68
+ <div class="transformer">Doc2Query</div>
69
+ <div class="df" title="Document Frame">D</div>
70
+ </div>
71
+
72
  Try it below!
app.py CHANGED
@@ -2,7 +2,9 @@ import pandas as pd
2
  import gradio as gr
3
  from pyterrier_doc2query import Doc2Query
4
 
5
- doc2query = Doc2Query('macavaney/doc2query-t5-base-msmarco', append=True, num_samples=5)
 
 
6
 
7
  def df2code(df):
8
  rows = []
@@ -14,7 +16,7 @@ def df2code(df):
14
  ])'''
15
 
16
  def predict(input, model, append, num_samples):
17
- assert model == 'macavaney/doc2query-t5-base-msmarco'
18
  doc2query.append = append
19
  doc2query.num_samples = num_samples
20
  code = f'''
@@ -33,6 +35,8 @@ example_inp = pd.DataFrame([
33
  {'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
34
  ])
35
 
 
 
36
  gr.Interface(
37
  predict,
38
  inputs=[gr.Dataframe(
@@ -44,8 +48,8 @@ gr.Interface(
44
  label='Pipeline Input',
45
  value=example_inp,
46
  ), gr.Dropdown(
47
- choices=['macavaney/doc2query-t5-base-msmarco'],
48
- value='macavaney/doc2query-t5-base-msmarco',
49
  label='Model',
50
  interactive=False,
51
  ), gr.Checkbox(
@@ -65,10 +69,10 @@ gr.Interface(
65
  row_count=1,
66
  wrap=True,
67
  label='Pipeline Output',
68
- value=doc2query(example_inp),
69
- ), gr.Markdown()],
70
  title="🐕 PyTerrier: Doc2Query",
71
- description=open('README.md', 'rt').read().split('---\n')[-1],
72
  allow_flagging='never',
73
  css="table.font-mono td { white-space: pre-line; }",
74
  ).launch(share=False)
 
2
  import gradio as gr
3
  from pyterrier_doc2query import Doc2Query
4
 
5
+ MODEL = 'macavaney/doc2query-t5-base-msmarco'
6
+
7
+ doc2query = Doc2Query(MODEL, append=True, num_samples=5)
8
 
9
  def df2code(df):
10
  rows = []
 
16
  ])'''
17
 
18
  def predict(input, model, append, num_samples):
19
+ assert model == MODEL
20
  doc2query.append = append
21
  doc2query.num_samples = num_samples
22
  code = f'''
 
35
  {'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
36
  ])
37
 
38
+ example_out = predict(example_inp, MODEL, doc2query.append, doc2query.num_samples)
39
+
40
  gr.Interface(
41
  predict,
42
  inputs=[gr.Dataframe(
 
48
  label='Pipeline Input',
49
  value=example_inp,
50
  ), gr.Dropdown(
51
+ choices=[MODEL],
52
+ value=MODEL,
53
  label='Model',
54
  interactive=False,
55
  ), gr.Checkbox(
 
69
  row_count=1,
70
  wrap=True,
71
  label='Pipeline Output',
72
+ value=example_out[0],
73
+ ), gr.Markdown(value=example_out[1])],
74
  title="🐕 PyTerrier: Doc2Query",
75
+ description=open('README.md', 'rt').read().split('\n---\n')[-1],
76
  allow_flagging='never',
77
  css="table.font-mono td { white-space: pre-line; }",
78
  ).launch(share=False)