"""Gradio demo for the PyTerrier Doc2Query transformer.

Builds a small web UI in which a one-row dataframe of documents is passed
through a ``Doc2Query`` pipeline, and shows both the resulting dataframe and
a Python snippet that reproduces the same call.
"""
import pandas as pd
import gradio as gr
from pyterrier_doc2query import Doc2Query

MODEL = 'macavaney/doc2query-t5-base-msmarco'

# A single shared pipeline instance; ``predict`` reconfigures it per request.
doc2query = Doc2Query(MODEL, append=True, num_samples=5)


def df2code(df):
    """Render *df* as Python source for an equivalent ``pd.DataFrame([...])``.

    Each row becomes one dict literal (column name -> value) on its own line.
    """
    rows = '\n'.join(
        f'  {dict(row._asdict())},' for row in df.itertuples(index=False)
    )
    return f'''pd.DataFrame([
{rows}
])'''


def predict(input, model, append, num_samples):
    """Run Doc2Query over *input* and build a snippet reproducing the call.

    Args:
        input: Dataframe of documents (the UI supplies ``docno`` and ``text``
            columns). The name shadows the builtin but is kept so existing
            keyword callers keep working.
        model: Model identifier; only ``MODEL`` is supported.
        append: Value assigned to ``doc2query.append``.
        num_samples: Value assigned to ``doc2query.num_samples`` (coerced to
            ``int``).

    Returns:
        A ``(result, code)`` tuple: the output of ``doc2query(input)`` and a
        Markdown string containing equivalent Python code.

    Raises:
        ValueError: If *model* is not the loaded ``MODEL``.
    """
    # Explicit check rather than ``assert`` so it survives ``python -O``.
    if model != MODEL:
        raise ValueError(f'unsupported model {model!r}; expected {MODEL!r}')

    # The slider is declared with a float step; normalise so neither the
    # pipeline nor the rendered snippet ever sees e.g. ``5.0``.
    num_samples = int(num_samples)

    # NOTE(review): this mutates the module-level pipeline, so overlapping
    # requests with different settings could interfere - confirm how the
    # deployment schedules handler calls.
    doc2query.append = append
    doc2query.num_samples = num_samples

    code = f'''
**Code:**

```python
import pandas as pd
from pyterrier_doc2query import Doc2Query
doc2query = Doc2Query({repr(model)}, append={append}, num_samples={num_samples})
doc2query({df2code(input)})
```
'''
    return (doc2query(input), code)


def _load_description(path='README.md'):
    """Return the part of the README that follows its last ``---`` separator."""
    # Context manager closes the handle; explicit UTF-8 avoids depending on
    # the platform's locale default.
    with open(path, 'rt', encoding='utf-8') as readme:
        return readme.read().split('\n---\n')[-1]


example_inp = pd.DataFrame([
    {'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
])

# Pre-compute the example output so the UI opens with populated results.
example_out = predict(example_inp, MODEL, doc2query.append, doc2query.num_samples)

demo = gr.Interface(
    predict,
    inputs=[
        gr.Dataframe(
            headers=["docno", "text"],
            datatype=["str", "str"],
            col_count=(2, "fixed"),
            row_count=1,
            wrap=True,
            label='Pipeline Input',
            value=example_inp,
        ),
        gr.Dropdown(
            choices=[MODEL],
            value=MODEL,
            label='Model',
            interactive=False,
        ),
        gr.Checkbox(
            value=doc2query.append,
            label="Append",
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=doc2query.num_samples,
            step=1.,
            label='# Queries',
        ),
    ],
    outputs=[
        gr.Dataframe(
            headers=["docno", "text", "querygen"],
            datatype=["str", "str", "str"],
            col_count=3,
            row_count=1,
            wrap=True,
            label='Pipeline Output',
            value=example_out[0],
        ),
        gr.Markdown(value=example_out[1]),
    ],
    title="🐕 PyTerrier: Doc2Query",
    description=_load_description(),
    allow_flagging='never',
    css="table.font-mono td { white-space: pre-line; }",
)
demo.launch(share=False)