"""Gradio web interface for BM25 passage search over a Pyserini Lucene index."""

import logging
import os

import gradio as gr
from pyserini.search.lucene import LuceneSearcher

logger = logging.getLogger(__name__)

# Name of the index: used both as a local directory path and, as a fallback,
# as the identifier of a Pyserini prebuilt index.
INDEX_NAME = 'msmarco-passage'

# BM25 parameters applied to the searcher.
BM25_K1 = 0.9
BM25_B = 0.4

# Number of hits requested per query.
TOP_K = 10

# Maximum number of characters of raw document text shown per hit.
SNIPPET_CHARS = 200


def _load_searcher(index_name):
    """Return a ``LuceneSearcher`` for *index_name*.

    A local directory with that name takes precedence. Otherwise the prebuilt
    index with that name is fetched in-process through
    ``LuceneSearcher.from_prebuilt_index``. Per the Pyserini documentation the
    prebuilt index is stored in Pyserini's own cache rather than in the
    working directory, so the returned searcher must be used directly instead
    of re-opening ``index_name`` as a path afterwards.

    Raises:
        RuntimeError: if no searcher could be obtained for the prebuilt index.
    """
    if os.path.isdir(index_name):
        return LuceneSearcher(index_name)

    prebuilt = LuceneSearcher.from_prebuilt_index(index_name)
    # NOTE(review): some Pyserini versions appear to return None instead of
    # raising when the prebuilt index name is unknown -- guard either way.
    if prebuilt is None:
        raise RuntimeError(
            f"Could not load prebuilt index '{index_name}'"
        )
    return prebuilt


searcher = _load_searcher(INDEX_NAME)
searcher.set_bm25(k1=BM25_K1, b=BM25_B)


def _snippet(text, limit=SNIPPET_CHARS):
    """Return *text* cut to *limit* characters, with "..." appended if cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _format_hit(rank, hit):
    """Render one search hit as the multi-line text entry shown in the UI."""
    doc = searcher.doc(hit.docid)
    # Fetch the raw text once; fall back to an empty string when the document
    # or its raw content is unavailable so one bad hit cannot break the list.
    raw = (doc.raw() if doc is not None else None) or ""
    content = _snippet(raw)
    return (
        f"Rank: {rank}\nDoc ID: {hit.docid}\n"
        f"Score: {hit.score:.4f}\nContent: {content}\n"
    )


def search_pyserini(query):
    """Search the index for *query* and return the top hits as display text.

    Args:
        query: Free-text query typed by the user.

    Returns:
        A string with one entry per hit (rank, doc id, score, snippet),
        a short notice when the query is blank or nothing matched, or an
        "An error occurred: ..." message if the search raised.
    """
    if not query or not query.strip():
        return "Please enter a search query."

    try:
        hits = searcher.search(query, k=TOP_K)
        results = [
            _format_hit(rank, hit) for rank, hit in enumerate(hits, start=1)
        ]
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # handler, but keep the traceback in the logs.
        logger.exception("Search failed for query %r", query)
        return f"An error occurred: {str(e)}"

    if not results:
        return "No results found."
    return "\n".join(results)


css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.output-text {
    white-space: pre-wrap;
    font-family: 'Courier New', monospace;
    font-size: 14px;
    line-height: 1.5;
    padding: 10px;
    border: 1px solid #ccc;
    border-radius: 5px;
    background-color: #f9f9f9;
}
"""

with gr.Blocks(css=css) as iface:
    gr.Markdown("# Pyserini Search Interface")
    gr.Markdown(
        "Enter a query to search using Pyserini with BM25 scoring "
        f"(k1={BM25_K1}, b={BM25_B})."
    )

    with gr.Row():
        query_input = gr.Textbox(
            lines=1,
            placeholder="Enter your search query here...",
            label="Search Query",
        )

    with gr.Row():
        search_button = gr.Button("Search", variant="primary")

    with gr.Row():
        output = gr.Textbox(
            lines=20,
            label="Search Results",
            elem_classes=["output-text"],
        )

    search_button.click(
        fn=search_pyserini,
        inputs=query_input,
        outputs=output,
    )
    # Pressing Enter in the query box runs the same search as the button.
    query_input.submit(
        fn=search_pyserini,
        inputs=query_input,
        outputs=output,
    )

# Start the web server only when run as a script, so importing this module
# (e.g. to reuse search_pyserini) does not block on the server.
if __name__ == "__main__":
    iface.launch()