import gradio as gr import chromadb from typing import List, Dict # ChromaDB 配置 client = chromadb.PersistentClient(path="chroma_data") # 加载集合 embed_collection = client.get_collection(name="embed_data") answer_collection = client.get_collection(name="answer_data") def search_embed_and_answer(label: str): def search_top_answers(target_embedding: List[float], top_n: int = 10) -> List[Dict]: """使用ChromaDB进行相似度搜索""" print("正在执行相似度搜索...") results = answer_collection.query( query_embeddings=[target_embedding], n_results=top_n, include=["metadatas", "documents", "distances"] ) # 处理结果 answers = [] for meta, doc, dist in zip( results['metadatas'][0], results['documents'][0], results['distances'][0] ): answers.append({ "formal_name": meta['formal_name'], "formal_statement": meta['formal_statement'], "informal_name": meta['informal_name'], "informal_statement": doc, "similarity": 1 - dist }) print("相似度搜索完成!") return answers """主搜索函数""" print("开始处理搜索请求...") # 在 embed 集合中查找目标条目 print("正在查询目标定理的元数据...") results = embed_collection.query( query_texts=[label], n_results=1, include=["metadatas", "embeddings"] ) if not results['ids'][0]: print(f"未找到 label 为 {label} 的定理。") return f"未找到 label 为 {label} 的定理。", "" # 提取目标条目 print("正在提取目标定理的特征...") target_metadata = results['metadatas'][0][0] target_embedding = results['embeddings'][0][0] # 执行搜索 print("正在执行相似度搜索...") top_answers = search_top_answers(target_embedding) # 格式化结果 print("正在格式化搜索结果...") latex_output = f"目标定理的 tag: {target_metadata['tag']}\nLaTeX 渲染结果: {target_metadata['latex']}" answer_output = "最吻合的前十个定理:\n" for i, ans in enumerate(top_answers, 1): answer_output += ( f"{i}. Formal Name: {ans['formal_name']}\n" f" Formal Statement: {ans['formal_statement']}\n" f" Informal Name: {ans['informal_name']}\n" f" Informal Statement: {ans['informal_statement']}\n" f" 相似度: {ans['similarity']:.4f}\n\n" ) print("搜索请求处理完成!") return latex_output, answer_output print("初始化 Gradio 界面...") with gr.Blocks(theme=gr.themes.Soft(), title="Mathlib4 Search") as demo: gr.Markdown("""
ReasLab Smart Search
""") with gr.Tab("Theorem Search"): label_input = gr.Textbox( placeholder="Example: quadratic_formula", label="Label", info="Enter a theorem label from embed.json" ) search_button = gr.Button("Search") latex_output = gr.Textbox(label="LaTeX and Tag", interactive=False) answer_output = gr.Textbox(label="Top Matching Theorems", interactive=False) search_button.click( fn=search_embed_and_answer, inputs=label_input, outputs=[latex_output, answer_output] ) print("启动 Gradio 服务...") try: demo.launch(server_name="0.0.0.0", server_port=9071, share=False,debug=True) print("Gradio 服务已启动!") except Exception as e: print(f"启动 Gradio 服务失败: {e}")