File size: 3,789 Bytes
7a358b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
import chromadb
from typing import List, Dict

# ChromaDB 配置
client = chromadb.PersistentClient(path="chroma_data")

# 加载集合
embed_collection = client.get_collection(name="embed_data")
answer_collection = client.get_collection(name="answer_data")


def search_embed_and_answer(label: str):
    def search_top_answers(target_embedding: List[float], top_n: int = 10) -> List[Dict]:
        """使用ChromaDB进行相似度搜索"""
        print("正在执行相似度搜索...")
        results = answer_collection.query(
            query_embeddings=[target_embedding],
            n_results=top_n,
            include=["metadatas", "documents", "distances"]
        )
    
        # 处理结果
        answers = []
        for meta, doc, dist in zip(
            results['metadatas'][0],
            results['documents'][0],
            results['distances'][0]
        ):
            answers.append({
                "formal_name": meta['formal_name'],
                "formal_statement": meta['formal_statement'],
                "informal_name": meta['informal_name'],
                "informal_statement": doc,
                "similarity": 1 - dist
            })
        print("相似度搜索完成!")
        return answers
        
    """主搜索函数"""
    print("开始处理搜索请求...")
    
    # 在 embed 集合中查找目标条目
    print("正在查询目标定理的元数据...")
    results = embed_collection.query(
        query_texts=[label],
        n_results=1,
        include=["metadatas", "embeddings"]
    )
    
    if not results['ids'][0]:
        print(f"未找到 label 为 {label} 的定理。")
        return f"未找到 label 为 {label} 的定理。", ""
    
    # 提取目标条目
    print("正在提取目标定理的特征...")
    target_metadata = results['metadatas'][0][0]
    target_embedding = results['embeddings'][0][0]
    
    # 执行搜索
    print("正在执行相似度搜索...")
    top_answers = search_top_answers(target_embedding)
    
    # 格式化结果
    print("正在格式化搜索结果...")
    latex_output = f"目标定理的 tag: {target_metadata['tag']}\nLaTeX 渲染结果: {target_metadata['latex']}"
    
    answer_output = "最吻合的前十个定理:\n"
    for i, ans in enumerate(top_answers, 1):
        answer_output += (
            f"{i}. Formal Name: {ans['formal_name']}\n"
            f"   Formal Statement: {ans['formal_statement']}\n"
            f"   Informal Name: {ans['informal_name']}\n"
            f"   Informal Statement: {ans['informal_statement']}\n"
            f"   相似度: {ans['similarity']:.4f}\n\n"
        )
    
    print("搜索请求处理完成!")
    return latex_output, answer_output

print("初始化 Gradio 界面...")
with gr.Blocks(theme=gr.themes.Soft(), title="Mathlib4 Search") as demo:
    gr.Markdown("""<center><font size=8> ReasLab Smart Search</font></center>""")
        
    with gr.Tab("Theorem Search"):
        label_input = gr.Textbox(
            placeholder="Example: quadratic_formula", 
            label="Label",
            info="Enter a theorem label from embed.json"
        )
        search_button = gr.Button("Search")
        latex_output = gr.Textbox(label="LaTeX and Tag", interactive=False)
        answer_output = gr.Textbox(label="Top Matching Theorems", interactive=False)
            
        search_button.click(
            fn=search_embed_and_answer,
            inputs=label_input,
            outputs=[latex_output, answer_output]
        )
    
print("启动 Gradio 服务...")
try:
    demo.launch(server_name="0.0.0.0", server_port=9071, share=False,debug=True)
    print("Gradio 服务已启动!")
except Exception as e:
    print(f"启动 Gradio 服务失败: {e}")