bhlewis committed on
Commit
663d1ad
1 Parent(s): 4622c28

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import h5py
4
+ import faiss
5
+ import json
6
+ from sentence_transformers import SentenceTransformer
7
+
8
+ # Load embeddings and metadata
9
def load_data():
    """Load the patent embeddings, their patent numbers, and patent metadata.

    Reads the ``embeddings`` and ``patent_numbers`` datasets from
    ``patent_embeddings.h5`` and the JSON-lines records from
    ``patent_metadata.jsonl``. Both paths are relative to the current
    working directory.

    Returns:
        A tuple ``(embeddings, patent_numbers, metadata)``. The first two are
        the HDF5 datasets read fully into memory; ``metadata`` maps each
        record's ``patent_number`` value to the whole decoded record.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If an HDF5 dataset is missing or a metadata record has no
            ``patent_number`` field.
        json.JSONDecodeError: If a non-blank metadata line is not valid JSON.
    """
    with h5py.File('patent_embeddings.h5', 'r') as f:
        embeddings = f['embeddings'][:]
        patent_numbers = f['patent_numbers'][:]

    metadata = {}
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding (assumes the file was written as UTF-8/ASCII JSON).
    with open('patent_metadata.jsonl', 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines, e.g. a trailing newline at end of file,
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            data = json.loads(line)
            metadata[data['patent_number']] = data

    return embeddings, patent_numbers, metadata
21
+
22
# Loaded once at import time; search() reads these module-level objects.
embeddings, patent_numbers, metadata = load_data()

# Build an exact (brute-force) L2 index over every stored embedding.
# FAISS works on C-contiguous float32 matrices, so normalise the array read
# from HDF5 in case it was stored with a different dtype or layout.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Sentence-embedding model used to encode search queries.
# NOTE(review): this presumably has to be the same model that produced the
# stored embeddings, otherwise the distances are not meaningful - confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')
30
+
31
def search(query, top_k=5):
    """Return the indexed patents nearest to ``query`` as display text.

    The query is encoded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; hits are resolved through ``patent_numbers``
    and ``metadata``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of patents to return.

    Returns:
        One text block per hit (patent number, first 200 characters of the
        abstract, and a score), joined with newlines. A short prompt message
        is returned when ``query`` is empty or only whitespace.
    """
    # Nothing meaningful to encode; avoid a pointless model/index round trip.
    if not query or not query.strip():
        return "Please enter a search query."

    # FAISS expects a 2-D float32 array with one row per query vector.
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)

    # Perform similarity search
    distances, indices = index.search(query_embedding, top_k)

    results = []
    for distance, idx in zip(distances[0], indices[0]):
        # FAISS pads the result with label -1 when fewer than top_k vectors
        # are available; indexing with -1 would silently pick the last patent.
        if idx < 0:
            continue

        patent_number = patent_numbers[idx]
        # h5py returns stored strings as bytes, whereas the metadata keys come
        # from JSON and are never bytes; decode so lookup and display work.
        if isinstance(patent_number, bytes):
            patent_number = patent_number.decode("utf-8")

        # A missing record or abstract should not abort the whole response.
        patent_data = metadata.get(patent_number)
        abstract = patent_data.get('abstract') if patent_data else None
        if abstract is None:
            abstract_text = "(abstract unavailable)"
        else:
            abstract_text = f"{abstract[:200]}..."

        # NOTE(review): IndexFlatL2 reports squared L2 distances, so
        # ``1 - distance`` is not a bounded similarity and can be negative.
        # Kept as-is to preserve the displayed values - confirm intended metric.
        result = f"Patent Number: {patent_number}\n"
        result += f"Abstract: {abstract_text}\n"
        result += f"Similarity Score: {1 - distance:.4f}\n\n"
        results.append(result)

    return "\n".join(results)
48
+
49
+ # Create Gradio interface
50
# UI components: a two-line free-text query box in, a ten-line results box out.
query_input = gr.Textbox(lines=2, placeholder="Enter your search query here...")
results_output = gr.Textbox(lines=10, label="Search Results")

# Wire the components to search(); top_k keeps its default because the
# interface exposes only the query input.
iface = gr.Interface(
    fn=search,
    inputs=query_input,
    outputs=results_output,
    title="Patent Similarity Search",
    description="Enter a query to find similar patents based on their embeddings.",
)

# Start serving the app.
iface.launch()