Update app.py
Browse files
app.py
CHANGED
@@ -53,17 +53,11 @@ def load_data():
|
|
53 |
embeddings, patent_numbers, metadata, texts = load_data()
|
54 |
|
55 |
# Load BERT model for encoding search queries
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
print("Falling back to a general-purpose model.")
|
62 |
-
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
|
63 |
-
bert_model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
|
64 |
-
|
65 |
-
def encode_texts(texts):
|
66 |
-
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
|
67 |
with torch.no_grad():
|
68 |
outputs = bert_model(**inputs)
|
69 |
embeddings = outputs.last_hidden_state.mean(dim=1)
|
@@ -174,7 +168,7 @@ iface = gr.Interface(
|
|
174 |
fn=hybrid_search,
|
175 |
inputs=[
|
176 |
gr.Textbox(lines=2, placeholder="Enter your patent query here..."),
|
177 |
-
gr.Slider(minimum=1, maximum=20, step=1,
|
178 |
],
|
179 |
outputs=gr.Textbox(lines=10, label="Search Results"),
|
180 |
title="Patent Similarity Search",
|
|
|
53 |
embeddings, patent_numbers, metadata, texts = load_data()
|
54 |
|
55 |
# Load BERT model for encoding search queries
|
56 |
+
tokenizer = AutoTokenizer.from_pretrained('anferico/bert-for-patents')
|
57 |
+
bert_model = AutoModel.from_pretrained('anferico/bert-for-patents')
|
58 |
+
|
59 |
+
def encode_texts(texts, max_length=512):
|
60 |
+
inputs = tokenizer(texts, padding=True, truncation=True, max_length=max_length, return_tensors='pt')
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
with torch.no_grad():
|
62 |
outputs = bert_model(**inputs)
|
63 |
embeddings = outputs.last_hidden_state.mean(dim=1)
|
|
|
168 |
fn=hybrid_search,
|
169 |
inputs=[
|
170 |
gr.Textbox(lines=2, placeholder="Enter your patent query here..."),
|
171 |
+
gr.Slider(minimum=1, maximum=20, step=1, value=5, label="Top K Results"),
|
172 |
],
|
173 |
outputs=gr.Textbox(lines=10, label="Search Results"),
|
174 |
title="Patent Similarity Search",
|