Spaces:
Sleeping
Sleeping
Add demo files
Browse files- .gitattributes +1 -0
- app.py +73 -0
- definitions.ann +3 -0
- definitions.safetensors +3 -0
- requirements.txt +9 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
definitions.ann filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import duckdb
|
3 |
+
from annoy import AnnoyIndex
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
|
6 |
+
# Sentence encoder used to embed incoming queries. Its vectors are looked up
# in the 768-dimensional Annoy index below, so the two must stay in agreement.
model = SentenceTransformer("sentence-transformers/LaBSE")

# Prebuilt nearest-neighbour index; the item ids it returns are matched
# against definitions.entry_id in search_query.
annoy_index = AnnoyIndex(768, "angular")
annoy_index.load("definitions.ann")

# The app only issues SELECT statements, so open the database read-only.
# A read-write connect would silently create an empty "sonajaht.db" when the
# file is missing, deferring the failure to the first search; read-only mode
# fails at startup instead and cannot modify the data.
conn = duckdb.connect("sonajaht.db", read_only=True)
|
12 |
+
|
13 |
+
|
14 |
+
def search_query(query, top_k=10):
    """Return the definitions nearest to *query* in embedding space.

    The query is encoded with the module-level sentence encoder, its nearest
    neighbours are fetched from the Annoy index, and the matching rows are
    read from the DuckDB ``definitions``/``words`` tables.

    Args:
        query: Free-text search string.
        top_k: Maximum number of neighbours requested from the index.

    Returns:
        A pandas DataFrame with the columns ``word``, ``definition`` and
        ``relevance_score``, ordered from nearest to farthest neighbour.
        The frame is empty when the index returns no neighbours.
    """
    query_vector = model.encode(query)

    similar_item_ids, distances = annoy_index.get_nns_by_vector(
        query_vector, top_k, include_distances=True
    )

    if not similar_item_ids:
        # An empty id list would render as "IN ()" and an empty CASE, both of
        # which are SQL syntax errors; short-circuit with an empty result.
        import pandas as pd

        return pd.DataFrame(columns=["word", "definition", "relevance_score"])

    # The ids come from Annoy as integers, never from user text; int() makes
    # that explicit before they are interpolated into the statement.
    item_ids = [int(item_id) for item_id in similar_item_ids]
    id_list = ", ".join(map(str, item_ids))
    order_cases = " ".join(
        f"WHEN {item_id} THEN {rank}" for rank, item_id in enumerate(item_ids)
    )
    sql_query = f"""
        SELECT d.entry_id AS entry_id, w.value AS word, d.value AS definition
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id
            {order_cases}
        END
    """

    results = conn.execute(sql_query).fetchdf()

    # Attach each score by entry id rather than by position: the JOIN may
    # return fewer or more rows than ids requested, and a positional
    # assignment would then raise or pair rows with the wrong distance.
    # NOTE(review): the score is 1 minus the Annoy "angular" distance, kept
    # as in the original; it is not a cosine similarity and may be negative.
    distance_by_id = dict(zip(item_ids, distances))
    results["relevance_score"] = [
        1 - distance_by_id[int(entry_id)] for entry_id in results["entry_id"]
    ]

    # entry_id was selected only to align the scores; callers expect just
    # word / definition / relevance_score.
    return results.drop(columns="entry_id")
|
37 |
+
|
38 |
+
|
39 |
+
# Preset natural-language queries (Estonian, Russian, French); each one is
# rendered as a one-click button in the UI.
examples = [
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
]
|
45 |
+
|
46 |
+
|
47 |
+
def handle_example(example):
    """Run a search for a preset example query.

    Returns a ``(example, results)`` pair: the example text itself followed
    by whatever ``search_query`` returns for it.
    """
    results = search_query(example)
    return example, results
|
49 |
+
|
50 |
+
|
51 |
+
# Gradio UI: a free-text search box plus one-click example queries, with the
# results shown in a table.
with gr.Blocks() as demo:
    gr.Markdown(value="# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button(value="Otsi")

    # One button per preset query, laid out side by side.
    with gr.Row():
        example_buttons = []
        for example_text in examples:
            example_buttons.append(gr.Button(value=example_text))

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Manual search: textbox content in, results table out.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

    # Example search: the button's own label is passed in via a State
    # component; the handler fills the textbox and the results table.
    for example_button in example_buttons:
        preset_query = gr.State(value=example_button.value)
        example_button.click(
            fn=handle_example,
            inputs=preset_query,
            outputs=[query_input, results_output],
        )


if __name__ == "__main__":
    demo.launch()
|
definitions.ann
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7951b3c11b358bf9e41dcbe4f6839a71a0230d72d062764ceaf2a8012b90bead
|
3 |
+
size 732357480
|
definitions.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1d67e8b66660ba36bcf024d903c4242af299fc8e9d52dd617140e1a874dfae2
|
3 |
+
size 657254488
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
sentence-transformers
|
3 |
+
duckdb
|
4 |
+
annoy
|
5 |
+
tqdm
|
6 |
+
safetensors
|
7 |
+
numpy
|
8 |
+
torch
|
9 |
+
datasets
|