adorkin committed on
Commit
c532148
·
verified ·
1 Parent(s): f42fe4d

Add demo files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +73 -0
  3. definitions.ann +3 -0
  4. definitions.safetensors +3 -0
  5. requirements.txt +9 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ definitions.ann filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import duckdb
3
+ from annoy import AnnoyIndex
4
+ from sentence_transformers import SentenceTransformer
5
+
6
# Sentence-embedding model used to turn a free-text query into a vector.
MODEL_NAME = "sentence-transformers/LaBSE"

# Vector length the Annoy index is declared with; vectors passed to the index
# must have this many components.
EMBEDDING_DIM = 768

# Prebuilt Annoy index file and the DuckDB database holding words/definitions.
ANNOY_INDEX_PATH = "definitions.ann"
DATABASE_PATH = "sonajaht.db"

model = SentenceTransformer(MODEL_NAME)

annoy_index = AnnoyIndex(EMBEDDING_DIM, "angular")
annoy_index.load(ANNOY_INDEX_PATH)

conn = duckdb.connect(DATABASE_PATH)
12
+
13
+
14
def search_query(query, top_k=10):
    """Return the definitions most similar to *query* as a DataFrame.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in ``annoy_index``, and the matching rows are
    fetched from the ``definitions``/``words`` tables through ``conn``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of nearest neighbours to request from the index.

    Returns:
        A DataFrame with the columns ``word``, ``definition`` and
        ``relevance_score``, ordered from the nearest neighbour to the
        farthest. Ids returned by the index that have no matching row in the
        database are omitted instead of raising.
    """
    query_vector = model.encode(query)

    similar_item_ids, distances = annoy_index.get_nns_by_vector(
        query_vector, top_k, include_distances=True
    )

    # Key rank and score by id so each row gets the values belonging to its
    # own entry, even when the join returns fewer or more rows than ids.
    rank_by_id = {
        int(item_id): rank for rank, item_id in enumerate(similar_item_ids)
    }
    # NOTE(review): Annoy documents "angular" distance as sqrt(2 * (1 - cos)),
    # so ``1 - distance`` is a monotone score, not a cosine similarity, and it
    # can be negative. Kept as-is to preserve the displayed values.
    score_by_id = {
        int(item_id): 1 - distance
        for item_id, distance in zip(similar_item_ids, distances)
    }

    # The ids are integers produced by the index (never user input). "NULL"
    # keeps the statement valid, and matching nothing, for an empty id list.
    id_list = ", ".join(str(item_id) for item_id in rank_by_id) or "NULL"
    sql_query = f"""
        SELECT d.entry_id AS entry_id, w.value AS word, d.value AS definition
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
    """

    results = conn.execute(sql_query).fetchdf()

    results["relevance_score"] = results["entry_id"].map(score_by_id)
    results["_rank"] = results["entry_id"].map(rank_by_id)

    # Restore nearest-first order, then drop the helper columns so the output
    # keeps the original word / definition / relevance_score layout.
    results = (
        results.sort_values("_rank", kind="stable")
        .drop(columns=["entry_id", "_rank"])
        .reset_index(drop=True)
    )

    return results
37
+
38
+
39
# Sample natural-language queries; each one is rendered as a clickable button.
examples = [
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
]
45
+
46
+
47
def handle_example(example):
    """Run the search for an example query.

    Returns a ``(example, results)`` pair: the example text unchanged,
    followed by whatever ``search_query`` returns for it.
    """
    results = search_query(example)
    return example, results
49
+
50
+
51
# Build the interface. Components appear in the order they are created, so the
# creation order below is deliberate.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # One button per example query, laid out side by side.
    with gr.Row():
        example_buttons = [gr.Button(example) for example in examples]

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Manual search: run the textbox content through search_query.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

    # Example buttons: pass the button's own text to the handler, which
    # returns values for both the textbox and the results table.
    for button in example_buttons:
        button.click(
            fn=handle_example,
            inputs=gr.State(button.value),
            outputs=[query_input, results_output],
        )


if __name__ == "__main__":
    demo.launch()
definitions.ann ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7951b3c11b358bf9e41dcbe4f6839a71a0230d72d062764ceaf2a8012b90bead
3
+ size 732357480
definitions.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d67e8b66660ba36bcf024d903c4242af299fc8e9d52dd617140e1a874dfae2
3
+ size 657254488
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ sentence-transformers
3
+ duckdb
4
+ annoy
5
+ tqdm
6
+ safetensors
7
+ numpy
8
+ torch
9
+ datasets