innocent-charles committed on
Commit
50fdd96
1 Parent(s): 062d5b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -25
app.py CHANGED
@@ -1,33 +1,98 @@
 
1
  import gradio as gr
2
  from sentence_transformers import SentenceTransformer, util
3
 
4
# Load the model: a single module-level SentenceTransformer instance that
# the similarity function below reads as a global.
MODEL_REPO_ID = "sartifyllc/AviLaBSE"
model = SentenceTransformer(MODEL_REPO_ID)
 
 
 
 
 
 
 
6
 
7
# Function to compute similarities
def compute_similarities(original_sentence, sentences_to_compare):
    """Score each candidate sentence against *original_sentence*.

    Args:
        original_sentence: The reference sentence.
        sentences_to_compare: Either a single string holding one candidate
            per line (what a multi-line text box delivers) or an iterable
            of candidate strings.

    Returns:
        A list of ``(sentence, cosine_similarity)`` tuples, one per
        non-blank candidate, in input order. An empty list when there are
        no non-blank candidates.
    """
    # A text box hands over one newline-separated string. Iterating over
    # that string directly would walk its characters, not its sentences.
    if isinstance(sentences_to_compare, str):
        sentences_to_compare = sentences_to_compare.splitlines()

    candidates = [s.strip() for s in sentences_to_compare if s and s.strip()]
    if not candidates:
        return []

    # Encode the original sentence and the sentences to compare
    embeddings_original = model.encode([original_sentence])
    embeddings_candidates = model.encode(candidates)

    # Compute cosine similarities; row 0 holds the scores for the single
    # original sentence against every candidate.
    similarities = util.cos_sim(embeddings_original, embeddings_candidates)
    scores = similarities[0].tolist()

    # Prepare the results as a list of tuples
    return list(zip(candidates, scores))
 
 
 
 
 
19
 
20
# Define the Gradio interface.
# Components are taken from the top-level ``gr`` namespace (``gr.Textbox``,
# ``gr.Dataframe``) rather than the deprecated ``gr.inputs`` / ``gr.outputs``
# sub-modules.
iface = gr.Interface(
    fn=compute_similarities,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the original sentence here..."),
        gr.Textbox(lines=5, placeholder="Enter sentences to compare, separated by new lines..."),
    ],
    outputs=gr.Dataframe(headers=["Sentence", "Similarity Score"]),
    title="Sentence Similarity Checker",
    description="Enter an original sentence and a list of sentences to compare. The app will compute and display similarity scores for each comparison.",
)

# Launch the interface
iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import functools

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer, util
4
 
5
# Available models.
# Each display name shown in the UI is the part of the Hugging Face repo id
# after the owner prefix, so the mapping is derived from the repo ids.
_MODEL_REPO_IDS = (
    "sartifyllc/AviLaBSE",
    "sartifyllc/MultiLinguSwahili-serengeti-E250-nli-matryoshka",
    "Mollel/MultiLinguSwahili-bert-base-sw-cased-nli-matryoshka",
    "sartifyllc/swahili-paraphrase-multilingual-mpnet-base-v2-nli-matryoshka",
    "sartifyllc/bge-base-swahili-matryoshka",
    "sartifyllc/MultiLinguSwahili-bge-small-en-v1.5-nli-matryoshka",
)
model_dict = {repo_id.rpartition("/")[2]: repo_id for repo_id in _MODEL_REPO_IDS}
14
 
15
# Function to load the selected model
# Memoized so repeated requests for the same model reuse the already
# constructed SentenceTransformer instead of building it again on every
# call. The cache is bounded so that switching between many models does not
# keep every one of them alive at once.
@functools.lru_cache(maxsize=2)
def load_model(model_name):
    """Return the SentenceTransformer registered under *model_name*.

    Args:
        model_name: A key of ``model_dict``.

    Returns:
        The ``SentenceTransformer`` built from the mapped repo id. The same
        instance is returned for repeated calls while it stays in the cache.

    Raises:
        KeyError: If *model_name* is not a key of ``model_dict``.
    """
    return SentenceTransformer(model_dict[model_name])
 
 
18
 
19
# Function to compute similarity and classify relationship
def _is_blank(text):
    """Return True when *text* is None, empty, or whitespace only."""
    return text is None or not text.strip()


def _word_level_similarities(model, sentence1, sentence2, dimension):
    """Return the cosine similarity of every word pair across two sentences.

    Words are obtained with ``str.split()``. Each word is encoded on its own
    and truncated to the first *dimension* embedding components. Keys have
    the form ``"<word1> - <word2>"``; repeated words share one key.
    """
    words1 = sentence1.split()
    words2 = sentence2.split()
    word_embeddings1 = model.encode(words1)[..., :dimension]
    word_embeddings2 = model.encode(words2)[..., :dimension]
    # One matrix call instead of one cos_sim call per word pair;
    # entry [i, j] compares words1[i] with words2[j].
    pairwise = util.cos_sim(word_embeddings1, word_embeddings2)
    return {
        f"{w1} - {w2}": float(pairwise[i, j])
        for i, w1 in enumerate(words1)
        for j, w2 in enumerate(words2)
    }


def predict(model_name, mode, sentence1, sentence2=None, sentence3=None, sentence4=None, dimension="64"):
    """Compute sentence (and optionally word-level) cosine similarities.

    Args:
        model_name: Key of ``model_dict`` selecting the model to use.
        mode: ``"Compare one to three"`` scores sentence 1 against sentences
            2-4. Any other value scores sentence 1 against sentence 2, and
            ``"Compare two sentences"`` additionally reports word-level
            similarities.
        sentence1: The reference sentence.
        sentence2, sentence3, sentence4: Sentences to compare against.
        dimension: Number of leading embedding components to keep, as an int
            or numeric string. A missing/empty value falls back to 64.

    Returns:
        A JSON-serialisable dict. On success it holds ``"Selected
        Dimension"``, ``"Input Sentences"``, ``"Similarity Scores"`` and, in
        two-sentence mode, ``"Word-level Similarities"``. When a required
        sentence is missing or blank it is ``{"Error": <message>}``, so the
        single JSON output always receives exactly one value.
    """
    # A dropdown left unselected can deliver None; fall back to the default.
    dimension = int(dimension) if dimension else 64
    compare_three = mode == "Compare one to three"

    # Validate before loading the model so bad input fails fast. Empty text
    # boxes deliver "" rather than None, hence the blank check.
    if _is_blank(sentence1):
        return {"Error": "Please provide the first sentence."}
    if compare_three:
        if any(_is_blank(s) for s in (sentence2, sentence3, sentence4)):
            return {"Error": "Please provide three sentences for comparison."}
        sentences = [sentence1, sentence2, sentence3, sentence4]
    else:
        if _is_blank(sentence2):
            return {"Error": "Please provide the second sentence for comparison."}
        sentences = [sentence1, sentence2]

    result = {
        "Selected Dimension": dimension,
        "Input Sentences": {
            "Sentence 1": sentence1,
            "Sentence 2": sentence2,
            "Sentence 3": sentence3,
            "Sentence 4": sentence4,
        },
        "Similarity Scores": {},
    }

    model = load_model(model_name)
    # Keep only the first `dimension` components of every embedding.
    embeddings = model.encode(sentences)[..., :dimension]

    if compare_three:
        # Row 0 holds sentence 1 scored against sentences 2, 3 and 4.
        similarities = util.cos_sim(embeddings[0], embeddings[1:])
        result["Similarity Scores"] = {
            f"Sentence {number}": score
            for number, score in enumerate(similarities[0].tolist(), start=2)
        }
    else:
        similarity_score = util.cos_sim(embeddings[0], embeddings[1])
        result["Similarity Scores"] = {"Similarity Score": float(similarity_score)}

    # Word-level similarity
    if mode == "Compare two sentences":
        result["Word-level Similarities"] = _word_level_similarities(
            model, sentence1, sentence2, dimension
        )

    return result
70
 
71
# Define inputs and outputs for Gradio interface.
# Every dropdown gets an explicit initial value. Depending on the Gradio
# version, a dropdown without one may start unselected and hand None to the
# handler. The dimension default matches the handler's own default of "64".
model_dropdown = gr.Dropdown(
    choices=list(model_dict.keys()),
    value=next(iter(model_dict)),
    label="Model",
)
mode_dropdown = gr.Dropdown(
    choices=["Compare two sentences", "Compare one to three"],
    value="Compare two sentences",
    label="Mode",
)
dimension_dropdown = gr.Dropdown(
    choices=["768", "512", "256", "128", "64"],
    value="64",
    label="Embedding Dimension",
)
sentence1_input = gr.Textbox(lines=2, placeholder="Enter the first sentence here...", label="Sentence 1")
sentence2_input = gr.Textbox(lines=2, placeholder="Enter the second sentence here...", label="Sentence 2 (or first of three for mode)")
sentence3_input = gr.Textbox(lines=2, placeholder="Enter the third sentence here...", label="Sentence 3")
sentence4_input = gr.Textbox(lines=2, placeholder="Enter the fourth sentence here...", label="Sentence 4")

# Order must match the positional parameters of the handler:
# model, mode, sentences 1-4, dimension.
inputs = [model_dropdown, mode_dropdown, sentence1_input, sentence2_input, sentence3_input, sentence4_input, dimension_dropdown]
outputs = gr.JSON(label="Detailed Similarity Scores")
 
 
 
 
 
 
 
 
 
82
 
83
# Example rows for the interface. Each row lists one value per input, in
# the order: model, mode, sentence 1-4, embedding dimension.
_EXAMPLE_MODEL = "MultiLinguSwahili-serengeti-E250-nli-matryoshka"
examples = [
    [
        _EXAMPLE_MODEL,
        "Compare one to three",
        "Mtoto mdogo anaruka mikononi mwa mwanamke aliyevalia suti nyeusi ya kuogelea akiwa kwenye dimbwi.",
        "Mtoto akiruka mikononi mwa mwanamke aliyevalia suti ya kuogelea kwenye dimbwi.",
        "Mama na binti wakinunua viatu.",
        "Mtu anashindana katika mashindano ya mbio.",
        "64",
    ],
    [
        _EXAMPLE_MODEL,
        "Compare two sentences",
        "Mwanamume na mwanamke wachanga waliovaa mikoba wanaweka au kuondoa kitu kutoka kwenye mti mweupe wa zamani, huku watu wengine wamesimama au wameketi nyuma.",
        "tai huruka",
        None,
        None,
        "64",
    ],
]
87
+
88
# Create Gradio interface: wire the handler to the components and examples
# defined above, then start the app.
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False,
    title="Swahili Sentence Similarity with Matryoshka Model",
    description="Compute the semantic similarity between Swahili sentences using various SentenceTransformer models.",
    article="Author: Michael Mollel. Model from Hugging Face Hub (sartify.com): [Swahili-Nli-Matryoshka](https://huggingface.co/sartifyllc/MultiLinguSwahili-serengeti-E250-nli-matryoshka)",
)
demo.launch(debug=True, share=True)