adding files
- .gitattributes +1 -0
- .gitignore +8 -0
- app.py +49 -0
- embeddings.pkl +3 -0
- quran_hadith.csv +3 -0
- requirements.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+quran_hadith.csv filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,8 @@
+__pycache__/
+venv/
+*.pyc
+*.pyo
+*.pyd
+*.pyc
+*.pyo
+*.pyd
app.py
ADDED
@@ -0,0 +1,49 @@
+import pickle
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+from sklearn.neighbors import NearestNeighbors
+import gradio as gr
+
+# Load the precomputed sentence embeddings from disk
+with open('embeddings.pkl', 'rb') as f:
+    embeddings = pickle.load(f)
+
+# Initialize the Nearest Neighbors index with cosine distance
+nbrs = NearestNeighbors(n_neighbors=20, metric='cosine').fit(embeddings)
+
+# Load the dataset
+df = pd.read_csv('quran_hadith.csv')
+
+# Initialize the SentenceTransformer model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+
+def semantic_search(query, model, embeddings, nbrs, k=10):
+    # Encode the query
+    query_embedding = model.encode([query])[0]
+
+    # Find the k nearest neighbors
+    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)
+
+    # Convert cosine distances to similarity percentages, rounded to two decimal places
+    distances = [(1 - dist) * 100 for dist in distances[0]]
+    distances = [round(dist, 2) for dist in distances]
+
+    # Return the k most similar sentences and their similarity scores
+    similar_sentences = [(df['text'].iloc[idx], dist) for idx, dist in zip(indices[0], distances)]
+    return similar_sentences
+
+# Gradio function
+def search_interface(query):
+    similar_sentences = semantic_search(query, model, embeddings, nbrs, k=10)
+    results = [{"sentence": sentence, "similarity": f"{distance}%"} for sentence, distance in similar_sentences]
+    return results
+
+# Create Gradio interface
+iface = gr.Interface(
+    fn=search_interface,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
+    outputs=gr.JSON(label="Similar Sentences")
+)
+
+# Launch the interface
+iface.launch(share=True)
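Note: the script that produced embeddings.pkl is not part of this commit. Below is a minimal sketch of how the file could be regenerated, assuming the CSV exposes the same 'text' column that app.py indexes and the same all-MiniLM-L6-v2 model is used; the file name generate_embeddings.py is hypothetical.

# generate_embeddings.py (hypothetical; not part of this commit)
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer

df = pd.read_csv('quran_hadith.csv')                 # same dataset the app loads
model = SentenceTransformer('all-MiniLM-L6-v2')      # same model app.py uses for queries
embeddings = model.encode(df['text'].tolist(), show_progress_bar=True)

with open('embeddings.pkl', 'wb') as f:
    pickle.dump(embeddings, f)                       # what app.py unpickles at startup

Any change to the embedding model or to the row order of quran_hadith.csv would require regenerating the pickle, since app.py maps neighbor indices straight back to rows of df.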
embeddings.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bea0ad0ae5e5cf9a73dad7706c32f651e6596cd5b025a5abd440ca5bde7e006a
+size 40502947
quran_hadith.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6beddedddf73d2e8327e284a0eeed55820a246e6f99e19445c812027b5744cc5
+size 10748559
requirements.txt
ADDED
Binary file (3.59 kB).
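The contents of requirements.txt are not rendered here since it is stored as a binary blob. Judging only from the imports in app.py, it would need to cover at least the following packages (exact versions unknown):

gradio
pandas
scikit-learn
sentence-transformers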