Spaces:
Sleeping
Sleeping
import pickle | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer | |
from sklearn.neighbors import NearestNeighbors | |
import gradio as gr | |
# Precomputed embedding vectors, deserialised from disk.
# NOTE: unpickling can execute arbitrary code, so embeddings.pkl must come
# from a trusted source.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

# Cosine-distance nearest-neighbour index over those vectors; 20 is the
# neighbour count used when a query does not ask for a specific number.
nbrs = NearestNeighbors(n_neighbors=20, metric='cosine')
nbrs.fit(embeddings)

# Dataset whose 'text' column supplies the sentences returned by the search.
df = pd.read_csv('quran_hadith.csv')

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
def semantic_search(query, model, embeddings, nbrs, k=10):
    """Return up to ``k`` dataset sentences closest to ``query``.

    Args:
        query: Text to search for.
        model: Encoder exposing ``encode(list_of_str)``; the first row of its
            output is used as the query vector.
        embeddings: The vectors ``nbrs`` was fitted on. Only its length is
            used here, to cap ``k``.
        nbrs: Fitted neighbour index exposing
            ``kneighbors(X, n_neighbors=...)`` and returning
            ``(distances, indices)``, each with one row per query.
        k: Maximum number of results to return.

    Returns:
        A list of ``(sentence, similarity)`` tuples, nearest first.
        ``similarity`` is ``(1 - distance) * 100`` rounded to two decimals,
        i.e. a cosine-similarity percentage when ``nbrs`` uses the cosine
        metric. The list is empty when ``k`` is below 1 or there are no
        embeddings.
    """
    # scikit-learn raises ValueError when asked for more neighbours than the
    # index holds, so cap k at the number of fitted vectors.
    k = min(k, len(embeddings))
    if k < 1:
        return []

    # Encode the query
    query_embedding = model.encode([query])[0]

    # Pass k explicitly; otherwise the index falls back to the n_neighbors it
    # was constructed with and the k argument is silently ignored.
    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

    # Both results are 2-D with one row per query, and there is exactly one
    # query here. Zipping against the un-indexed `distances` would pair the
    # first hit with the whole row and drop every other hit.
    texts = df['text']
    return [
        (texts.iloc[idx], round(float((1 - dist) * 100), 2))
        for idx, dist in zip(indices[0], distances[0])
    ]
# Gradio callback
def search_interface(query):
    """Run a semantic search for ``query`` and shape the hits for JSON output.

    Delegates to ``semantic_search`` with the module-level ``model``,
    ``embeddings`` and ``nbrs``, requesting ``k=10`` results.

    Returns:
        A list of dicts, one per hit, with keys ``"sentence"`` and
        ``"similarity"``; the latter is the hit's score rendered as text
        with a trailing ``%``.
    """
    hits = semantic_search(query, model, embeddings, nbrs, k=10)
    payload = []
    for text, score in hits:
        payload.append({"sentence": text, "similarity": f"{score}%"})
    return payload
# Build the Gradio UI: a two-line text box feeds search_interface and the
# returned list is rendered as JSON.
_query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
_results_view = gr.JSON(label="Similar Sentences")
iface = gr.Interface(fn=search_interface, inputs=_query_box, outputs=_results_view)

# Start the app; share=True asks Gradio to also create a public share link.
iface.launch(share=True)