Spaces:
Sleeping
Sleeping
import pickle | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer | |
from sklearn.neighbors import NearestNeighbors | |
import gradio as gr | |
# Load the embeddings from the file | |
with open('embeddings_hadith.pkl', 'rb') as f: | |
embeddings = pickle.load(f) | |
# Initialize the Nearest Neighbors model with cosine similarity | |
nbrs = NearestNeighbors(n_neighbors=10, metric='cosine').fit(embeddings) | |
# Load the dataset | |
df = pd.read_csv('hadith_combined.csv', delimiter='\t') | |
# Initialize the SentenceTransformer model | |
model = SentenceTransformer('all-MiniLM-L6-v2') | |
def semantic_search(query, model, embeddings, nbrs): | |
# Encode the query | |
query_embedding = model.encode([query])[0] | |
# Find the k nearest neighbors | |
distances, indices = nbrs.kneighbors([query_embedding]) | |
# Return the k most similar sentences and their distances | |
similar_sentences = [(df['text'].iloc[idx], dist) for idx, dist in zip(indices[0], distances[0])] | |
return similar_sentences | |
def search_interface(query): | |
similar_sentences = semantic_search(query, model, embeddings, nbrs) | |
sentences = [sentence for sentence, distance in similar_sentences] | |
formatted_output = '\n\n'.join(sentences) # Join sentences with double newlines for separation | |
return formatted_output | |
pd.set_option('display.max_colwidth', None) | |
# Create Gradio interface | |
iface = gr.Interface( | |
fn=search_interface, | |
inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."), | |
outputs=gr.Textbox(label="Similar Sentences") | |
) | |
# Launch the interface | |
iface.launch(share=True) | |