# mhdhrubo's picture
# adding files
# e6b521c
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
import gradio as gr
# Read the pickled embeddings that the neighbour index is built from.
# NOTE(review): pickle.load can execute code embedded in the file; make sure
# the .pkl shipped with the app comes from a trusted source.
with open('embeddings_hadith.pkl', 'rb') as pickle_file:
    embeddings = pickle.load(pickle_file)

# Build the neighbour index: 10 results per query by default, compared with
# the cosine metric.
nbrs = NearestNeighbors(n_neighbors=10, metric='cosine')
nbrs.fit(embeddings)

# Tab-separated dataset; its 'text' column is what search results are read
# from. Presumably row i of df corresponds to embeddings[i] -- TODO confirm.
df = pd.read_csv('hadith_combined.csv', sep='\t')

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
def semantic_search(query, model, embeddings, nbrs, k=None):
    """Return the dataset texts closest to *query* together with their distances.

    The query is encoded with *model*, the nearest entries are looked up in
    *nbrs*, and the matching rows are read from the 'text' column of the
    module-level DataFrame ``df``.

    Args:
        query: Text to search for.
        model: Encoder exposing ``encode(list_of_str)``; the first returned
            vector is used as the query embedding.
        embeddings: Not used by this function; kept so existing callers that
            pass it positionally keep working.
        nbrs: Fitted neighbour index exposing ``kneighbors``.
        k: Optional number of results. ``None`` (the default) lets the index
            use the neighbour count it was constructed with.

    Returns:
        A list of ``(text, distance)`` tuples, in the order reported by
        ``nbrs.kneighbors``.
    """
    # encode() works on batches, so wrap the query and unwrap the single row.
    query_embedding = model.encode([query])[0]

    # kneighbors() also expects a batch; row 0 of each result is our query.
    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

    # Look the column up once instead of once per result.
    texts = df['text']
    return [(texts.iloc[idx], dist) for idx, dist in zip(indices[0], distances[0])]
def search_interface(query):
    """Gradio callback: return the texts most similar to *query*.

    Runs ``semantic_search`` with the module-level ``model``, ``embeddings``
    and ``nbrs``, discards the distances, and joins the matched texts with a
    blank line between them.

    Args:
        query: Free-text query from the input textbox.

    Returns:
        The matched texts separated by blank lines, or an empty string when
        the query is ``None``, empty, or only whitespace.
    """
    # A blank query has nothing meaningful to match, so skip the encoder and
    # the neighbour lookup entirely.
    if query is None or not query.strip():
        return ''

    similar_sentences = semantic_search(query, model, embeddings, nbrs)

    # str.join raises TypeError on non-string items (e.g. a missing value in
    # the 'text' column), so keep only real strings.
    sentences = [
        sentence
        for sentence, _distance in similar_sentences
        if isinstance(sentence, str)
    ]
    return '\n\n'.join(sentences)
# Remove pandas' column-width limit for displayed values.
pd.options.display.max_colwidth = None

# Assemble the Gradio UI: a two-line query box in, a labelled text box out.
query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
results_box = gr.Textbox(label="Similar Sentences")
iface = gr.Interface(fn=search_interface, inputs=query_box, outputs=results_box)

# Start the app; share=True requests a public share link from Gradio.
iface.launch(share=True)