hadith_verse_finder

Sleeping

hadith_verse_finder / app.py

adding files

e6b521c about 2 months ago

1.59 kB

	import pickle
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from sklearn.neighbors import NearestNeighbors
	import gradio as gr

	# Restore the precomputed embeddings from disk.
	# NOTE: pickle.load can execute code embedded in the file, so only a trusted
	# 'embeddings_hadith.pkl' should ever be shipped next to this script.
	with open('embeddings_hadith.pkl', 'rb') as embeddings_file:
	    embeddings = pickle.load(embeddings_file)

	# Build the neighbour index: 10 neighbours per query, cosine distance.
	nbrs = NearestNeighbors(n_neighbors=10, metric='cosine')
	nbrs.fit(embeddings)

	# Read the tab-separated dataset; its 'text' column is what searches return.
	# NOTE(review): rows are looked up by neighbour index later on, so the row
	# order here presumably matches the order of `embeddings` -- confirm.
	df = pd.read_csv('hadith_combined.csv', sep='\t')

	# Sentence encoder used to embed incoming queries.
	model = SentenceTransformer('all-MiniLM-L6-v2')

	def semantic_search(query, model, embeddings, nbrs):
	    """Return the stored texts nearest to ``query`` with their distances.

	    Args:
	        query: Text to search for.
	        model: Encoder exposing ``encode(list_of_texts)``; the first vector
	            it returns is used as the query embedding.
	        embeddings: Not read by this function; kept in the signature so
	            existing callers keep working.
	        nbrs: Fitted neighbour index exposing ``kneighbors``.

	    Returns:
	        A list of ``(text, distance)`` tuples, one per neighbour, in the
	        order ``nbrs.kneighbors`` reports them.

	    Note:
	        Texts come from the module-level ``df['text']`` column, addressed by
	        position with the indices the neighbour index returns.
	    """
	    # Embed the single query; encode() takes a batch, so unwrap element 0.
	    query_vector = model.encode([query])[0]

	    # kneighbors() also works on a batch, hence the one-element list.
	    neighbour_distances, neighbour_rows = nbrs.kneighbors([query_vector])

	    # Pair each neighbour's text with its distance, keeping the index order.
	    matches = []
	    for row, distance in zip(neighbour_rows[0], neighbour_distances[0]):
	        matches.append((df['text'].iloc[row], distance))
	    return matches

	def search_interface(query):
	    """Run a semantic search for ``query`` and format the hits as one string.

	    Uses the module-level ``model``, ``embeddings`` and ``nbrs`` objects and
	    discards the distances, keeping only the matched texts.

	    Args:
	        query: Text entered by the user.

	    Returns:
	        The matched texts joined into a single string, with a blank line
	        between consecutive results.
	    """
	    matches = semantic_search(query, model, embeddings, nbrs)
	    # Double newline so each result stands apart in the output textbox.
	    return '\n\n'.join(text for text, _distance in matches)

	# Remove pandas' column-width limit for displayed values.
	pd.set_option('display.max_colwidth', None)


	# Build the UI: a two-line query box feeding search_interface, whose result
	# is shown in a textbox labelled "Similar Sentences".
	query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
	results_box = gr.Textbox(label="Similar Sentences")
	iface = gr.Interface(
	    fn=search_interface,
	    inputs=query_box,
	    outputs=results_box,
	)

	# Start serving; share=True asks Gradio to create a public share link.
	iface.launch(share=True)