hadith_verse_finder

Sleeping

App Files Files Community

hadith_verse_finder / app.py

mhdhrubo

adding files

7c5cc2a verified 4 months ago

raw

history blame

1.75 kB

	import pickle
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from sklearn.neighbors import NearestNeighbors
	import gradio as gr

	# Unpickle the stored embeddings that the neighbour index is fitted on.
	with open('embeddings.pkl', mode='rb') as embeddings_file:
	    embeddings = pickle.load(embeddings_file)

	# Build the neighbour index first, then fit it; cosine metric, 20 neighbours
	# unless a query asks for a different count.
	nbrs = NearestNeighbors(n_neighbors=20, metric='cosine')
	nbrs.fit(embeddings)

	# Table read from the CSV file; the search code reads its 'text' column.
	df = pd.read_csv('quran_hadith.csv')

	# Sentence encoder that is handed to the search function to embed queries.
	model = SentenceTransformer('all-MiniLM-L6-v2')

	def semantic_search(query, model, embeddings, nbrs, k=10):
	    """Return the ``k`` dataset texts nearest to ``query`` with a similarity score.

	    Args:
	        query: Free-text query string.
	        model: Encoder exposing ``encode(list_of_str)`` that yields one vector
	            per input string.
	        embeddings: Not used by this function; kept in the signature so
	            existing callers keep working.
	        nbrs: Fitted neighbour index exposing
	            ``kneighbors(X, n_neighbors=...)`` and returning
	            ``(distances, indices)`` with one row per query vector.
	        k: Number of neighbours to retrieve.

	    Returns:
	        A list of ``(text, similarity)`` tuples in the order the index
	        returns them. ``text`` comes from the module-level ``df['text']``
	        column; ``similarity`` is ``(1 - distance) * 100`` rounded to two
	        decimals, i.e. a cosine similarity percentage when the index uses
	        the cosine metric.
	    """
	    # Encode as a one-element batch and take the single resulting vector.
	    query_embedding = model.encode([query])[0]

	    # Request exactly k neighbours; otherwise the index falls back to the
	    # n_neighbors it was constructed with and k is silently ignored.
	    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

	    # Both results hold one row per query vector, so row 0 of each must be
	    # paired. Zipping the index row against the whole distances array would
	    # produce a single tuple carrying every distance at once.
	    return [
	        # float() keeps the score a plain Python number for serialisation.
	        (df['text'].iloc[idx], round(float((1 - dist) * 100), 2))
	        for idx, dist in zip(indices[0], distances[0])
	    ]

	# Gradio function
	def search_interface(query):
	    """Run a search for ``query`` and shape the matches for JSON display.

	    Calls ``semantic_search`` with the module-level model, embeddings and
	    neighbour index (``k=10``) and returns one dict per result pair: the
	    first element under ``"sentence"`` and the second element, followed by
	    ``%``, under ``"similarity"``.
	    """
	    results = []
	    for sentence, distance in semantic_search(query, model, embeddings, nbrs, k=10):
	        entry = {"sentence": sentence, "similarity": f"{distance}%"}
	        results.append(entry)
	    return results

	# Input and output widgets are built first, then wired to the search callback.
	query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
	results_view = gr.JSON(label="Similar Sentences")
	iface = gr.Interface(fn=search_interface, inputs=query_box, outputs=results_view)

	# Start the app, requesting a shareable link.
	iface.launch(share=True)