Spaces:

saifeddinemk
/

cv_job

Sleeping

App Files Files Community

cv_job / app.py

saifeddinemk

Init Commit

8f30f12 7 days ago

raw

history blame

4.15 kB

	from sentence_transformers import SentenceTransformer, util
	from transformers import pipeline
	import gradio as gr
	import nltk

	# Load the SentenceTransformer model used to embed the CV and job sentences.
	try:
	    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
	except Exception as e:
	    # Keep the name bound so later use fails with an error about the model
	    # itself rather than an unrelated NameError.
	    model = None
	    print(f"Error loading SentenceTransformer model: {e}")

	# Load the summarization pipeline used to condense both input texts.
	try:
	    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	except Exception as e:
	    # Same reasoning as above: bind the name even when loading fails.
	    summarizer = None
	    print(f"Error loading summarization pipeline: {e}")

	# Download the NLTK sentence-tokenizer data. Newer NLTK releases load the
	# 'punkt_tab' resource for sent_tokenize while older ones load 'punkt', so
	# fetch both to work with either.
	for _nltk_resource in ('punkt', 'punkt_tab'):
	    nltk.download(_nltk_resource)

	def summarize_text(text, max_length=100, min_length=25):
	    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

	    Args:
	        text: The text to condense.
	        max_length: Forwarded to the pipeline as ``max_length``.
	        min_length: Forwarded to the pipeline as ``min_length``.

	    Returns:
	        The ``summary_text`` field of the first result the pipeline returns.

	    Raises:
	        ValueError: If ``text`` is empty or contains only whitespace.
	        RuntimeError: If the pipeline call fails; the original exception is
	            chained as the cause.
	    """
	    if not text or not text.strip():
	        raise ValueError("Error summarizing text: input text is empty")

	    try:
	        summary = summarizer(
	            text,
	            max_length=max_length,
	            min_length=min_length,
	            do_sample=False,
	            # Cut over-long inputs down to the model's limit instead of
	            # letting the pipeline fail on them.
	            truncation=True,
	        )
	        return summary[0]["summary_text"]
	    except Exception as e:
	        # Raise rather than return the message: a returned error string is
	        # indistinguishable from a real summary and would be scored as one.
	        raise RuntimeError(f"Error summarizing text: {e}") from e

	def match_cv_to_jobs(cv_text, job_descriptions):
	    """Score each sentence of a summarized job description against a summarized CV.

	    Both inputs are summarized with ``summarize_text``. The CV summary is
	    embedded with the module-level ``model``; the job summary is split into
	    sentences with NLTK, and each sentence is embedded and compared to the CV
	    embedding by cosine similarity.

	    Args:
	        cv_text: The CV text.
	        job_descriptions: The job description text, handled as one document.

	    Returns:
	        A ``(results, debug_info)`` tuple. ``results`` is a list of dicts with
	        the keys ``"Job Description Sentence"`` and ``"Similarity Score"``,
	        sorted by score from highest to lowest; it is empty when an input is
	        blank or when summarizing, encoding the CV, or tokenizing fails.
	        ``debug_info`` is a text trace of the steps that ran and any errors.
	    """
	    # Collect trace fragments and join once at the end.
	    debug_parts = ["Debug Info:\n"]

	    # Blank input would still be summarized and scored, producing
	    # meaningless matches, so stop early with an explicit message.
	    if not cv_text or not cv_text.strip():
	        debug_parts.append("Error: CV text is empty\n")
	        return [], "".join(debug_parts)
	    if not job_descriptions or not job_descriptions.strip():
	        debug_parts.append("Error: Job description text is empty\n")
	        return [], "".join(debug_parts)

	    # Summarize the CV text
	    try:
	        summarized_cv = summarize_text(cv_text, max_length=150)
	        debug_parts.append(f"Summarized CV Text: {summarized_cv}\n")
	    except Exception as e:
	        debug_parts.append(f"Error summarizing CV text: {e}\n")
	        return [], "".join(debug_parts)

	    # Summarize the job description
	    try:
	        summarized_job_desc = summarize_text(job_descriptions, max_length=150)
	        debug_parts.append(f"Summarized Job Description Text: {summarized_job_desc}\n")
	    except Exception as e:
	        debug_parts.append(f"Error summarizing job descriptions: {e}\n")
	        return [], "".join(debug_parts)

	    # Encode the summarized CV text
	    try:
	        cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
	        debug_parts.append(f"CV Embedding: {cv_embedding}\n")
	    except Exception as e:
	        debug_parts.append(f"Error encoding CV text: {e}\n")
	        return [], "".join(debug_parts)

	    # Split summarized job description into sentences
	    try:
	        description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
	    except Exception as e:
	        debug_parts.append(f"Error tokenizing job description: {e}\n")
	        return [], "".join(debug_parts)

	    results = []
	    for sentence in description_sentences:
	        try:
	            # Encode each sentence from the summarized job description
	            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
	            debug_parts.append(
	                f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
	            )

	            # Compute similarity score
	            similarity_score = util.pytorch_cos_sim(
	                cv_embedding, sentence_embedding
	            ).item()
	            debug_parts.append(f"Similarity Score for sentence: {similarity_score}\n")
	        except Exception as e:
	            # One bad sentence should not discard the others.
	            debug_parts.append(f"Error processing sentence '{sentence}': {e}\n")
	            continue

	        results.append({
	            "Job Description Sentence": sentence,
	            "Similarity Score": similarity_score,
	        })

	    # Sort results by similarity score in descending order
	    results.sort(key=lambda item: item["Similarity Score"], reverse=True)

	    return results, "".join(debug_parts)

	# Gradio interface: two text inputs, a trigger button, and two outputs
	# (the match results and the debug trace).
	with gr.Blocks() as demo:
	    gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")

	    # Text inputs; components are created in the order they are declared.
	    cv_text = gr.Textbox(
	        label="CV Text",
	        placeholder="Enter the CV text here",
	        lines=10,
	    )
	    job_descriptions = gr.Textbox(
	        label="Job Descriptions",
	        placeholder="Enter the entire job description text here",
	        lines=10,
	    )

	    # Trigger button followed by the two output widgets.
	    match_button = gr.Button("Match CV to Job Descriptions")
	    output = gr.JSON(label="Match Results")
	    debug_output = gr.Textbox(label="Debug Info", lines=10)

	    # Clicking the button calls match_cv_to_jobs with both text boxes and
	    # routes its (results, debug_info) return into the two outputs.
	    match_button.click(
	        fn=match_cv_to_jobs,
	        inputs=[cv_text, job_descriptions],
	        outputs=[output, debug_output],
	    )

	demo.launch()