Spaces:

saifeddinemk
/

cv_job

Sleeping

App Files Files Community

cv_job / app.py

saifeddinemk

Init Commit

abe3356 7 days ago

raw

history blame

3.23 kB

	from sentence_transformers import SentenceTransformer, util
	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
	import gradio as gr

	# Sentence-embedding model; its encodings drive the overall similarity score.
	model = SentenceTransformer('msmarco-distilbert-base-v4')

	# Token-classification (NER) checkpoint shared by the tokenizer and the model,
	# named once so the two cannot drift apart.
	_NER_CHECKPOINT = "dbmdz/bert-large-cased-finetuned-conll03-english"
	tokenizer = AutoTokenizer.from_pretrained(_NER_CHECKPOINT)
	ner_model = AutoModelForTokenClassification.from_pretrained(_NER_CHECKPOINT)

	# "simple" aggregation makes the pipeline return grouped entities (with an
	# `entity_group` label) rather than one record per token.
	ner_pipeline = pipeline(
	    "ner",
	    model=ner_model,
	    tokenizer=tokenizer,
	    aggregation_strategy="simple",
	)

	# Define function to extract entities from text using the Hugging Face NER pipeline
	def extract_entities(text):
	    """Run the NER pipeline over ``text`` and sort the hits into buckets.

	    The ``entity_group`` label of every entity returned by ``ner_pipeline``
	    is checked for keyword substrings in a fixed priority order; the
	    entity's ``word`` is appended to the first bucket whose keyword matches.
	    Entities whose label matches no keyword are ignored.

	    Args:
	        text: Text handed unchanged to ``ner_pipeline``.

	    Returns:
	        A dict with the keys ``"skills"``, ``"experience"`` and
	        ``"education"``, each mapping to a (possibly empty) list of words.
	    """
	    # NOTE(review): a CoNLL-03 NER checkpoint normally emits PER/ORG/LOC/MISC
	    # groups; if that holds for the model behind ner_pipeline, none of the
	    # keywords below ever match and every bucket stays empty -- confirm.
	    label_rules = (
	        ("skills", ("SKILL",)),
	        ("experience", ("EXPERIENCE", "JOB")),
	        ("education", ("DEGREE", "EDUCATION")),
	    )
	    buckets = {bucket: [] for bucket, _ in label_rules}

	    for hit in ner_pipeline(text):
	        group = hit['entity_group']
	        # First matching rule wins, mirroring an if/elif chain.
	        for bucket, keywords in label_rules:
	            if any(keyword in group for keyword in keywords):
	                buckets[bucket].append(hit['word'])
	                break

	    return buckets

	def match_cv_to_job(cv_text, job_description):
	    """Score how well a CV matches a job description.

	    Two signals are blended into one percentage:

	    * the cosine similarity between the sentence embeddings of both texts
	      (weight 0.7), clamped to ``[0, 1]`` so that a negative cosine can
	      never produce a negative percentage;
	    * the per-category overlap between the entities that
	      ``extract_entities`` finds in each text (weight 0.3).

	    Args:
	        cv_text: CV text. ``None``, empty, or whitespace-only counts as
	            missing.
	        job_description: Job description text; handled like ``cv_text``.

	    Returns:
	        A ``(result, debug_info)`` tuple. ``result`` is
	        ``{"Match Percentage": "<xx.xx>%"}`` and ``debug_info`` is a
	        newline-terminated, multi-line string of intermediate scores.
	        When either input is missing the result is ``"0.00%"`` and no
	        model is invoked.
	    """
	    embedding_weight = 0.7
	    ner_weight = 0.3
	    debug_lines = ["Debug Info:"]

	    # Blank input would still be embedded and scored as if it were real
	    # text, so bail out early with an explicit zero.
	    cv_missing = not (cv_text and cv_text.strip())
	    job_missing = not (job_description and job_description.strip())
	    if cv_missing or job_missing:
	        debug_lines.append("Empty CV or job description; nothing to compare.")
	        return {"Match Percentage": "0.00%"}, "\n".join(debug_lines) + "\n"

	    # Extract entities from CV and job description
	    cv_entities = extract_entities(cv_text)
	    job_entities = extract_entities(job_description)

	    # Per category: the fraction of the job's distinct entities that also
	    # appear in the CV. Categories with no job entities contribute 0.
	    overlap_total = 0.0
	    for category, cv_values in cv_entities.items():
	        wanted = set(job_entities.get(category, ()))
	        if wanted:
	            overlap_total += len(wanted & set(cv_values)) / len(wanted)

	    # Average over the categories actually returned instead of a
	    # hard-coded count.
	    category_count = len(cv_entities)
	    ner_fraction = overlap_total / category_count if category_count else 0.0
	    ner_match_score = ner_fraction * 100
	    debug_lines.append(f"NER Match Score: {ner_match_score:.2f}%")

	    # Calculate overall similarity score using embeddings
	    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
	    job_embedding = model.encode(job_description, convert_to_tensor=True)
	    raw_similarity = util.pytorch_cos_sim(cv_embedding, job_embedding).item()

	    # Cosine similarity lies in [-1, 1]; treat anything below 0 as "no match".
	    similarity_score = min(1.0, max(0.0, raw_similarity))

	    # Combine scores with weights (embedding similarity + NER matching)
	    combined_score = similarity_score * embedding_weight + ner_fraction * ner_weight
	    match_percentage = combined_score * 100
	    debug_lines.append(f"Overall Match Percentage: {match_percentage:.2f}%")

	    return {"Match Percentage": f"{match_percentage:.2f}%"}, "\n".join(debug_lines) + "\n"

	# Gradio interface: two text inputs, a trigger button, and two result panes.
	with gr.Blocks() as demo:
	    gr.Markdown("# CV and Job Description Matcher with Embeddings and NER Matching")

	    # Inputs
	    cv_text = gr.Textbox(
	        label="CV Text",
	        placeholder="Enter the CV text here",
	        lines=10,
	    )
	    job_description = gr.Textbox(
	        label="Job Description",
	        placeholder="Enter the entire job description text here",
	        lines=10,
	    )

	    match_button = gr.Button("Calculate Match Percentage")

	    # Outputs: the JSON result and the free-text debug trace.
	    output = gr.JSON(label="Match Result")
	    debug_output = gr.Textbox(label="Debug Info", lines=10)

	    # The handler's two return values are routed to the two outputs in order.
	    match_button.click(
	        fn=match_cv_to_job,
	        inputs=[cv_text, job_description],
	        outputs=[output, debug_output],
	    )

	demo.launch()