from sentence_transformers import SentenceTransformer, util
import spacy
from fuzzywuzzy import fuzz
import gradio as gr
# Load models
model = SentenceTransformer('msmarco-distilbert-base-v4')  # Retrieval model suited to matching queries against documents
nlp = spacy.load("en_core_web_sm")  # spaCy pipeline used for NER

# Job-specific entity labels to look for in the job description and CV.
# Note: the stock en_core_web_sm model does not emit these labels; a custom-trained
# NER component is required for them to be recognized.
TARGET_ENTITIES = ["SKILL", "EXPERIENCE", "DEGREE"]
def extract_entities(text):
    doc = nlp(text)
    entities = {
        "skills": [],
        "experience": [],
        "education": []
    }
    for ent in doc.ents:
        if ent.label_ == "SKILL":
            entities["skills"].append(ent.text)
        elif ent.label_ == "EXPERIENCE":
            entities["experience"].append(ent.text)
        elif ent.label_ == "DEGREE":
            entities["education"].append(ent.text)
    return entities
def fuzzy_match_keywords(cv_text, keywords):
    """Average fuzzy-match score (0-100) of the given keywords against the CV text."""
    match_score = 0
    for keyword in keywords:
        score = fuzz.partial_ratio(cv_text.lower(), keyword.lower())
        match_score += score if score > 80 else 0  # Only count high-confidence matches
    return match_score / len(keywords) if keywords else 0
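# Note on the 80-point cutoff above (behavior of fuzz.partial_ratio; exact scores can
# vary slightly between fuzzywuzzy versions): an exact substring match, such as the
# keyword "python" appearing verbatim in the CV, scores 100 and is counted, while
# weaker partial overlaps score lower and, at or below the cutoff, contribute nothing
# to the average.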
def match_cv_to_job(cv_text, job_description):
    debug_info = "Debug Info:\n"

    # Extract entities from the CV and the job description
    cv_entities = extract_entities(cv_text)
    job_entities = extract_entities(job_description)

    # Fuzzy-match the job description's skill/experience/education keywords against the CV
    fuzzy_skill_score = fuzzy_match_keywords(cv_text, job_entities["skills"])
    fuzzy_experience_score = fuzzy_match_keywords(cv_text, job_entities["experience"])
    fuzzy_education_score = fuzzy_match_keywords(cv_text, job_entities["education"])

    debug_info += f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}\n"
    debug_info += f"Fuzzy Experience Score: {fuzzy_experience_score:.2f}\n"
    debug_info += f"Fuzzy Education Score: {fuzzy_education_score:.2f}\n"

    # Overall semantic similarity between the full CV and job description embeddings
    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()

    # Combine the scores: embedding similarity carries half the weight, and the fuzzy
    # keyword scores (rescaled from 0-100 to 0-1) carry the rest.
    # Example: similarity 0.70 with fuzzy scores 90/85/0 gives
    # 0.70*0.5 + 0.90*0.2 + 0.85*0.2 + 0.00*0.1 = 0.70, i.e. a 70% match.
    combined_score = (
        similarity_score * 0.5 +
        (fuzzy_skill_score / 100) * 0.2 +
        (fuzzy_experience_score / 100) * 0.2 +
        (fuzzy_education_score / 100) * 0.1
    )
    match_percentage = combined_score * 100

    debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
    return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced CV and Job Description Matcher with NER and Fuzzy Matching")

    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)

    match_button = gr.Button("Calculate Match Percentage")
    output = gr.JSON(label="Match Result")
    debug_output = gr.Textbox(label="Debug Info", lines=10)

    match_button.click(fn=match_cv_to_job, inputs=[cv_text, job_description], outputs=[output, debug_output])

demo.launch()
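# Typical local setup (assumed; versions not pinned by the source):
#   pip install sentence-transformers spacy fuzzywuzzy python-Levenshtein gradio
#   python -m spacy download en_core_web_sm
#   python app.py   # assuming the script is saved as app.py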