"""Gradio app that scores how well a CV matches a job description.

The score blends sentence-embedding cosine similarity with fuzzy keyword
matching of entities found in the job description by a spaCy NER pipeline.
"""

from typing import Dict, List, Tuple

import gradio as gr
import spacy
from fuzzywuzzy import fuzz
from sentence_transformers import SentenceTransformer, util

# Load models once at import time; every request reuses them.
model = SentenceTransformer("msmarco-distilbert-base-v4")  # Sentence-embedding model
nlp = spacy.load("en_core_web_sm")  # NER model

# Define target job-specific entities to look for in the job description and CV
TARGET_ENTITIES = ["SKILL", "EXPERIENCE", "DEGREE"]

# Maps each NER label in TARGET_ENTITIES to the key used in the result dict.
# NOTE(review): the stock "en_core_web_sm" pipeline's NER labels (PERSON, ORG,
# GPE, DATE, ...) do not appear to include SKILL / EXPERIENCE / DEGREE. Unless
# a custom NER component or EntityRuler is added to `nlp`, the extracted lists
# will be empty and the fuzzy scores below will always be 0 -- confirm.
_LABEL_TO_BUCKET = {
    "SKILL": "skills",
    "EXPERIENCE": "experience",
    "DEGREE": "education",
}


def extract_entities(text: str) -> Dict[str, List[str]]:
    """Group the named entities of *text* by job-related category.

    Args:
        text: Free text (a CV or a job description) to run through ``nlp``.

    Returns:
        A dict with the keys ``"skills"``, ``"experience"`` and
        ``"education"``, each holding the entity texts whose label is
        ``SKILL``, ``EXPERIENCE`` or ``DEGREE`` respectively. Entities with
        any other label are ignored.
    """
    entities: Dict[str, List[str]] = {
        bucket: [] for bucket in _LABEL_TO_BUCKET.values()
    }
    for ent in nlp(text).ents:
        bucket = _LABEL_TO_BUCKET.get(ent.label_)
        if bucket is not None:
            entities[bucket].append(ent.text)
    return entities


def fuzzy_match_keywords(cv_text, job_text, keywords, *, threshold=80):
    """Return the average fuzzy-match score of *keywords* against the CV.

    Each keyword is compared case-insensitively with the CV text using
    ``fuzz.partial_ratio``. Scores at or below *threshold* count as 0 so
    that only high-confidence matches contribute.

    Args:
        cv_text: The CV text to search in.
        job_text: Unused; kept so existing callers keep working.
        keywords: Keywords to look for in the CV.
        threshold: A keyword only contributes when its score is strictly
            greater than this value.

    Returns:
        The sum of contributing scores divided by the number of keywords,
        or ``0.0`` when *keywords* is empty.
    """
    if not keywords:
        return 0.0

    cv_lower = cv_text.lower()  # Loop-invariant: lowercase the CV only once.
    total = 0
    for keyword in keywords:
        score = fuzz.partial_ratio(cv_lower, keyword.lower())
        if score > threshold:
            total += score
    return total / len(keywords)


def match_cv_to_job(cv_text: str, job_description: str) -> Tuple[Dict[str, str], str]:
    """Compute a match percentage between a CV and a job description.

    The combined score weights embedding similarity at 0.5, the fuzzy skill
    and experience scores at 0.2 each, and the fuzzy education score at 0.1.

    Args:
        cv_text: Full text of the CV.
        job_description: Full text of the job description.

    Returns:
        A ``(result, debug_info)`` tuple: ``result`` is
        ``{"Match Percentage": "<value>%"}`` and ``debug_info`` is a
        multi-line string listing the intermediate scores.
    """
    # An empty side cannot be compared meaningfully; encoding "" would still
    # give some embedding and hence an arbitrary score.
    if not (cv_text or "").strip() or not (job_description or "").strip():
        return (
            {"Match Percentage": "0.00%"},
            "Debug Info:\nCV text or job description is empty; nothing to compare.\n",
        )

    # Only the job description's entities are needed: they are the keywords
    # that get searched for in the CV.
    job_entities = extract_entities(job_description)

    # Compute fuzzy matching score for skills, experience, education keywords
    fuzzy_skill_score = fuzzy_match_keywords(
        cv_text, job_description, job_entities["skills"]
    )
    fuzzy_experience_score = fuzzy_match_keywords(
        cv_text, job_description, job_entities["experience"]
    )
    fuzzy_education_score = fuzzy_match_keywords(
        cv_text, job_description, job_entities["education"]
    )

    # Calculate overall similarity score using embeddings
    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
    # Cosine similarity can be negative; clamp so the percentage never is.
    similarity_score = max(similarity_score, 0.0)

    # Combine scores with weights (embedding similarity + NER + fuzzy matching).
    # The fuzzy scores are divided by 100 to put them on the same 0-1 scale
    # as the similarity score.
    combined_score = (
        similarity_score * 0.5
        + (fuzzy_skill_score / 100) * 0.2
        + (fuzzy_experience_score / 100) * 0.2
        + (fuzzy_education_score / 100) * 0.1
    )
    match_percentage = combined_score * 100

    debug_info = "".join(
        [
            "Debug Info:\n",
            f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}\n",
            f"Fuzzy Experience Score: {fuzzy_experience_score:.2f}\n",
            f"Fuzzy Education Score: {fuzzy_education_score:.2f}\n",
            f"Overall Match Percentage: {match_percentage:.2f}%\n",
        ]
    )
    return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced CV and Job Description Matcher with NER and Fuzzy Matching")

    cv_text = gr.Textbox(
        label="CV Text",
        placeholder="Enter the CV text here",
        lines=10,
    )
    job_description = gr.Textbox(
        label="Job Description",
        placeholder="Enter the entire job description text here",
        lines=10,
    )

    match_button = gr.Button("Calculate Match Percentage")
    output = gr.JSON(label="Match Result")
    debug_output = gr.Textbox(label="Debug Info", lines=10)

    match_button.click(
        fn=match_cv_to_job,
        inputs=[cv_text, job_description],
        outputs=[output, debug_output],
    )

demo.launch()