# Hugging Face Space: saifeddinemk/cv_job (status: Sleeping) - file size: 3,419 bytes

from sentence_transformers import SentenceTransformer, util
import spacy
from fuzzywuzzy import fuzz
import gradio as gr

# Load models once at module import; the functions below use them as globals.
model = SentenceTransformer('msmarco-distilbert-base-v4')  # Sentence-embedding model used for CV/job similarity
nlp = spacy.load("en_core_web_sm")  # spaCy pipeline whose entities feed extract_entities

# Entity labels the matcher looks for in the job description and CV.
# NOTE(review): this list is not referenced by the code visible in this file;
# extract_entities compares against the same three label strings directly.
# NOTE(review): the stock en_core_web_sm pipeline presumably does not emit
# these labels (they look like custom NER labels) - confirm which model is intended.
TARGET_ENTITIES = ["SKILL", "EXPERIENCE", "DEGREE"]

def extract_entities(text):
    """Group the named entities found in *text* by job-related category.

    The text is run through the module-level spaCy pipeline ``nlp`` and each
    recognised entity is filed under a bucket chosen by its label; entities
    with any other label are ignored.

    NOTE(review): the labels matched here look like custom NER labels; the
    pipeline loaded at module level presumably does not produce them, in
    which case every bucket stays empty - confirm against the model in use.

    Args:
        text: Raw text to analyse.

    Returns:
        A dict with the keys ``"skills"``, ``"experience"`` and
        ``"education"``, each mapping to a list of entity strings in the
        order they occur in the document.
    """
    # Entity label -> name of the output bucket it is collected under.
    bucket_for_label = {
        "SKILL": "skills",
        "EXPERIENCE": "experience",
        "DEGREE": "education",
    }
    grouped = {bucket: [] for bucket in bucket_for_label.values()}
    for span in nlp(text).ents:
        bucket = bucket_for_label.get(span.label_)
        if bucket is not None:
            grouped[bucket].append(span.text)
    return grouped

def fuzzy_match_keywords(cv_text, job_text, keywords):
    """Return the average high-confidence fuzzy score of *keywords* in the CV.

    Each keyword is compared case-insensitively against the whole CV text
    with ``fuzz.partial_ratio``. Scores of 80 or below count as 0 so that
    weak partial matches do not inflate the result.

    Args:
        cv_text: CV text to search in.
        job_text: Unused by this function; kept in the signature for callers.
        keywords: Sequence of keyword strings to look for.

    Returns:
        The sum of the scores above 80 divided by the number of keywords,
        or 0 when *keywords* is empty.
    """
    ratios = (
        fuzz.partial_ratio(cv_text.lower(), term.lower()) for term in keywords
    )
    confident_total = sum(ratio for ratio in ratios if ratio > 80)
    if not keywords:
        return 0
    return confident_total / len(keywords)

def match_cv_to_job(cv_text, job_description):
    """Score how well a CV matches a job description.

    The score blends the cosine similarity of the two texts' sentence
    embeddings (weight 0.5) with fuzzy keyword scores for the skill (0.2),
    experience (0.2) and education (0.1) entities extracted from the job
    description.

    Args:
        cv_text: Full text of the CV.
        job_description: Full text of the job description.

    Returns:
        A tuple ``(result, debug_info)`` where ``result`` is
        ``{"Match Percentage": "<value>%"}`` with the value clamped to the
        range 0-100 and formatted to two decimals, and ``debug_info`` is a
        multi-line string listing the intermediate fuzzy scores and the
        overall percentage.
    """
    # Keywords come from the job description only; the CV is matched against
    # them as raw text, so no entity extraction is needed for the CV itself.
    job_entities = extract_entities(job_description)

    # Fuzzy keyword scores, each on a 0-100 scale.
    fuzzy_skill_score = fuzzy_match_keywords(cv_text, job_description, job_entities["skills"])
    fuzzy_experience_score = fuzzy_match_keywords(cv_text, job_description, job_entities["experience"])
    fuzzy_education_score = fuzzy_match_keywords(cv_text, job_description, job_entities["education"])

    # Overall semantic similarity from the sentence embeddings.
    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()

    # Weighted blend; the fuzzy scores are rescaled from 0-100 to 0-1 first.
    combined_score = (
        similarity_score * 0.5 +
        (fuzzy_skill_score / 100) * 0.2 +
        (fuzzy_experience_score / 100) * 0.2 +
        (fuzzy_education_score / 100) * 0.1
    )
    # Cosine similarity can be negative, which would otherwise surface as a
    # negative percentage; keep the reported value within 0-100.
    match_percentage = max(0.0, min(100.0, combined_score * 100))

    debug_lines = [
        "Debug Info:",
        f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}",
        f"Fuzzy Experience Score: {fuzzy_experience_score:.2f}",
        f"Fuzzy Education Score: {fuzzy_education_score:.2f}",
        f"Overall Match Percentage: {match_percentage:.2f}%",
    ]
    debug_info = "\n".join(debug_lines) + "\n"

    return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info

# Gradio interface: two text inputs, a button, and two outputs wired to match_cv_to_job.
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced CV and Job Description Matcher with NER and Fuzzy Matching")
    
    # Free-text inputs; passed positionally as match_cv_to_job(cv_text, job_description).
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)
    
    match_button = gr.Button("Calculate Match Percentage")
    # match_cv_to_job returns (result dict, debug string); these two components
    # receive those values in that order.
    output = gr.JSON(label="Match Result")
    debug_output = gr.Textbox(label="Debug Info", lines=10)
    
    # Run the matcher on click and route its two return values to the outputs.
    match_button.click(fn=match_cv_to_job, inputs=[cv_text, job_description], outputs=[output, debug_output])

# Start the app; this runs whenever the module is executed or imported
# (there is no __main__ guard).
demo.launch()