File size: 3,419 Bytes
f5d2489
6a1a97b
 
9208e17
b397dc0
6a1a97b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
725f549
3efbf71
f65dc03
9208e17
6a1a97b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9208e17
3efbf71
9208e17
 
 
6a1a97b
9208e17
 
3efbf71
9208e17
3efbf71
 
6a1a97b
9f26a6c
3efbf71
91207a8
9208e17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from sentence_transformers import SentenceTransformer, util
import spacy
from fuzzywuzzy import fuzz
import gradio as gr

# Load models once at module import; the functions below reuse these globals.
# `model` produces the sentence embeddings compared in match_cv_to_job.
# NOTE(review): the inline comment calls this a job-specific model; the model
# name suggests an MS MARCO-trained checkpoint rather than a job-domain one — confirm.
model = SentenceTransformer('msmarco-distilbert-base-v4')  # Use job-specific model
nlp = spacy.load("en_core_web_sm")  # NER model; its `doc.ents` are read by extract_entities

# Define target job-specific entities to look for in the job description and CV.
# NOTE(review): this list is not referenced anywhere in the visible code;
# extract_entities hard-codes the same three labels instead.
TARGET_ENTITIES = ["SKILL", "EXPERIENCE", "DEGREE"]

def extract_entities(text):
    """Group the named entities found in *text* by job-related category.

    The text is run through the module-level ``nlp`` pipeline and every
    entity whose label is ``SKILL``, ``EXPERIENCE`` or ``DEGREE`` is filed
    under ``"skills"``, ``"experience"`` or ``"education"`` respectively.
    Entities with any other label are ignored.

    Args:
        text: Raw text to analyse (a CV or a job description).

    Returns:
        A dict with the keys ``"skills"``, ``"experience"`` and
        ``"education"``, each mapping to a list of entity strings in the
        order they appear in the text.
    """
    # NOTE(review): the stock en_core_web_sm label scheme does not appear to
    # include SKILL / EXPERIENCE / DEGREE, so unless a custom NER component is
    # added to the pipeline these lists may always be empty — confirm.
    bucket_for_label = {
        "SKILL": "skills",
        "EXPERIENCE": "experience",
        "DEGREE": "education",
    }
    grouped = {bucket: [] for bucket in bucket_for_label.values()}
    for span in nlp(text).ents:
        bucket = bucket_for_label.get(span.label_)
        if bucket is not None:
            grouped[bucket].append(span.text)
    return grouped

def fuzzy_match_keywords(cv_text, job_text, keywords):
    """Return the average high-confidence fuzzy score of *keywords* in the CV.

    Each keyword is compared, case-insensitively, against the whole CV text
    with ``fuzz.partial_ratio``. Only scores strictly above 80 contribute;
    weaker matches count as zero. The total is divided by the number of
    keywords, so unmatched keywords pull the average down.

    Args:
        cv_text: Full text of the CV.
        job_text: Full text of the job description (accepted but not used).
        keywords: Sized iterable of keyword strings to look for.

    Returns:
        The averaged score, or ``0`` when *keywords* is empty.
    """
    ratios = (
        fuzz.partial_ratio(cv_text.lower(), term.lower()) for term in keywords
    )
    # Drop low-confidence matches before summing.
    confident_total = sum(ratio for ratio in ratios if ratio > 80)
    if not keywords:
        return 0
    return confident_total / len(keywords)

def match_cv_to_job(cv_text, job_description):
    """Score how well a CV matches a job description.

    The score blends four components:

    * cosine similarity of the two texts' sentence embeddings (weight 0.5),
    * fuzzy keyword score for the job's skill entities (weight 0.2),
    * fuzzy keyword score for the job's experience entities (weight 0.2),
    * fuzzy keyword score for the job's education entities (weight 0.1).

    The keywords come from the entities extracted from the job description
    and are searched for in the CV text.

    Args:
        cv_text: Full text of the CV.
        job_description: Full text of the job description.

    Returns:
        A ``(result, debug_info)`` tuple. ``result`` is a dict whose
        ``"Match Percentage"`` value is a string such as ``"42.00%"``,
        always within 0-100. ``debug_info`` is a multi-line string listing
        the intermediate fuzzy scores and the final percentage.
    """
    debug_info = "Debug Info:\n"

    # Only the job description's entities are needed: they are the keywords
    # searched for in the raw CV text. (The CV's own entities were extracted
    # before but never used, costing an extra NLP pass.)
    job_entities = extract_entities(job_description)

    # Compute fuzzy matching score for skills, experience, education keywords
    fuzzy_skill_score = fuzzy_match_keywords(cv_text, job_description, job_entities["skills"])
    fuzzy_experience_score = fuzzy_match_keywords(cv_text, job_description, job_entities["experience"])
    fuzzy_education_score = fuzzy_match_keywords(cv_text, job_description, job_entities["education"])
    debug_info += f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}\n"
    debug_info += f"Fuzzy Experience Score: {fuzzy_experience_score:.2f}\n"
    debug_info += f"Fuzzy Education Score: {fuzzy_education_score:.2f}\n"

    # Calculate overall similarity score using embeddings
    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()

    # Combine scores with weights; fuzzy scores are divided by 100 to put
    # them on the same scale as the cosine similarity.
    combined_score = (
        similarity_score * 0.5 +
        (fuzzy_skill_score / 100) * 0.2 +
        (fuzzy_experience_score / 100) * 0.2 +
        (fuzzy_education_score / 100) * 0.1
    )
    # Cosine similarity can be negative for unrelated texts, which would
    # yield a negative "percentage"; clamp to the valid 0-100 range.
    match_percentage = max(0.0, min(100.0, combined_score * 100))
    debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"

    return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info

# Gradio interface: two text inputs, a button, and two outputs wired to
# match_cv_to_job. Components are declared in the order they should appear.
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced CV and Job Description Matcher with NER and Fuzzy Matching")
    
    # Inputs; passed to match_cv_to_job in the order given in `inputs` below.
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)
    
    match_button = gr.Button("Calculate Match Percentage")
    # Outputs; they receive match_cv_to_job's two return values in order:
    # the result dict (shown as JSON) and the debug string.
    output = gr.JSON(label="Match Result")
    debug_output = gr.Textbox(label="Debug Info", lines=10)
    
    # Run the matcher when the button is clicked.
    match_button.click(fn=match_cv_to_job, inputs=[cv_text, job_description], outputs=[output, debug_output])

# Launch the app at module level (there is no `__main__` guard, so this also
# runs if the module is imported).
demo.launch()