Spaces:
Sleeping
Sleeping
File size: 3,419 Bytes
f5d2489 6a1a97b 9208e17 b397dc0 6a1a97b 725f549 3efbf71 f65dc03 9208e17 6a1a97b 9208e17 3efbf71 9208e17 6a1a97b 9208e17 3efbf71 9208e17 3efbf71 6a1a97b 9f26a6c 3efbf71 91207a8 9208e17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
from sentence_transformers import SentenceTransformer, util
import spacy
from fuzzywuzzy import fuzz
import gradio as gr
# Identifiers of the pretrained models, named once so the load calls read clearly.
_EMBEDDING_MODEL_NAME = 'msmarco-distilbert-base-v4'
_SPACY_PIPELINE_NAME = "en_core_web_sm"

# Load both models a single time at import; the functions below reuse them.
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)  # sentence-embedding model
nlp = spacy.load(_SPACY_PIPELINE_NAME)  # spaCy pipeline used for NER

# Entity labels of interest in the job description and CV
TARGET_ENTITIES = ["SKILL", "EXPERIENCE", "DEGREE"]
def extract_entities(text):
    """Group the named entities found in *text* by job-related category.

    Runs the module-level spaCy pipeline ``nlp`` over *text* and files each
    entity's text under a bucket chosen by its label: ``SKILL`` goes to
    ``"skills"``, ``EXPERIENCE`` to ``"experience"`` and ``DEGREE`` to
    ``"education"``. Entities with any other label are ignored.

    Returns:
        A dict with the keys ``"skills"``, ``"experience"`` and
        ``"education"`` (always present), each holding a list of entity
        strings in the order they occur in the document.
    """
    # NOTE(review): these labels presumably come from a custom NER component;
    # confirm the loaded pipeline actually emits them, otherwise every list
    # stays empty.
    label_to_bucket = {
        "SKILL": "skills",
        "EXPERIENCE": "experience",
        "DEGREE": "education",
    }
    grouped = {bucket: [] for bucket in label_to_bucket.values()}
    for entity in nlp(text).ents:
        bucket = label_to_bucket.get(entity.label_)
        if bucket is not None:
            grouped[bucket].append(entity.text)
    return grouped
def fuzzy_match_keywords(cv_text, job_text, keywords, threshold=80):
    """Return the mean fuzzy-match score of *keywords* against *cv_text*.

    Each keyword is compared case-insensitively with the CV text using
    ``fuzz.partial_ratio``. A score that does not exceed *threshold* counts
    as 0, so only high-confidence matches add to the average; the divisor is
    still the total number of keywords.

    Args:
        cv_text: CV text to search in.
        job_text: Unused; kept so existing callers keep working.
        keywords: Keywords to look for in *cv_text*.
        threshold: Score a keyword must exceed to contribute. Defaults to 80.

    Returns:
        The sum of the contributing scores divided by ``len(keywords)``, or
        0 when *keywords* is empty.
    """
    # Guard first: with no keywords nothing is compared and cv_text is never
    # touched.
    if not keywords:
        return 0

    # Lower-case the (potentially long) CV once instead of once per keyword.
    cv_lower = cv_text.lower()
    scores = (fuzz.partial_ratio(cv_lower, keyword.lower()) for keyword in keywords)
    return sum(score for score in scores if score > threshold) / len(keywords)
def match_cv_to_job(cv_text, job_description):
    """Score how well a CV matches a job description.

    The score blends four components:

    * cosine similarity of the two texts' sentence embeddings (weight 0.5),
    * fuzzy match of the job's skill entities against the CV (weight 0.2),
    * fuzzy match of the job's experience entities against the CV (weight 0.2),
    * fuzzy match of the job's education entities against the CV (weight 0.1).

    The fuzzy scores are divided by 100 before weighting, and the blended
    value is reported as a percentage clamped to the range 0-100.

    Args:
        cv_text: Full text of the CV.
        job_description: Full text of the job description.

    Returns:
        A 2-tuple ``(result, debug_info)`` where ``result`` is
        ``{"Match Percentage": "<value>%"}`` with two decimals and
        ``debug_info`` is a multi-line string listing the three fuzzy scores
        and the overall percentage.
    """
    # Only the job description's entities feed the score; the CV is matched
    # as raw text, so no NER pass over the CV is needed.
    job_entities = extract_entities(job_description)

    # Fuzzy matching score for skills, experience and education keywords.
    fuzzy_skill_score = fuzzy_match_keywords(
        cv_text, job_description, job_entities["skills"]
    )
    fuzzy_experience_score = fuzzy_match_keywords(
        cv_text, job_description, job_entities["experience"]
    )
    fuzzy_education_score = fuzzy_match_keywords(
        cv_text, job_description, job_entities["education"]
    )

    # Overall semantic similarity from sentence embeddings.
    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()

    # Weighted blend of embedding similarity and the fuzzy keyword scores.
    combined_score = (
        similarity_score * 0.5
        + (fuzzy_skill_score / 100) * 0.2
        + (fuzzy_experience_score / 100) * 0.2
        + (fuzzy_education_score / 100) * 0.1
    )

    # Cosine similarity can be negative; clamp so the reported percentage
    # never falls outside 0-100.
    match_percentage = max(0.0, min(100.0, combined_score * 100))

    debug_lines = [
        "Debug Info:",
        f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}",
        f"Fuzzy Experience Score: {fuzzy_experience_score:.2f}",
        f"Fuzzy Education Score: {fuzzy_education_score:.2f}",
        f"Overall Match Percentage: {match_percentage:.2f}%",
    ]
    debug_info = "\n".join(debug_lines) + "\n"

    return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info
# Gradio interface: two text inputs, a trigger button, and two result panes.
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced CV and Job Description Matcher with NER and Fuzzy Matching")

    # Inputs
    cv_text = gr.Textbox(
        label="CV Text",
        placeholder="Enter the CV text here",
        lines=10,
    )
    job_description = gr.Textbox(
        label="Job Description",
        placeholder="Enter the entire job description text here",
        lines=10,
    )

    match_button = gr.Button("Calculate Match Percentage")

    # Outputs: the JSON result and the plain-text debug trace returned by
    # match_cv_to_job, in that order.
    output = gr.JSON(label="Match Result")
    debug_output = gr.Textbox(label="Debug Info", lines=10)

    match_button.click(
        fn=match_cv_to_job,
        inputs=[cv_text, job_description],
        outputs=[output, debug_output],
    )

demo.launch()
|