Spaces:

saifeddinemk
/

cv_job

Sleeping

App Files Files Community

saifeddinemk committed 7 days ago

Commit

abe3356

•

1 Parent(s): 0e74d2d

Init Commit

Browse files

Files changed (1) hide show

app.py +34 -18

app.py CHANGED Viewed

@@ -1,37 +1,53 @@
 from sentence_transformers import SentenceTransformer, util
-from fuzzywuzzy import fuzz
 import gradio as gr
 # Load the SentenceTransformer model
 model = SentenceTransformer('msmarco-distilbert-base-v4')
-# Define job-specific keywords manually
-TARGET_KEYWORDS = ["skill", "experience", "degree"]
-def fuzzy_match_keywords(cv_text, job_text, keywords):
-    match_score = 0
-    for keyword in keywords:
-        score = fuzz.partial_ratio(cv_text.lower(), keyword.lower())
-        match_score += score if score > 80 else 0  # Only consider high-confidence matches
-    return match_score / len(keywords) if keywords else 0
 def match_cv_to_job(cv_text, job_description):
     debug_info = "Debug Info:\n"
-    # Compute fuzzy matching score for manually defined keywords
-    fuzzy_skill_score = fuzzy_match_keywords(cv_text, job_description, TARGET_KEYWORDS)
-    debug_info += f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}\n"
     # Calculate overall similarity score using embeddings
     cv_embedding = model.encode(cv_text, convert_to_tensor=True)
     job_embedding = model.encode(job_description, convert_to_tensor=True)
     similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
-    # Combine scores with weights (embedding similarity + fuzzy matching)
-    combined_score = (
-        similarity_score * 0.7 +  # Higher weight for embedding similarity
-        (fuzzy_skill_score / 100) * 0.3  # Fuzzy matching for keywords
-    )
     match_percentage = combined_score * 100
     debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
@@ -39,7 +55,7 @@ def match_cv_to_job(cv_text, job_description):
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# CV and Job Description Matcher with Embeddings and Fuzzy Matching")
     cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
     job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)

 from sentence_transformers import SentenceTransformer, util
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 import gradio as gr
 # Load the SentenceTransformer model
 model = SentenceTransformer('msmarco-distilbert-base-v4')
+# Load Hugging Face NER model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
+ner_model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
+ner_pipeline = pipeline("ner", model=ner_model, tokenizer=tokenizer, aggregation_strategy="simple")
+# Define function to extract entities from text using the Hugging Face NER pipeline
+def extract_entities(text):  # Run the NER pipeline on `text` and bucket the hits into skills / experience / education lists.
+    entities = {"skills": [], "experience": [], "education": []}  # result skeleton: all three keys are always present, possibly empty
+    ner_results = ner_pipeline(text)  # module-level pipeline, built above with aggregation_strategy="simple"
+    for entity in ner_results:
+        label = entity['entity_group']  # NOTE(review): checkpoint name says CoNLL-03 - confirm it emits SKILL/EXPERIENCE/EDUCATION-style labels, otherwise every list stays empty
+        if "SKILL" in label:  # substring test, so any label containing "SKILL" lands here
+            entities["skills"].append(entity['word'])
+        elif "EXPERIENCE" in label or "JOB" in label:  # either marker routes the word to "experience"
+            entities["experience"].append(entity['word'])
+        elif "DEGREE" in label or "EDUCATION" in label:  # either marker routes the word to "education"
+            entities["education"].append(entity['word'])
+    return entities  # results whose label matches none of the branches are silently dropped; duplicates are kept
 def match_cv_to_job(cv_text, job_description):
     debug_info = "Debug Info:\n"
+    # Extract entities from CV and job description
+    cv_entities = extract_entities(cv_text)
+    job_entities = extract_entities(job_description)
+    # Calculate similarity score between entities
+    match_score = 0
+    for key in cv_entities:
+        if key in job_entities:
+            match_score += len(set(cv_entities[key]) & set(job_entities[key])) / len(set(job_entities[key])) if job_entities[key] else 0
+    # Average score by number of categories
+    ner_match_score = (match_score / 3) * 100  # Normalized score for NER entities
+    debug_info += f"NER Match Score: {ner_match_score:.2f}%\n"
     # Calculate overall similarity score using embeddings
     cv_embedding = model.encode(cv_text, convert_to_tensor=True)
     job_embedding = model.encode(job_description, convert_to_tensor=True)
     similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
+    # Combine scores with weights (embedding similarity + NER matching)
+    combined_score = (similarity_score * 0.7) + (ner_match_score / 100) * 0.3  # Weighted combined score
     match_percentage = combined_score * 100
     debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
 # Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# CV and Job Description Matcher with Embeddings and NER Matching")
     cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
     job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)