saifeddinemk committed on
Commit
abe3356
1 Parent(s): 0e74d2d

Init Commit

Browse files
Files changed (1) hide show
  1. app.py +34 -18
app.py CHANGED
@@ -1,37 +1,53 @@
1
  from sentence_transformers import SentenceTransformer, util
2
- from fuzzywuzzy import fuzz
3
  import gradio as gr
4
 
5
  # Load the SentenceTransformer model
6
  model = SentenceTransformer('msmarco-distilbert-base-v4')
7
 
8
- # Define job-specific keywords manually
9
- TARGET_KEYWORDS = ["skill", "experience", "degree"]
 
 
10
 
11
- def fuzzy_match_keywords(cv_text, job_text, keywords):
12
- match_score = 0
13
- for keyword in keywords:
14
- score = fuzz.partial_ratio(cv_text.lower(), keyword.lower())
15
- match_score += score if score > 80 else 0 # Only consider high-confidence matches
16
- return match_score / len(keywords) if keywords else 0
 
 
 
 
 
 
 
17
 
18
  def match_cv_to_job(cv_text, job_description):
19
  debug_info = "Debug Info:\n"
20
 
21
- # Compute fuzzy matching score for manually defined keywords
22
- fuzzy_skill_score = fuzzy_match_keywords(cv_text, job_description, TARGET_KEYWORDS)
23
- debug_info += f"Fuzzy Skill Score: {fuzzy_skill_score:.2f}\n"
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Calculate overall similarity score using embeddings
26
  cv_embedding = model.encode(cv_text, convert_to_tensor=True)
27
  job_embedding = model.encode(job_description, convert_to_tensor=True)
28
  similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
29
 
30
- # Combine scores with weights (embedding similarity + fuzzy matching)
31
- combined_score = (
32
- similarity_score * 0.7 + # Higher weight for embedding similarity
33
- (fuzzy_skill_score / 100) * 0.3 # Fuzzy matching for keywords
34
- )
35
  match_percentage = combined_score * 100
36
  debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
37
 
@@ -39,7 +55,7 @@ def match_cv_to_job(cv_text, job_description):
39
 
40
  # Gradio interface
41
  with gr.Blocks() as demo:
42
- gr.Markdown("# CV and Job Description Matcher with Embeddings and Fuzzy Matching")
43
 
44
  cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
45
  job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)
 
1
  from sentence_transformers import SentenceTransformer, util
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
  import gradio as gr
4
 
5
  # Load the SentenceTransformer model
6
  model = SentenceTransformer('msmarco-distilbert-base-v4')
7
 
8
+ # Load Hugging Face NER model and tokenizer
9
+ tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
10
+ ner_model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
11
+ ner_pipeline = pipeline("ner", model=ner_model, tokenizer=tokenizer, aggregation_strategy="simple")
12
 
13
+ # Define function to extract entities from text using the Hugging Face NER pipeline
14
def extract_entities(text):
    """Bucket NER pipeline hits into skills, experience and education lists.

    Runs the module-level ``ner_pipeline`` over ``text`` and files each
    entity's ``word`` under the first category whose marker substring occurs
    in the entity's ``entity_group`` label. Entities whose label contains
    none of the markers are dropped.

    Returns a dict with the keys "skills", "experience" and "education",
    each mapping to a list of words in the order the pipeline produced them.
    """
    # NOTE(review): the checkpoint loaded for ner_pipeline is a CoNLL-03
    # model; its labels are presumably PER/ORG/LOC/MISC, in which case none of
    # the markers below ever matches and every list stays empty -- confirm
    # against the model card.
    markers_by_category = (
        ("skills", ("SKILL",)),
        ("experience", ("EXPERIENCE", "JOB")),
        ("education", ("DEGREE", "EDUCATION")),
    )
    buckets = {category: [] for category, _ in markers_by_category}
    for hit in ner_pipeline(text):
        group = hit['entity_group']
        for category, markers in markers_by_category:
            if any(marker in group for marker in markers):
                buckets[category].append(hit['word'])
                break  # first matching category wins, mirroring an elif chain
    return buckets
26
 
27
  def match_cv_to_job(cv_text, job_description):
28
  debug_info = "Debug Info:\n"
29
 
30
+ # Extract entities from CV and job description
31
+ cv_entities = extract_entities(cv_text)
32
+ job_entities = extract_entities(job_description)
33
+
34
+ # Calculate similarity score between entities
35
+ match_score = 0
36
+ for key in cv_entities:
37
+ if key in job_entities:
38
+ match_score += len(set(cv_entities[key]) & set(job_entities[key])) / len(set(job_entities[key])) if job_entities[key] else 0
39
+
40
+ # Average score by number of categories
41
+ ner_match_score = (match_score / 3) * 100 # Normalized score for NER entities
42
+ debug_info += f"NER Match Score: {ner_match_score:.2f}%\n"
43
 
44
  # Calculate overall similarity score using embeddings
45
  cv_embedding = model.encode(cv_text, convert_to_tensor=True)
46
  job_embedding = model.encode(job_description, convert_to_tensor=True)
47
  similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
48
 
49
+ # Combine scores with weights (embedding similarity + NER matching)
50
+ combined_score = (similarity_score * 0.7) + (ner_match_score / 100) * 0.3 # Weighted combined score
 
 
 
51
  match_percentage = combined_score * 100
52
  debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
53
 
 
55
 
56
  # Gradio interface
57
  with gr.Blocks() as demo:
58
+ gr.Markdown("# CV and Job Description Matcher with Embeddings and NER Matching")
59
 
60
  cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
61
  job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)