Spaces:
Sleeping
Sleeping
saifeddinemk
committed on
Commit
•
abe3356
1
Parent(s):
0e74d2d
Init Commit
Browse files
app.py
CHANGED
@@ -1,37 +1,53 @@
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
-
from
|
3 |
import gradio as gr
|
4 |
|
5 |
# Load the SentenceTransformer model
|
6 |
model = SentenceTransformer('msmarco-distilbert-base-v4')
|
7 |
|
8 |
-
#
|
9 |
-
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def match_cv_to_job(cv_text, job_description):
|
19 |
debug_info = "Debug Info:\n"
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# Calculate overall similarity score using embeddings
|
26 |
cv_embedding = model.encode(cv_text, convert_to_tensor=True)
|
27 |
job_embedding = model.encode(job_description, convert_to_tensor=True)
|
28 |
similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
|
29 |
|
30 |
-
# Combine scores with weights (embedding similarity +
|
31 |
-
combined_score = (
|
32 |
-
similarity_score * 0.7 + # Higher weight for embedding similarity
|
33 |
-
(fuzzy_skill_score / 100) * 0.3 # Fuzzy matching for keywords
|
34 |
-
)
|
35 |
match_percentage = combined_score * 100
|
36 |
debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
|
37 |
|
@@ -39,7 +55,7 @@ def match_cv_to_job(cv_text, job_description):
|
|
39 |
|
40 |
# Gradio interface
|
41 |
with gr.Blocks() as demo:
|
42 |
-
gr.Markdown("# CV and Job Description Matcher with Embeddings and
|
43 |
|
44 |
cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
|
45 |
job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)
|
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
3 |
import gradio as gr
|
4 |
|
5 |
# Load the SentenceTransformer model
|
6 |
model = SentenceTransformer('msmarco-distilbert-base-v4')

# Load Hugging Face NER model and tokenizer.
# The tokenizer and the token-classification model are loaded from the same
# checkpoint name, so it is spelled once here and reused for both.
_NER_CHECKPOINT = "dbmdz/bert-large-cased-finetuned-conll03-english"

tokenizer = AutoTokenizer.from_pretrained(_NER_CHECKPOINT)
ner_model = AutoModelForTokenClassification.from_pretrained(_NER_CHECKPOINT)

# aggregation_strategy="simple" is passed so that results carry an
# 'entity_group' key, which extract_entities reads from each result.
ner_pipeline = pipeline(
    "ner",
    model=ner_model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
|
12 |
|
13 |
+
# Define function to extract entities from text using the Hugging Face NER pipeline
|
14 |
+
def extract_entities(text):
    """Sort NER hits found in ``text`` into skill/experience/education lists.

    The module-level ``ner_pipeline`` is run over ``text``.  Each result's
    ``'word'`` is appended to the first bucket whose keyword occurs as a
    substring of the result's ``'entity_group'`` label:

    * ``"SKILL"``                   -> ``"skills"``
    * ``"EXPERIENCE"`` or ``"JOB"``   -> ``"experience"``
    * ``"DEGREE"`` or ``"EDUCATION"`` -> ``"education"``

    Results whose label contains none of these keywords are ignored.

    Returns:
        A dict with exactly the keys ``"skills"``, ``"experience"`` and
        ``"education"``, each mapped to a list of words in the order the
        pipeline returned them (duplicates are kept).
    """
    # NOTE(review): the checkpoint this file loads is named "...conll03-english";
    # CoNLL-03 models presumably emit PER/ORG/LOC/MISC groups only, in which
    # case none of the keywords below can match and every list stays empty.
    # Confirm against the model card before relying on these buckets.

    # Ordered (bucket, keywords) pairs: the order is the match priority, so a
    # label is filed under the first bucket it matches and no other.
    routing = (
        ("skills", ("SKILL",)),
        ("experience", ("EXPERIENCE", "JOB")),
        ("education", ("DEGREE", "EDUCATION")),
    )

    grouped = {bucket: [] for bucket, _ in routing}

    for hit in ner_pipeline(text):
        group = hit['entity_group']
        for bucket, keywords in routing:
            if any(keyword in group for keyword in keywords):
                grouped[bucket].append(hit['word'])
                break

    return grouped
|
26 |
|
27 |
def match_cv_to_job(cv_text, job_description):
|
28 |
debug_info = "Debug Info:\n"
|
29 |
|
30 |
+
# Extract entities from CV and job description
|
31 |
+
cv_entities = extract_entities(cv_text)
|
32 |
+
job_entities = extract_entities(job_description)
|
33 |
+
|
34 |
+
# Calculate similarity score between entities
|
35 |
+
match_score = 0
|
36 |
+
for key in cv_entities:
|
37 |
+
if key in job_entities:
|
38 |
+
match_score += len(set(cv_entities[key]) & set(job_entities[key])) / len(set(job_entities[key])) if job_entities[key] else 0
|
39 |
+
|
40 |
+
# Average score by number of categories
|
41 |
+
ner_match_score = (match_score / 3) * 100 # Normalized score for NER entities
|
42 |
+
debug_info += f"NER Match Score: {ner_match_score:.2f}%\n"
|
43 |
|
44 |
# Calculate overall similarity score using embeddings
|
45 |
cv_embedding = model.encode(cv_text, convert_to_tensor=True)
|
46 |
job_embedding = model.encode(job_description, convert_to_tensor=True)
|
47 |
similarity_score = util.pytorch_cos_sim(cv_embedding, job_embedding).item()
|
48 |
|
49 |
+
# Combine scores with weights (embedding similarity + NER matching)
|
50 |
+
combined_score = (similarity_score * 0.7) + (ner_match_score / 100) * 0.3 # Weighted combined score
|
|
|
|
|
|
|
51 |
match_percentage = combined_score * 100
|
52 |
debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
|
53 |
|
|
|
55 |
|
56 |
# Gradio interface
|
57 |
with gr.Blocks() as demo:
|
58 |
+
gr.Markdown("# CV and Job Description Matcher with Embeddings and NER Matching")
|
59 |
|
60 |
cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
|
61 |
job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)
|