Spaces:
Sleeping
Sleeping
saifeddinemk
committed on
Commit
•
77fff65
1
Parent(s):
e49a8dc
Initial commit
Browse files
app.py
CHANGED
@@ -1,7 +1,9 @@
|
|
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
3 |
from fuzzywuzzy import fuzz
|
4 |
import gradio as gr
|
|
|
5 |
|
6 |
# Load the SentenceTransformer model for embeddings
|
7 |
model = SentenceTransformer('fine_tuned_job_resume_similarity_model')
|
@@ -19,6 +21,14 @@ TARGET_KEYWORDS = [
|
|
19 |
"problem-solving", "teamwork", "leadership", "technical", "planning", "operations"
|
20 |
]
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# Define function to dynamically extract entities into generalized categories
|
23 |
def extract_entities(text):
|
24 |
entities = {"qualifications": [], "responsibilities": [], "other": []}
|
@@ -42,7 +52,9 @@ def fuzzy_match_keywords(cv_text, job_text, keywords):
|
|
42 |
match_score += score if score > 60 else 0 # Consider only high-confidence matches
|
43 |
return match_score / len(keywords) if keywords else 0
|
44 |
|
45 |
-
def match_cv_to_job(
|
|
|
|
|
46 |
debug_info = "Debug Info:\n"
|
47 |
|
48 |
# Extract entities from CV and job description
|
@@ -76,8 +88,8 @@ def match_cv_to_job(cv_text, job_description):
|
|
76 |
(ner_match_score / 100) * 0.3 + # NER-based entity match
|
77 |
(fuzzy_keyword_score / 100) * 0.2 # Fuzzy matching for keywords
|
78 |
)
|
79 |
-
match_percentage =
|
80 |
-
debug_info += f"Overall Match Percentage: {
|
81 |
|
82 |
return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info
|
83 |
|
@@ -85,13 +97,13 @@ def match_cv_to_job(cv_text, job_description):
|
|
85 |
with gr.Blocks() as demo:
|
86 |
gr.Markdown("# CV and Job Description Matcher for All Industries with NER and Fuzzy Matching")
|
87 |
|
88 |
-
|
89 |
-
job_description = gr.Textbox(label="Job Description", placeholder="Enter the
|
90 |
|
91 |
match_button = gr.Button("Calculate Match Percentage")
|
92 |
output = gr.JSON(label="Match Result")
|
93 |
debug_output = gr.Textbox(label="Debug Info", lines=10)
|
94 |
|
95 |
-
match_button.click(fn=match_cv_to_job, inputs=[
|
96 |
|
97 |
demo.launch()
|
|
|
1 |
+
|
2 |
from sentence_transformers import SentenceTransformer, util
|
3 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
4 |
from fuzzywuzzy import fuzz
|
5 |
import gradio as gr
|
6 |
+
import fitz # PyMuPDF for PDF extraction
|
7 |
|
8 |
# Load the SentenceTransformer model for embeddings
|
9 |
model = SentenceTransformer('fine_tuned_job_resume_similarity_model')
|
|
|
21 |
"problem-solving", "teamwork", "leadership", "technical", "planning", "operations"
|
22 |
]
|
23 |
|
24 |
+
# Function to extract text from PDF files
|
25 |
+
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF, joined in page order.

    Args:
        pdf_file: Location of the PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or an upload object that exposes its path
            through a ``name`` attribute. ``None`` means no file was
            supplied.

    Returns:
        The concatenated text of all pages, or ``""`` when ``pdf_file`` is
        ``None`` or the document has no pages.
    """
    import os  # local import: the file-level import block is left untouched

    # Nothing uploaded: there is no document to read.
    if pdf_file is None:
        return ""

    try:
        # Real paths (str / pathlib.Path) pass through unchanged.
        path = os.fspath(pdf_file)
    except TypeError:
        # NOTE(review): depending on the Gradio version, gr.File may hand over
        # a tempfile-like wrapper rather than a path string; such objects
        # carry the on-disk location in `.name` -- confirm against the
        # installed Gradio release.
        path = getattr(pdf_file, "name", pdf_file)

    with fitz.open(path) as doc:
        # Join once instead of growing a string page by page.
        return "".join(page.get_text("text") for page in doc)
|
31 |
+
|
32 |
# Define function to dynamically extract entities into generalized categories
|
33 |
def extract_entities(text):
|
34 |
entities = {"qualifications": [], "responsibilities": [], "other": []}
|
|
|
52 |
match_score += score if score > 60 else 0 # Consider only high-confidence matches
|
53 |
return match_score / len(keywords) if keywords else 0
|
54 |
|
55 |
+
def match_cv_to_job(pdf_file, job_description):
|
56 |
+
# Extract text from PDF file
|
57 |
+
cv_text = extract_text_from_pdf(pdf_file)
|
58 |
debug_info = "Debug Info:\n"
|
59 |
|
60 |
# Extract entities from CV and job description
|
|
|
88 |
(ner_match_score / 100) * 0.3 + # NER-based entity match
|
89 |
(fuzzy_keyword_score / 100) * 0.2 # Fuzzy matching for keywords
|
90 |
)
|
91 |
+
match_percentage = combined_score * 100
|
92 |
+
debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
|
93 |
|
94 |
return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info
|
95 |
|
|
|
97 |
with gr.Blocks() as demo:
|
98 |
gr.Markdown("# CV and Job Description Matcher for All Industries with NER and Fuzzy Matching")
|
99 |
|
100 |
+
pdf_input = gr.File(label="Upload CV (PDF format)")
|
101 |
+
job_description = gr.Textbox(label="Job Description", placeholder="Enter the job description text here", lines=10)
|
102 |
|
103 |
match_button = gr.Button("Calculate Match Percentage")
|
104 |
output = gr.JSON(label="Match Result")
|
105 |
debug_output = gr.Textbox(label="Debug Info", lines=10)
|
106 |
|
107 |
+
match_button.click(fn=match_cv_to_job, inputs=[pdf_input, job_description], outputs=[output, debug_output])
|
108 |
|
109 |
demo.launch()
|