saifeddinemk committed on
Commit
77fff65
1 Parent(s): e49a8dc

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -1,7 +1,9 @@
 
1
  from sentence_transformers import SentenceTransformer, util
2
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
  from fuzzywuzzy import fuzz
4
  import gradio as gr
 
5
 
6
  # Load the SentenceTransformer model for embeddings
7
  model = SentenceTransformer('fine_tuned_job_resume_similarity_model')
@@ -19,6 +21,14 @@ TARGET_KEYWORDS = [
19
  "problem-solving", "teamwork", "leadership", "technical", "planning", "operations"
20
  ]
21
 
 
 
 
 
 
 
 
 
22
  # Define function to dynamically extract entities into generalized categories
23
  def extract_entities(text):
24
  entities = {"qualifications": [], "responsibilities": [], "other": []}
@@ -42,7 +52,9 @@ def fuzzy_match_keywords(cv_text, job_text, keywords):
42
  match_score += score if score > 60 else 0 # Consider only high-confidence matches
43
  return match_score / len(keywords) if keywords else 0
44
 
45
- def match_cv_to_job(cv_text, job_description):
 
 
46
  debug_info = "Debug Info:\n"
47
 
48
  # Extract entities from CV and job description
@@ -76,8 +88,8 @@ def match_cv_to_job(cv_text, job_description):
76
  (ner_match_score / 100) * 0.3 + # NER-based entity match
77
  (fuzzy_keyword_score / 100) * 0.2 # Fuzzy matching for keywords
78
  )
79
- match_percentage = similarity_score * 100
80
- debug_info += f"Overall Match Percentage: {round(match_percentage):.2f}%\n"
81
 
82
  return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info
83
 
@@ -85,13 +97,13 @@ def match_cv_to_job(cv_text, job_description):
85
  with gr.Blocks() as demo:
86
  gr.Markdown("# CV and Job Description Matcher for All Industries with NER and Fuzzy Matching")
87
 
88
- cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
89
- job_description = gr.Textbox(label="Job Description", placeholder="Enter the entire job description text here", lines=10)
90
 
91
  match_button = gr.Button("Calculate Match Percentage")
92
  output = gr.JSON(label="Match Result")
93
  debug_output = gr.Textbox(label="Debug Info", lines=10)
94
 
95
- match_button.click(fn=match_cv_to_job, inputs=[cv_text, job_description], outputs=[output, debug_output])
96
 
97
  demo.launch()
 
1
+
2
  from sentence_transformers import SentenceTransformer, util
3
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
4
  from fuzzywuzzy import fuzz
5
  import gradio as gr
6
+ import fitz # PyMuPDF for PDF extraction
7
 
8
  # Load the SentenceTransformer model for embeddings
9
  model = SentenceTransformer('fine_tuned_job_resume_similarity_model')
 
21
  "problem-solving", "teamwork", "leadership", "technical", "planning", "operations"
22
  ]
23
 
24
+ # Function to extract text from PDF files
25
+ def extract_text_from_pdf(pdf_file):
26
+ text = ""
27
+ with fitz.open(pdf_file) as doc:
28
+ for page in doc:
29
+ text += page.get_text("text")
30
+ return text
31
+
32
  # Define function to dynamically extract entities into generalized categories
33
  def extract_entities(text):
34
  entities = {"qualifications": [], "responsibilities": [], "other": []}
 
52
  match_score += score if score > 60 else 0 # Consider only high-confidence matches
53
  return match_score / len(keywords) if keywords else 0
54
 
55
+ def match_cv_to_job(pdf_file, job_description):
56
+ # Extract text from PDF file
57
+ cv_text = extract_text_from_pdf(pdf_file)
58
  debug_info = "Debug Info:\n"
59
 
60
  # Extract entities from CV and job description
 
88
  (ner_match_score / 100) * 0.3 + # NER-based entity match
89
  (fuzzy_keyword_score / 100) * 0.2 # Fuzzy matching for keywords
90
  )
91
+ match_percentage = combined_score * 100
92
+ debug_info += f"Overall Match Percentage: {match_percentage:.2f}%\n"
93
 
94
  return {"Match Percentage": f"{match_percentage:.2f}%"}, debug_info
95
 
 
97
  with gr.Blocks() as demo:
98
  gr.Markdown("# CV and Job Description Matcher for All Industries with NER and Fuzzy Matching")
99
 
100
+ pdf_input = gr.File(label="Upload CV (PDF format)")
101
+ job_description = gr.Textbox(label="Job Description", placeholder="Enter the job description text here", lines=10)
102
 
103
  match_button = gr.Button("Calculate Match Percentage")
104
  output = gr.JSON(label="Match Result")
105
  debug_output = gr.Textbox(label="Debug Info", lines=10)
106
 
107
+ match_button.click(fn=match_cv_to_job, inputs=[pdf_input, job_description], outputs=[output, debug_output])
108
 
109
  demo.launch()