saifeddinemk committed on
Commit
ce96780
1 Parent(s): 8f30f12

Init Commit

Browse files
Files changed (1) hide show
  1. app.py +6 -37
app.py CHANGED
@@ -1,5 +1,4 @@
1
  from sentence_transformers import SentenceTransformer, util
2
- from transformers import pipeline
3
  import gradio as gr
4
  import nltk
5
 
@@ -9,61 +8,31 @@ try:
9
  except Exception as e:
10
  print(f"Error loading SentenceTransformer model: {e}")
11
 
12
- # Load a summarization pipeline
13
- try:
14
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
15
- except Exception as e:
16
- print(f"Error loading summarization pipeline: {e}")
17
-
18
  # Download NLTK punkt tokenizer if not already installed (run this once)
19
  nltk.download('punkt')
20
 
21
- def summarize_text(text, max_length=100, min_length=25):
22
- try:
23
- # Summarize the input text
24
- summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
25
- return summary[0]["summary_text"]
26
- except Exception as e:
27
- return f"Error summarizing text: {e}"
28
-
29
  def match_cv_to_jobs(cv_text, job_descriptions):
30
  debug_info = "Debug Info:\n"
31
  results = []
32
 
33
- # Summarize the CV text
34
- try:
35
- summarized_cv = summarize_text(cv_text, max_length=150)
36
- debug_info += f"Summarized CV Text: {summarized_cv}\n"
37
- except Exception as e:
38
- debug_info += f"Error summarizing CV text: {e}\n"
39
- return [], debug_info
40
-
41
- # Summarize the job description
42
- try:
43
- summarized_job_desc = summarize_text(job_descriptions, max_length=150)
44
- debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
45
- except Exception as e:
46
- debug_info += f"Error summarizing job descriptions: {e}\n"
47
- return [], debug_info
48
-
49
- # Encode the summarized CV text
50
  try:
51
- cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
52
  debug_info += f"CV Embedding: {cv_embedding}\n"
53
  except Exception as e:
54
  debug_info += f"Error encoding CV text: {e}\n"
55
  return [], debug_info
56
 
57
- # Split summarized job description into sentences
58
  try:
59
- description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
60
  except Exception as e:
61
  debug_info += f"Error tokenizing job description: {e}\n"
62
  return [], debug_info
63
 
64
  for sentence in description_sentences:
65
  try:
66
- # Encode each sentence from the summarized job description
67
  sentence_embedding = model.encode(sentence, convert_to_tensor=True)
68
  debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
69
 
@@ -89,7 +58,7 @@ def match_cv_to_jobs(cv_text, job_descriptions):
89
 
90
  # Gradio interface
91
  with gr.Blocks() as demo:
92
- gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")
93
 
94
  # Input fields for CV and job descriptions
95
  cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
 
1
  from sentence_transformers import SentenceTransformer, util
 
2
  import gradio as gr
3
  import nltk
4
 
 
8
  except Exception as e:
9
  print(f"Error loading SentenceTransformer model: {e}")
10
 
 
 
 
 
 
 
11
  # Download NLTK punkt tokenizer if not already installed (run this once)
12
  nltk.download('punkt')
13
 
 
 
 
 
 
 
 
 
14
  def match_cv_to_jobs(cv_text, job_descriptions):
15
  debug_info = "Debug Info:\n"
16
  results = []
17
 
18
+ # Encode the CV text directly without summarization
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  try:
20
+ cv_embedding = model.encode(cv_text, convert_to_tensor=True)
21
  debug_info += f"CV Embedding: {cv_embedding}\n"
22
  except Exception as e:
23
  debug_info += f"Error encoding CV text: {e}\n"
24
  return [], debug_info
25
 
26
+ # Split job description into sentences
27
  try:
28
+ description_sentences = nltk.tokenize.sent_tokenize(job_descriptions)
29
  except Exception as e:
30
  debug_info += f"Error tokenizing job description: {e}\n"
31
  return [], debug_info
32
 
33
  for sentence in description_sentences:
34
  try:
35
+ # Encode each sentence from the job description
36
  sentence_embedding = model.encode(sentence, convert_to_tensor=True)
37
  debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
38
 
 
58
 
59
  # Gradio interface
60
  with gr.Blocks() as demo:
61
+ gr.Markdown("# CV and Job Description Matcher with Sentence Similarity")
62
 
63
  # Input fields for CV and job descriptions
64
  cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)