saifeddinemk committed on
Commit
289726b
1 Parent(s): 4a7388d

Init Commit

Files changed (1)
  1. app.py +65 -30
app.py CHANGED
@@ -1,56 +1,91 @@
  from sentence_transformers import SentenceTransformer, util
- from transformers import pipeline
+ from transformers import pipeline, PipelineException
  import gradio as gr
  import nltk

  # Load the SentenceTransformer model for sentence similarity
- model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+ try:
+     model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+ except Exception as e:
+     print(f"Error loading SentenceTransformer model: {e}")

- # Load a summarization pipeline (you can use 'facebook/bart-large-cnn' or any other summarization model)
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ # Load a summarization pipeline
+ try:
+     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ except Exception as e:
+     print(f"Error loading summarization pipeline: {e}")

- # Download NLTK punkt tokenizer if not already installed (you may need to run this once)
+ # Download NLTK punkt tokenizer if not already installed (run this once)
  nltk.download('punkt')

  def summarize_text(text, max_length=100, min_length=25):
-     # Summarize the input text
-     summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
-     return summary[0]["summary_text"]
+     try:
+         # Summarize the input text
+         summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
+         return summary[0]["summary_text"]
+     except PipelineException as e:
+         return f"Error summarizing text: {e}"
+     except Exception as e:
+         return f"Unexpected error during summarization: {e}"

  def match_cv_to_jobs(cv_text, job_descriptions):
      debug_info = "Debug Info:\n"
      results = []

-     # Summarize the CV text and the job descriptions
-     summarized_cv = summarize_text(cv_text, max_length=150)
-     debug_info += f"Summarized CV Text: {summarized_cv}\n"
-
-     summarized_job_desc = summarize_text(job_descriptions, max_length=150)
-     debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
+     # Summarize the CV text
+     try:
+         summarized_cv = summarize_text(cv_text, max_length=150)
+         debug_info += f"Summarized CV Text: {summarized_cv}\n"
+     except Exception as e:
+         debug_info += f"Error summarizing CV text: {e}\n"
+         return [], debug_info
+
+     # Summarize the job description
+     try:
+         summarized_job_desc = summarize_text(job_descriptions, max_length=150)
+         debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
+     except Exception as e:
+         debug_info += f"Error summarizing job descriptions: {e}\n"
+         return [], debug_info

      # Encode the summarized CV text
-     cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
-     debug_info += f"CV Embedding: {cv_embedding}\n"
+     try:
+         cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
+         debug_info += f"CV Embedding: {cv_embedding}\n"
+     except Exception as e:
+         debug_info += f"Error encoding CV text: {e}\n"
+         return [], debug_info

      # Split summarized job description into sentences
-     description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
+     try:
+         description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
+     except Exception as e:
+         debug_info += f"Error tokenizing job description: {e}\n"
+         return [], debug_info

      for sentence in description_sentences:
-         # Encode each sentence from the summarized job description
-         sentence_embedding = model.encode(sentence, convert_to_tensor=True)
-         debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
-
-         # Compute similarity score
-         similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
-         debug_info += f"Similarity Score for sentence: {similarity_score}\n"
-
-         results.append({
-             "Job Description Sentence": sentence,
-             "Similarity Score": similarity_score
-         })
+         try:
+             # Encode each sentence from the summarized job description
+             sentence_embedding = model.encode(sentence, convert_to_tensor=True)
+             debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
+
+             # Compute similarity score
+             similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
+             debug_info += f"Similarity Score for sentence: {similarity_score}\n"
+
+             results.append({
+                 "Job Description Sentence": sentence,
+                 "Similarity Score": similarity_score
+             })
+         except Exception as e:
+             debug_info += f"Error processing sentence '{sentence}': {e}\n"
+             continue

      # Sort results by similarity score in descending order
-     results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)
+     try:
+         results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)
+     except Exception as e:
+         debug_info += f"Error sorting results: {e}\n"

      return results, debug_info
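The hunk ends at line 91 of app.py, before any UI code, so the Gradio interface presumably follows later in the file. A minimal sketch of how match_cv_to_jobs might be wired into Gradio is below; the component names, labels, and layout are assumptions for illustration, not part of this commit.

# Hypothetical UI wiring; all names and labels below are assumptions, not from the commit.
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("## CV / Job Description Matcher")
    cv_input = gr.Textbox(label="CV Text", lines=10)
    job_input = gr.Textbox(label="Job Description", lines=10)
    match_button = gr.Button("Match")
    results_output = gr.JSON(label="Sentence Similarity Results")
    debug_output = gr.Textbox(label="Debug Info", lines=8)

    # match_cv_to_jobs returns (results, debug_info), so it maps onto two outputs
    match_button.click(
        fn=match_cv_to_jobs,
        inputs=[cv_input, job_input],
        outputs=[results_output, debug_output],
    )

demo.launch()

Two caveats on the committed code: summarize_text returns an error string rather than raising, so the try/except blocks around its call sites in match_cv_to_jobs will rarely trigger (failures come back as the "summary" itself); and depending on the pinned transformers version, PipelineException may not be importable from the top-level package and may need to come from transformers.pipelines.base instead.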