Spaces:

saifeddinemk
/

cv_job

Sleeping

App Files Files Community

saifeddinemk committed on Nov 11, 2024

Commit

289726b

1 Parent(s): 4a7388d

Init Commit

Browse files

Files changed (1) hide show

app.py +65 -30

app.py CHANGED Viewed

@@ -1,56 +1,91 @@
 from sentence_transformers import SentenceTransformer, util
-from transformers import pipeline
 import gradio as gr
 import nltk
 # Load the SentenceTransformer model for sentence similarity
-model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-# Load a summarization pipeline (you can use 'facebook/bart-large-cnn' or any other summarization model)
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-# Download NLTK punkt tokenizer if not already installed (you may need to run this once)
 nltk.download('punkt')
 def summarize_text(text, max_length=100, min_length=25):
-    # Summarize the input text
-    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
-    return summary[0]["summary_text"]
 def match_cv_to_jobs(cv_text, job_descriptions):
     debug_info = "Debug Info:\n"
     results = []
-    # Summarize the CV text and the job descriptions
-    summarized_cv = summarize_text(cv_text, max_length=150)
-    debug_info += f"Summarized CV Text: {summarized_cv}\n"
-    summarized_job_desc = summarize_text(job_descriptions, max_length=150)
-    debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
     # Encode the summarized CV text
-    cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
-    debug_info += f"CV Embedding: {cv_embedding}\n"
     # Split summarized job description into sentences
-    description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
     for sentence in description_sentences:
-        # Encode each sentence from the summarized job description
-        sentence_embedding = model.encode(sentence, convert_to_tensor=True)
-        debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
-        # Compute similarity score
-        similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
-        debug_info += f"Similarity Score for sentence: {similarity_score}\n"
-        results.append({
-            "Job Description Sentence": sentence,
-            "Similarity Score": similarity_score
-        })
     # Sort results by similarity score in descending order
-    results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)
     return results, debug_info

 from sentence_transformers import SentenceTransformer, util
+from transformers.pipelines import pipeline, PipelineException
 import gradio as gr
 import nltk
 # Load the SentenceTransformer model for sentence similarity
+# NOTE(review): a load failure is only printed; `model` is then left unbound,
+# so any later model.encode(...) call raises NameError.
+try:
+    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+except Exception as e:
+    print(f"Error loading SentenceTransformer model: {e}")
+# Load a summarization pipeline
+# NOTE(review): same best-effort handling; `summarizer` stays unbound on failure.
+try:
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+except Exception as e:
+    print(f"Error loading summarization pipeline: {e}")
+# Download NLTK punkt tokenizer if not already installed (run this once)
 nltk.download('punkt')
 def summarize_text(text, max_length=100, min_length=25):
+    """Return a summary of *text* produced by the module-level summarizer.
+
+    Args:
+        text: Text to summarize.
+        max_length: Passed to the summarization pipeline as max_length.
+        min_length: Passed to the summarization pipeline as min_length.
+
+    Returns:
+        The "summary_text" field of the first pipeline result.
+
+    Raises:
+        RuntimeError: If the summarizer fails. The failure is raised rather
+            than returned as a string, so callers cannot mistake an error
+            message for a real summary and go on to embed and score it.
+    """
+    try:
+        # Deterministic decoding (do_sample=False) keeps summaries repeatable.
+        summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
+        return summary[0]["summary_text"]
+    except PipelineException as e:
+        raise RuntimeError(f"Error summarizing text: {e}") from e
+    except Exception as e:
+        raise RuntimeError(f"Unexpected error during summarization: {e}") from e
 def match_cv_to_jobs(cv_text, job_descriptions):
+    """Score sentences of a summarized job description against a summarized CV.
+
+    Both inputs are passed through summarize_text (max_length=150). The CV
+    summary is embedded once; the job-description summary is split into
+    sentences with NLTK, and each sentence is embedded and compared with the
+    CV embedding by cosine similarity.
+
+    Args:
+        cv_text: CV text handed to summarize_text (presumably a str; confirm
+            against the caller).
+        job_descriptions: Job description text, summarized as a single input
+            (presumably a str; confirm against the caller).
+
+    Returns:
+        A (results, debug_info) tuple. results is a list of dicts with keys
+        "Job Description Sentence" and "Similarity Score", sorted by score in
+        descending order. debug_info is a text log of intermediate values and
+        of any errors. If summarizing, encoding the CV, or tokenizing raises,
+        results is an empty list.
+    """
     debug_info = "Debug Info:\n"
     results = []
+    # Summarize the CV text
+    try:
+        summarized_cv = summarize_text(cv_text, max_length=150)
+        debug_info += f"Summarized CV Text: {summarized_cv}\n"
+    except Exception as e:
+        debug_info += f"Error summarizing CV text: {e}\n"
+        return [], debug_info
+    # Summarize the job description
+    try:
+        summarized_job_desc = summarize_text(job_descriptions, max_length=150)
+        debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
+    except Exception as e:
+        debug_info += f"Error summarizing job descriptions: {e}\n"
+        return [], debug_info
     # Encode the summarized CV text
+    try:
+        cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
+        # The full tensor repr is appended to the debug log.
+        debug_info += f"CV Embedding: {cv_embedding}\n"
+    except Exception as e:
+        debug_info += f"Error encoding CV text: {e}\n"
+        return [], debug_info
     # Split summarized job description into sentences
+    try:
+        description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
+    except Exception as e:
+        debug_info += f"Error tokenizing job description: {e}\n"
+        return [], debug_info
     for sentence in description_sentences:
+        try:
+            # Encode each sentence from the summarized job description
+            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
+            debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
+            # Compute similarity score
+            similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
+            debug_info += f"Similarity Score for sentence: {similarity_score}\n"
+            results.append({
+                "Job Description Sentence": sentence,
+                "Similarity Score": similarity_score
+            })
+        except Exception as e:
+            # A failing sentence is logged and skipped; the rest are still scored.
+            debug_info += f"Error processing sentence '{sentence}': {e}\n"
+            continue
     # Sort results by similarity score in descending order
+    try:
+        results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)
+    except Exception as e:
+        debug_info += f"Error sorting results: {e}\n"
     return results, debug_info