Spaces:
Sleeping
Sleeping
File size: 4,268 Bytes
f5d2489 289726b 9208e17 f5d2489 b397dc0 f5d2489 289726b 725f549 289726b 96ed827 289726b f5d2489 96ed827 f5d2489 289726b f5d2489 f65dc03 9208e17 289726b f5d2489 289726b 732403f f5d2489 289726b 732403f f5d2489 289726b f5d2489 289726b 9208e17 f65dc03 9208e17 f5d2489 9208e17 e079d59 9208e17 f5d2489 9208e17 e079d59 9208e17 f65dc03 9f26a6c e079d59 f5d2489 91207a8 9208e17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, PipelineException
import gradio as gr
import nltk
# --- Model setup -------------------------------------------------------------
# Pre-bind to None so a failed load leaves a well-defined name (a later call
# then fails with a clear TypeError instead of a confusing NameError).
model = None
summarizer = None

# Sentence-similarity model used to embed the CV and job-description sentences.
try:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    print(f"Error loading SentenceTransformer model: {e}")

# Summarization pipeline used to condense the CV / job description before matching.
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
except Exception as e:
    print(f"Error loading summarization pipeline: {e}")

# Ensure the NLTK sentence tokenizer is available. quiet=True suppresses the
# repeated download log on every app start (a no-op when already installed).
nltk.download('punkt', quiet=True)
def summarize_text(text, max_length=100, min_length=25):
    """Condense *text* with the BART summarization pipeline.

    Args:
        text: Input text to summarize.
        max_length: Upper bound on summary token length.
        min_length: Lower bound on summary token length.

    Returns:
        The summary string on success; otherwise a human-readable error
        message (this function never raises, so callers can show the
        message directly in the UI).
    """
    try:
        generated = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        return generated[0]["summary_text"]
    except PipelineException as e:
        return f"Error summarizing text: {e}"
    except Exception as e:
        return f"Unexpected error during summarization: {e}"
def match_cv_to_jobs(cv_text, job_descriptions):
    """Score each sentence of a summarized job description against a summarized CV.

    Args:
        cv_text: Raw CV text.
        job_descriptions: Raw job-description text (treated as one document).

    Returns:
        A pair ``(results, debug_info)`` where ``results`` is a list of
        ``{"Job Description Sentence", "Similarity Score"}`` dicts sorted by
        descending similarity, and ``debug_info`` is a diagnostic transcript.
        On any early failure an empty list is returned with the transcript.
    """
    debug_info = "Debug Info:\n"
    matches = []

    # Condense the CV first so the embedding captures its gist.
    try:
        cv_summary = summarize_text(cv_text, max_length=150)
        debug_info += f"Summarized CV Text: {cv_summary}\n"
    except Exception as e:
        debug_info += f"Error summarizing CV text: {e}\n"
        return [], debug_info

    # Same for the job description.
    try:
        job_summary = summarize_text(job_descriptions, max_length=150)
        debug_info += f"Summarized Job Description Text: {job_summary}\n"
    except Exception as e:
        debug_info += f"Error summarizing job descriptions: {e}\n"
        return [], debug_info

    # Embed the summarized CV once; every sentence is compared against it.
    try:
        cv_vec = model.encode(cv_summary, convert_to_tensor=True)
        debug_info += f"CV Embedding: {cv_vec}\n"
    except Exception as e:
        debug_info += f"Error encoding CV text: {e}\n"
        return [], debug_info

    # Break the summarized job description into sentences for per-sentence scoring.
    try:
        job_sentences = nltk.tokenize.sent_tokenize(job_summary)
    except Exception as e:
        debug_info += f"Error tokenizing job description: {e}\n"
        return [], debug_info

    for sentence in job_sentences:
        # A bad sentence should not abort the whole match — log and move on.
        try:
            sent_vec = model.encode(sentence, convert_to_tensor=True)
            debug_info += f"\nJob Description Sentence Embedding: {sent_vec}\n"

            score = util.pytorch_cos_sim(cv_vec, sent_vec).item()
            debug_info += f"Similarity Score for sentence: {score}\n"

            matches.append({
                "Job Description Sentence": sentence,
                "Similarity Score": score,
            })
        except Exception as e:
            debug_info += f"Error processing sentence '{sentence}': {e}\n"
            continue

    # Present the strongest matches first.
    try:
        matches.sort(key=lambda m: m["Similarity Score"], reverse=True)
    except Exception as e:
        debug_info += f"Error sorting results: {e}\n"
    return matches, debug_info
# --- Gradio interface --------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")

    # Free-text inputs for the CV and the job description.
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions = gr.Textbox(
        label="Job Descriptions",
        placeholder="Enter the entire job description text here",
        lines=10,
    )

    # Trigger button plus the match results and a diagnostics panel.
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # shows the debug transcript

    # Wire the button to the matcher; it returns (results, debug_info).
    match_button.click(
        fn=match_cv_to_jobs,
        inputs=[cv_text, job_descriptions],
        outputs=[output, debug_output],
    )

demo.launch()
|