from sentence_transformers import SentenceTransformer, util
import gradio as gr
import nltk
# Load the SentenceTransformer model for sentence similarity
try:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    print(f"Error loading SentenceTransformer model: {e}")
    raise  # re-raise so the app does not continue with an undefined model

# Download the NLTK sentence tokenizer data if not already installed (run this once)
nltk.download('punkt_tab')
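# Optional sketch (not part of the original app): only download the tokenizer data
# when it is missing. 'tokenizers/punkt_tab' is the resource path used by recent
# NLTK releases; older versions use 'tokenizers/punkt' instead.
# try:
#     nltk.data.find('tokenizers/punkt_tab')
# except LookupError:
#     nltk.download('punkt_tab')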
def match_cv_to_jobs(cv_text, job_descriptions):
    debug_info = "Debug Info:\n"
    results = []

    # Encode the CV text directly, without summarization
    try:
        cv_embedding = model.encode(cv_text, convert_to_tensor=True)
        debug_info += f"CV Embedding: {cv_embedding}\n"
    except Exception as e:
        debug_info += f"Error encoding CV text: {e}\n"
        return [], debug_info

    # Split the job description text into sentences
    try:
        description_sentences = nltk.tokenize.sent_tokenize(job_descriptions)
    except Exception as e:
        debug_info += f"Error tokenizing job description: {e}\n"
        return [], debug_info

    for sentence in description_sentences:
        try:
            # Encode each sentence from the job description
            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
            debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"

            # Compute the cosine similarity between the CV and this sentence
            similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
            debug_info += f"Similarity Score for sentence: {similarity_score}\n"

            results.append({
                "Job Description Sentence": sentence,
                "Similarity Score": similarity_score
            })
        except Exception as e:
            debug_info += f"Error processing sentence '{sentence}': {e}\n"
            continue

    # Sort results by similarity score in descending order
    try:
        results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)
    except Exception as e:
        debug_info += f"Error sorting results: {e}\n"

    return results, debug_info
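# Illustrative usage sketch (the sample strings below are hypothetical, not part of
# the app): the matcher can be exercised without the Gradio UI, e.g. from a REPL.
# sample_cv = "Experienced Python developer with an NLP and machine learning background."
# sample_jd = "We are hiring a Python engineer. Experience with NLP is a plus."
# matches, debug = match_cv_to_jobs(sample_cv, sample_jd)
# print(matches[0])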
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Sentence Similarity")

    # Input fields for the CV and the job description text
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)

    # Button and output areas
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Debug box to display diagnostic info

    # Run the matching function on click
    match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions], outputs=[output, debug_output])

demo.launch()