from sentence_transformers import SentenceTransformer, util
import gradio as gr
import nltk
# Load the SentenceTransformer model for sentence similarity
try:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    print(f"Error loading SentenceTransformer model: {e}")
    raise  # re-raise so the app does not continue with an undefined model

# Download the NLTK sentence tokenizer data if not already installed (run this once)
nltk.download('punkt_tab')
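# Optional sketch (not part of the original app): only download the tokenizer data
# when it is missing. 'tokenizers/punkt_tab' is the resource path used by recent
# NLTK releases; older versions use 'tokenizers/punkt' instead.
# try:
#     nltk.data.find('tokenizers/punkt_tab')
# except LookupError:
#     nltk.download('punkt_tab')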
def match_cv_to_jobs(cv_text, job_descriptions):
    debug_info = "Debug Info:\n"
    results = []

    # Encode the CV text directly, without summarization
    try:
        cv_embedding = model.encode(cv_text, convert_to_tensor=True)
        debug_info += f"CV Embedding: {cv_embedding}\n"
    except Exception as e:
        debug_info += f"Error encoding CV text: {e}\n"
        return [], debug_info

    # Split the job description text into sentences
    try:
        description_sentences = nltk.tokenize.sent_tokenize(job_descriptions)
    except Exception as e:
        debug_info += f"Error tokenizing job description: {e}\n"
        return [], debug_info

    for sentence in description_sentences:
        try:
            # Encode each sentence from the job description
            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
            debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"

            # Compute the cosine similarity between the CV and this sentence
            similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
            debug_info += f"Similarity Score for sentence: {similarity_score}\n"

            results.append({
                "Job Description Sentence": sentence,
                "Similarity Score": similarity_score
            })
        except Exception as e:
            debug_info += f"Error processing sentence '{sentence}': {e}\n"
            continue

    # Sort results by similarity score in descending order
    try:
        results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)
    except Exception as e:
        debug_info += f"Error sorting results: {e}\n"

    return results, debug_info
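# Illustrative usage sketch (the sample strings below are hypothetical, not part of
# the app): the matcher can be exercised without the Gradio UI, e.g. from a REPL.
# sample_cv = "Experienced Python developer with an NLP and machine learning background."
# sample_jd = "We are hiring a Python engineer. Experience with NLP is a plus."
# matches, debug = match_cv_to_jobs(sample_cv, sample_jd)
# print(matches[0])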
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Sentence Similarity")

    # Input fields for the CV and the job description text
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)

    # Button and output areas
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Debug box to display diagnostic info

    # Run the matching function on click
    match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions], outputs=[output, debug_output])

demo.launch()