File size: 2,971 Bytes
f5d2489
9208e17
f5d2489
b397dc0
f5d2489
289726b
 
 
 
725f549
289726b
f71b7ff
96ed827
f5d2489
f65dc03
9208e17
 
ce96780
289726b
ce96780
289726b
 
 
 
732403f
ce96780
289726b
ce96780
289726b
 
 
732403f
f5d2489
289726b
ce96780
289726b
 
 
 
 
 
 
 
 
 
 
 
 
 
f5d2489
 
289726b
 
 
 
9208e17
f65dc03
9208e17
 
 
ce96780
9208e17
e079d59
9208e17
f5d2489
9208e17
e079d59
 
9208e17
f65dc03
9f26a6c
e079d59
f5d2489
91207a8
9208e17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import nltk

# Load the SentenceTransformer model for sentence similarity.
# all-MiniLM-L6-v2 is a small general-purpose sentence-embedding model.
try:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    # NOTE(review): if loading fails, `model` is never bound, so
    # match_cv_to_jobs will raise NameError on first use — consider
    # re-raising here instead of only printing.
    print(f"Error loading SentenceTransformer model: {e}")

# Download NLTK punkt tokenizer if not already installed (run this once).
# 'punkt_tab' is the tokenizer data package used by recent NLTK releases;
# nltk.download is a no-op if the data is already present.
nltk.download('punkt_tab')

def match_cv_to_jobs(cv_text, job_descriptions):
    """Rank each job-description sentence by cosine similarity to a CV.

    Args:
        cv_text: The full CV text as a single string.
        job_descriptions: Job-description text; it is split into sentences
            and every sentence is scored independently against the CV.

    Returns:
        A ``(results, debug_info)`` pair. ``results`` is a list of dicts with
        keys "Job Description Sentence" and "Similarity Score", sorted by
        score in descending order. ``debug_info`` is a human-readable log of
        the run (also returned on failure, alongside an empty results list).
    """
    # Collect debug lines in a list and join once at the end — repeated
    # string += is quadratic and the old full-tensor dumps made the debug
    # textbox unreadable.
    debug_parts = ["Debug Info:"]
    results = []

    # Guard against empty input early; encoding "" is meaningless.
    if not cv_text or not job_descriptions:
        debug_parts.append("Empty CV text or job description supplied.")
        return [], "\n".join(debug_parts)

    # Encode the CV once; every sentence is compared against this embedding.
    try:
        cv_embedding = model.encode(cv_text, convert_to_tensor=True)
        # Log the shape only — the full tensor is thousands of characters.
        debug_parts.append(f"CV embedding shape: {tuple(cv_embedding.shape)}")
    except Exception as e:
        debug_parts.append(f"Error encoding CV text: {e}")
        return [], "\n".join(debug_parts)

    # Split the job description into sentences with NLTK's punkt tokenizer.
    try:
        sentences = nltk.tokenize.sent_tokenize(job_descriptions)
    except Exception as e:
        debug_parts.append(f"Error tokenizing job description: {e}")
        return [], "\n".join(debug_parts)

    for sentence in sentences:
        try:
            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
            score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
            debug_parts.append(f"Score {score:.4f} for sentence: {sentence!r}")
            results.append({
                "Job Description Sentence": sentence,
                "Similarity Score": score,
            })
        except Exception as e:
            # Skip a problematic sentence but keep scoring the rest.
            debug_parts.append(f"Error processing sentence {sentence!r}: {e}")

    # Highest-similarity sentences first. Sorting homogeneous dicts by a
    # float key cannot raise, so the old try/except around it was dead code.
    results.sort(key=lambda r: r["Similarity Score"], reverse=True)

    return results, "\n".join(debug_parts)

# ---- Gradio interface ----
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Sentence Similarity")

    # Text inputs: one box for the CV, one for the job-description text.
    cv_input = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    jobs_input = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)

    # Trigger button plus the two output widgets (ranked matches + debug log).
    run_button = gr.Button("Match CV to Job Descriptions")
    results_view = gr.JSON(label="Match Results")
    debug_view = gr.Textbox(label="Debug Info", lines=10)

    # Wire the button to the matcher: two text inputs in, JSON + log out.
    run_button.click(
        fn=match_cv_to_jobs,
        inputs=[cv_input, jobs_input],
        outputs=[results_view, debug_view],
    )

demo.launch()