File size: 4,268 Bytes
f5d2489
289726b
9208e17
f5d2489
b397dc0
f5d2489
289726b
 
 
 
725f549
289726b
 
 
 
 
96ed827
289726b
f5d2489
96ed827
f5d2489
289726b
 
 
 
 
 
 
 
f5d2489
 
f65dc03
9208e17
 
289726b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5d2489
 
289726b
 
 
 
 
 
732403f
f5d2489
289726b
 
 
 
 
732403f
f5d2489
289726b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5d2489
 
289726b
 
 
 
9208e17
f65dc03
9208e17
 
 
f5d2489
9208e17
e079d59
9208e17
f5d2489
9208e17
e079d59
 
9208e17
f65dc03
9f26a6c
e079d59
f5d2489
91207a8
9208e17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, PipelineException
import gradio as gr
import nltk

# Load the SentenceTransformer model for sentence similarity.
try:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    # Without the embedding model the app cannot compute similarities.
    # Fail fast with context instead of hitting a confusing NameError
    # the first time `model` is used.
    print(f"Error loading SentenceTransformer model: {e}")
    raise

# Load a summarization pipeline (BART fine-tuned on CNN/DailyMail).
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
except Exception as e:
    # Same rationale as above: the app is unusable without the summarizer.
    print(f"Error loading summarization pipeline: {e}")
    raise

# Download the NLTK punkt tokenizer if not already installed (no-op when cached).
# NOTE(review): newer NLTK releases may additionally need the 'punkt_tab'
# resource for sent_tokenize — confirm against the installed NLTK version.
nltk.download('punkt')

def summarize_text(text, max_length=100, min_length=25):
    """Summarize `text` with the module-level BART summarization pipeline.

    Args:
        text: Input text to summarize.
        max_length: Maximum token length of the generated summary.
        min_length: Minimum token length of the generated summary.

    Returns:
        The summary string produced by the pipeline.

    Raises:
        RuntimeError: If summarization fails. The original code returned the
            error message *as the summary*, which meant callers' exception
            handling never fired and the error text was silently embedded
            and similarity-matched downstream; raising makes failures visible
            to the existing try/except blocks in match_cv_to_jobs.
    """
    try:
        # do_sample=False keeps generation deterministic (greedy/beam).
        summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
        return summary[0]["summary_text"]
    except PipelineException as e:
        raise RuntimeError(f"Error summarizing text: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error during summarization: {e}") from e

def match_cv_to_jobs(cv_text, job_descriptions):
    """Rank job-description sentences by similarity to a summarized CV.

    Both inputs are summarized first, then the job-description summary is
    split into sentences; each sentence is embedded and scored against the
    CV embedding with cosine similarity.

    Args:
        cv_text: Full CV text.
        job_descriptions: Full job-description text (a single string).

    Returns:
        A tuple ``(results, debug_info)`` where ``results`` is a list of
        ``{"Job Description Sentence": str, "Similarity Score": float}``
        dicts sorted by score descending, and ``debug_info`` is a newline-
        joined trace of each processing step. On any fatal step failure,
        ``([], debug_info)`` is returned with the error recorded.
    """
    # Accumulate debug lines in a list and join once at the end —
    # avoids the quadratic cost of repeated string concatenation.
    debug_lines = ["Debug Info:"]
    results = []

    # Summarize the CV text.
    try:
        summarized_cv = summarize_text(cv_text, max_length=150)
        debug_lines.append(f"Summarized CV Text: {summarized_cv}")
    except Exception as e:
        debug_lines.append(f"Error summarizing CV text: {e}")
        return [], "\n".join(debug_lines)

    # Summarize the job description.
    try:
        summarized_job_desc = summarize_text(job_descriptions, max_length=150)
        debug_lines.append(f"Summarized Job Description Text: {summarized_job_desc}")
    except Exception as e:
        debug_lines.append(f"Error summarizing job descriptions: {e}")
        return [], "\n".join(debug_lines)

    # Encode the summarized CV once; it is compared against every sentence.
    try:
        cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
        debug_lines.append(f"CV Embedding: {cv_embedding}")
    except Exception as e:
        debug_lines.append(f"Error encoding CV text: {e}")
        return [], "\n".join(debug_lines)

    # Split the summarized job description into sentences.
    try:
        description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
    except Exception as e:
        debug_lines.append(f"Error tokenizing job description: {e}")
        return [], "\n".join(debug_lines)

    for sentence in description_sentences:
        try:
            # Score this sentence against the CV embedding.
            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
            debug_lines.append(f"\nJob Description Sentence Embedding: {sentence_embedding}")

            similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
            debug_lines.append(f"Similarity Score for sentence: {similarity_score}")

            results.append({
                "Job Description Sentence": sentence,
                "Similarity Score": similarity_score,
            })
        except Exception as e:
            # Best-effort: skip the failing sentence, keep the rest.
            debug_lines.append(f"Error processing sentence '{sentence}': {e}")
            continue

    # Sort by similarity, best match first. (The original wrapped this in a
    # try/except, but sorting dicts on a float key cannot raise here.)
    results.sort(key=lambda item: item["Similarity Score"], reverse=True)

    return results, "\n".join(debug_lines)

# Gradio interface: two free-text inputs, one button, JSON results plus a
# debug-trace textbox wired to match_cv_to_jobs.
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")
    
    # Input fields for CV and job descriptions (multi-line free text).
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)
    
    # Button and output area: results render as JSON (list of
    # sentence/score dicts), debug info as plain text.
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Add a debug box to display debug info
    
    # Set button click to run the function; match_cv_to_jobs returns a
    # (results, debug_info) tuple matching the two outputs in order.
    match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions], outputs=[output, debug_output])

# Start the web server (blocks until the app is stopped).
demo.launch()