"""Gradio app that scores how well a CV matches a job description.

Both texts are summarized with a BART summarization pipeline, the CV summary
is embedded with a SentenceTransformer model, and every sentence of the job
description summary is ranked by cosine similarity against the CV embedding.
"""

from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import gradio as gr
import nltk

# Load the SentenceTransformer model for sentence similarity.
# On failure the name is still bound (to None) so that later code can report
# a clear error instead of crashing with a NameError.
try:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    model = None
    print(f"Error loading SentenceTransformer model: {e}")

# Load a summarization pipeline (same None-on-failure convention as above).
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
except Exception as e:
    summarizer = None
    print(f"Error loading summarization pipeline: {e}")

# Download NLTK punkt tokenizer data if not already installed.
nltk.download('punkt')
# NOTE(review): newer NLTK releases appear to load the sentence tokenizer from
# the 'punkt_tab' resource instead of 'punkt'; fetching both keeps
# sent_tokenize working across versions -- confirm against the pinned NLTK.
nltk.download('punkt_tab', quiet=True)


def _summarize_or_raise(text, max_length, min_length):
    """Summarize *text* and return the summary string, raising on any failure."""
    if summarizer is None:
        raise RuntimeError("summarization pipeline is not loaded")
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Truncate over-long inputs to the model's input limit instead of
        # failing inside the model on long CVs / job descriptions.
        truncation=True,
    )
    return summary[0]["summary_text"]


def summarize_text(text, max_length=100, min_length=25):
    """Return a summary of *text*, or an error message string on failure.

    Args:
        text: The text to summarize.
        max_length: Maximum length passed to the summarization pipeline.
        min_length: Minimum length passed to the summarization pipeline.

    Returns:
        The generated summary. If summarization fails, a string starting with
        "Error summarizing text:" is returned instead of raising.
    """
    try:
        return _summarize_or_raise(text, max_length, min_length)
    except Exception as e:
        return f"Error summarizing text: {e}"


def match_cv_to_jobs(cv_text, job_descriptions):
    """Rank job description sentences by similarity to the CV.

    Args:
        cv_text: Raw CV text.
        job_descriptions: Raw job description text.

    Returns:
        A ``(results, debug_info)`` tuple. ``results`` is a list of dicts with
        the keys "Job Description Sentence" and "Similarity Score", sorted by
        score in descending order; it is empty if any stage fails.
        ``debug_info`` is a human-readable log of the intermediate steps.
    """
    debug_info = "Debug Info:\n"
    results = []

    # Fail fast with a readable message when a prerequisite is missing.
    if model is None:
        debug_info += "Error: SentenceTransformer model is not loaded\n"
        return [], debug_info
    if not (cv_text or "").strip():
        debug_info += "Error: CV text is empty\n"
        return [], debug_info
    if not (job_descriptions or "").strip():
        debug_info += "Error: job description text is empty\n"
        return [], debug_info

    # Summarize the CV text. The raising helper is used (rather than
    # summarize_text) so a failure is detected here instead of an error
    # message being embedded and scored as if it were a summary.
    try:
        summarized_cv = _summarize_or_raise(cv_text, max_length=150, min_length=25)
        debug_info += f"Summarized CV Text: {summarized_cv}\n"
    except Exception as e:
        debug_info += f"Error summarizing CV text: {e}\n"
        return [], debug_info

    # Summarize the job description
    try:
        summarized_job_desc = _summarize_or_raise(
            job_descriptions, max_length=150, min_length=25
        )
        debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
    except Exception as e:
        debug_info += f"Error summarizing job descriptions: {e}\n"
        return [], debug_info

    # Encode the summarized CV text
    try:
        cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
        debug_info += f"CV Embedding: {cv_embedding}\n"
    except Exception as e:
        debug_info += f"Error encoding CV text: {e}\n"
        return [], debug_info

    # Split summarized job description into sentences
    try:
        description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)
    except Exception as e:
        debug_info += f"Error tokenizing job description: {e}\n"
        return [], debug_info

    for sentence in description_sentences:
        try:
            # Encode each sentence from the summarized job description
            sentence_embedding = model.encode(sentence, convert_to_tensor=True)
            debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"

            # Compute similarity score
            similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
            debug_info += f"Similarity Score for sentence: {similarity_score}\n"

            results.append({
                "Job Description Sentence": sentence,
                "Similarity Score": similarity_score,
            })
        except Exception as e:
            # Best effort: one bad sentence must not discard the other scores.
            debug_info += f"Error processing sentence '{sentence}': {e}\n"
            continue

    # Sort results by similarity score in descending order
    results.sort(key=lambda item: item["Similarity Score"], reverse=True)

    return results, debug_info


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")

    # Input fields for CV and job descriptions
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions = gr.Textbox(
        label="Job Descriptions",
        placeholder="Enter the entire job description text here",
        lines=10,
    )

    # Button and output area
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Debug box to display debug info

    # Set button click to run the function
    match_button.click(
        fn=match_cv_to_jobs,
        inputs=[cv_text, job_descriptions],
        outputs=[output, debug_output],
    )

demo.launch()