"""Streamlit app that scores how well a pasted resume matches a job description.

The pipeline is: rule-based parsing of both texts -> weighted match score ->
natural-language assessment generated by a small Qwen instruct model, with a
templated fallback when generation fails or returns too little text.
"""

import datetime
import json
import re
import time

import nltk
import pandas as pd
import streamlit as st
import torch
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Skill vocabularies shared by the resume parser and the job-description
# parser (this would be a more extensive list in production).
TECH_SKILLS = [
    "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
    "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
    "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning",
    "Deep Learning", "NLP", "AWS", "Azure", "GCP", "Docker", "Kubernetes",
    "CI/CD", "Jenkins", "GitHub Actions", "REST API", "GraphQL",
    "Microservices", "Serverless",
]
SOFT_SKILLS = [
    "Leadership", "Communication", "Teamwork", "Problem-solving",
    "Critical thinking", "Time management", "Adaptability", "Creativity",
    "Collaboration", "Presentation",
]

# Upper bound applied to the estimated years of experience.
MAX_EXPERIENCE_YEARS = 30
# Maximum number of characters kept for the experience/education previews.
PREVIEW_LENGTH = 300
# Overall score at or above which the candidate is labelled "FIT".
FIT_THRESHOLD = 70
# Generated assessments shorter than this are replaced by the fallback text.
MIN_ASSESSMENT_LENGTH = 50

# One pattern for "2019 - 2022" and "2019 - present" style ranges. A single
# pattern is used so that a range is never counted twice.
_YEAR_RANGE_RE = re.compile(
    r'\b((?:19|20)\d{2})\s*[-\u2013\u2014]\s*((?:19|20)\d{2}|present|current|now)\b',
    re.IGNORECASE,
)

# "5+ years of experience" and "experience of 5 years" style requirements.
_REQUIRED_YEARS_PATTERNS = [
    r'(\d+)\+?\s*(?:years?|yrs?)(?:\s*of)?\s*(?:experience|exp)',
    r'(?:experience|exp)(?:\s*of)?\s*(\d+)\+?\s*(?:years?|yrs?)',
]

# Set page title and configuration
st.set_page_config(
    page_title="Resume-Job Fit Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)


# Download NLTK resources if needed
@st.cache_resource
def download_nltk_resources():
    """Ensure the NLTK data used by the app is present and return stop words.

    Each resource is looked up on its own so that only the missing one is
    downloaded.

    Returns:
        The English stop-word list from ``nltk.corpus.stopwords``.
    """
    resources = (
        ("tokenizers/punkt", "punkt"),
        ("corpora/stopwords", "stopwords"),
    )
    for resource_path, package in resources:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package)
    return stopwords.words('english')


stop_words = download_nltk_resources()


# Load models
@st.cache_resource
def load_models():
    """Load and cache the NLP models.

    Returns:
        A dict with the keys ``'parser'`` (BART text2text pipeline),
        ``'evaluator'`` (Qwen causal LM) and ``'evaluator_tokenizer'``.
    """
    models = {}

    # Use BART for resume parsing
    models['parser'] = pipeline(
        "text2text-generation",
        model="facebook/bart-base",  # This would be the fine-tuned model in production
        device=0 if torch.cuda.is_available() else -1
    )

    # Use Qwen for evaluation
    models['evaluator'] = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

    return models


# Extract skills from text
def extract_skills(text, skill_keywords):
    """Return the skills from ``skill_keywords`` that occur in ``text``.

    Matching is case-insensitive and requires that the skill is not embedded
    in a longer word. Lookarounds are used instead of ``\\b`` because ``\\b``
    cannot match after a trailing non-word character, which made skills such
    as "C++" impossible to find.

    Args:
        text: Free text to search.
        skill_keywords: Iterable of skill names to look for.

    Returns:
        A list of the matched skills (original spelling), without
        duplicates, in the order they appear in ``skill_keywords``.
    """
    text_lower = text.lower()
    found_skills = []
    for skill in skill_keywords:
        pattern = r'(?<!\w)' + re.escape(skill.lower()) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.append(skill)
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(found_skills))


def _truncate(text, limit=PREVIEW_LENGTH):
    """Return ``text`` cut to ``limit`` characters, with "..." if shortened."""
    return text[:limit] + "..." if len(text) > limit else text


def _estimate_years_of_experience(text):
    """Sum the lengths of all year ranges found in ``text``.

    Open-ended ranges ("2020 - present") end at the current calendar year.
    End years in the future are clamped to the current year, reversed ranges
    contribute zero, and the total is capped at ``MAX_EXPERIENCE_YEARS``.
    """
    current_year = datetime.date.today().year
    total_years = 0
    for start_raw, end_raw in _YEAR_RANGE_RE.findall(text):
        start_year = int(start_raw)
        end_year = int(end_raw) if end_raw.isdigit() else current_year
        end_year = min(end_year, current_year)
        total_years += max(0, end_year - start_year)
    return min(total_years, MAX_EXPERIENCE_YEARS)


# Parse resume
def parse_resume(resume_text, models):
    """Extract structured information from resume text.

    In production this would use the fine-tuned BART model; for now it is a
    simple rule-based parser, so ``models`` is accepted but not used.

    Args:
        resume_text: Raw resume text.
        models: Dict returned by ``load_models`` (currently unused).

    Returns:
        A dict of the form ``{"skills": {"technical": [...], "soft": [...]},
        "experience": {"years": int, "summary": str}, "education": str}``.
    """
    # Clean the text
    clean_text = re.sub(r'\s+', ' ', resume_text).strip()

    # Extract skills
    found_tech_skills = extract_skills(clean_text, TECH_SKILLS)
    found_soft_skills = extract_skills(clean_text, SOFT_SKILLS)

    # Extract experience using regex patterns (simplified)
    experience_pattern = r'(?:Experience|EXPERIENCE|Work Experience|WORK EXPERIENCE).*?(?:Education|EDUCATION|Skills|SKILLS|$)'
    experience_match = re.search(experience_pattern, clean_text, re.DOTALL)
    experience_text = experience_match.group(0) if experience_match else ""

    # Extract education using regex patterns (simplified)
    education_pattern = r'(?:Education|EDUCATION).*?(?:Skills|SKILLS|Experience|EXPERIENCE|$)'
    education_match = re.search(education_pattern, clean_text, re.DOTALL)
    education_text = education_match.group(0) if education_match else ""

    # Estimate years of experience (simplified)
    years_exp = _estimate_years_of_experience(clean_text)

    # Create structured data
    return {
        "skills": {
            "technical": found_tech_skills,
            "soft": found_soft_skills,
        },
        "experience": {
            "years": years_exp,
            "summary": _truncate(experience_text),
        },
        "education": _truncate(education_text),
    }


# Parse job description
def parse_job_description(job_text):
    """Extract key requirements from a job description.

    Args:
        job_text: Raw job-description text.

    Returns:
        A dict of the form ``{"title": str, "requirements":
        {"technical_skills": [...], "soft_skills": [...],
        "years_experience": int}, "full_text": str}``. ``years_experience``
        is 0 when no requirement was found.
    """
    # Clean the text
    clean_text = re.sub(r'\s+', ' ', job_text).strip()

    # Extract skills (same vocabularies as the resume parser)
    required_tech_skills = extract_skills(clean_text, TECH_SKILLS)
    required_soft_skills = extract_skills(clean_text, SOFT_SKILLS)

    # Extract years of experience requirement (simplified); keep the highest
    # number mentioned anywhere in the text.
    required_years = 0
    for pattern in _REQUIRED_YEARS_PATTERNS:
        for years in re.findall(pattern, clean_text, re.IGNORECASE):
            required_years = max(required_years, int(years))

    # The title is taken from the first non-empty line of the RAW text:
    # clean_text has had its newlines collapsed, so it has no first line.
    job_title = next(
        (line.strip() for line in job_text.splitlines() if line.strip()),
        "Not specified",
    )

    # Create structured data
    return {
        "title": job_title,
        "requirements": {
            "technical_skills": required_tech_skills,
            "soft_skills": required_soft_skills,
            "years_experience": required_years,
        },
        "full_text": job_text,
    }


def _score_skill_group(required, candidate):
    """Score one skill category as the percentage of required skills held.

    A category with no required skills scores 0 with empty matched/missing
    lists.
    """
    required_set = set(required)
    candidate_set = set(candidate)
    if not required_set:
        return {"score": 0, "matched": [], "missing": []}

    matched = required_set & candidate_set
    return {
        "score": int(len(matched) / len(required_set) * 100),
        # Sorted so the output does not depend on set iteration order.
        "matched": sorted(matched),
        "missing": sorted(required_set - candidate_set),
    }


# Calculate match score
def calculate_match_score(resume_data, job_data):
    """Calculate how well the resume matches the job description.

    Args:
        resume_data: Dict produced by ``parse_resume``.
        job_data: Dict produced by ``parse_job_description``.

    Returns:
        A dict with ``"technical_skills"`` and ``"soft_skills"`` (each with
        ``score``/``matched``/``missing``), ``"experience"`` (``score``,
        ``candidate_years``, ``required_years``) and the integer
        ``"overall"`` score, weighted 60% technical, 20% soft skills and
        20% experience.
    """
    scores = {
        "technical_skills": _score_skill_group(
            job_data["requirements"]["technical_skills"],
            resume_data["skills"]["technical"],
        ),
        "soft_skills": _score_skill_group(
            job_data["requirements"]["soft_skills"],
            resume_data["skills"]["soft"],
        ),
    }

    # Experience match
    required_years = job_data["requirements"]["years_experience"]
    candidate_years = resume_data["experience"]["years"]

    if required_years > 0:
        if candidate_years >= required_years:
            exp_score = 100
        else:
            exp_score = int((candidate_years / required_years) * 100)
        scores["experience"] = {
            "score": exp_score,
            "candidate_years": candidate_years,
            "required_years": required_years,
        }
    else:
        # No stated requirement: any experience counts as a full match.
        scores["experience"] = {
            "score": 100 if candidate_years > 0 else 50,
            "candidate_years": candidate_years,
            "required_years": "Not specified",
        }

    # Calculate overall score (weighted)
    tech_weight = 0.6
    soft_weight = 0.2
    exp_weight = 0.2

    overall_score = (
        scores["technical_skills"]["score"] * tech_weight
        + scores["soft_skills"]["score"] * soft_weight
        + scores["experience"]["score"] * exp_weight
    )
    scores["overall"] = int(overall_score)

    return scores


# Generate expert assessment using Qwen
def generate_assessment(resume_data, job_data, match_scores, models):
    """Generate an expert assessment using the Qwen model.

    The FIT / NOT FIT label is derived from the overall score, not from the
    generated text. If generation raises, or yields fewer than
    ``MIN_ASSESSMENT_LENGTH`` characters, the templated fallback is used.

    Args:
        resume_data: Dict produced by ``parse_resume``.
        job_data: Dict produced by ``parse_job_description``.
        match_scores: Dict produced by ``calculate_match_score``.
        models: Dict returned by ``load_models``.

    Returns:
        A ``(assessment, fit_status)`` tuple of strings.
    """
    # Prepare context
    job_title = job_data["title"]
    tech_scores = match_scores["technical_skills"]
    matched_skills = tech_scores["matched"]
    missing_skills = tech_scores["missing"]
    experience_match = match_scores["experience"]
    overall_score = match_scores["overall"]

    # Determine fit classification
    fit_status = "FIT" if overall_score >= FIT_THRESHOLD else "NOT FIT"

    system_message = (
        "You are an expert resume evaluator. Analyze how well a candidate "
        "fits a job posting and provide professional feedback."
    )
    user_message = "\n".join([
        f"Evaluate this candidate for a {job_title} position.",
        f"Overall match score: {overall_score}%",
        f"Technical skills match: {tech_scores['score']}%",
        f"Soft skills match: {match_scores['soft_skills']['score']}%",
        f"Experience match: {experience_match['score']}%",
        f"Candidate has: {experience_match['candidate_years']} years of experience",
        f"Position requires: {experience_match['required_years']} years of experience",
        f"Matched technical skills: {', '.join(matched_skills) if matched_skills else 'None'}",
        f"Missing technical skills: {', '.join(missing_skills) if missing_skills else 'None'}",
        "Create a professional assessment of this candidate. First state "
        "whether they are a FIT or NOT FIT for the position, then explain "
        "why with specific strengths and development areas.",
    ])
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]

    try:
        # Generate the assessment using Qwen
        tokenizer = models['evaluator_tokenizer']
        qwen_model = models['evaluator']

        # Let the tokenizer build the chat-formatted prompt for this model.
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(prompt, return_tensors="pt").to(qwen_model.device)

        with torch.no_grad():
            outputs = qwen_model.generate(
                **inputs,  # input_ids and attention_mask
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )

        # generate() returns prompt + continuation for a causal LM; decode
        # only the new tokens so the prompt never leaks into the assessment.
        prompt_length = inputs["input_ids"].shape[1]
        assessment = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        # Clean up any remaining markers
        assessment = re.sub(r'<\|im_(start|end)\|>', '', assessment)
        assessment = assessment.strip()

        # If no usable assessment was generated, create a fallback
        if len(assessment) < MIN_ASSESSMENT_LENGTH:
            assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)

    except Exception as e:
        # UI boundary: report the problem and still show a usable result.
        st.error(f"Error generating assessment: {str(e)}")
        assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)

    return assessment, fit_status


# Generate fallback assessment
def generate_fallback_assessment(resume_data, job_data, match_scores, fit_status):
    """Generate a templated assessment for use when the model fails.

    Args:
        resume_data: Dict produced by ``parse_resume`` (unused here).
        job_data: Dict produced by ``parse_job_description``.
        match_scores: Dict produced by ``calculate_match_score``.
        fit_status: ``"FIT"`` selects the positive template; any other value
            selects the "NOT FIT" template.

    Returns:
        The assessment text.
    """
    job_title = job_data["title"]
    matched_skills = match_scores["technical_skills"]["matched"]
    missing_skills = match_scores["technical_skills"]["missing"]
    overall_score = match_scores["overall"]

    if fit_status == "FIT":
        assessment = (
            f"FIT: This candidate demonstrates a strong alignment with the {job_title} position, "
            f"achieving an overall match score of {overall_score}%. "
            f"Their proficiency in {', '.join(matched_skills) if matched_skills else 'relevant skills'} "
            "positions them well to contribute effectively from the start. "
            "The candidate's experience level is suitable for the role's requirements. "
            "To maximize their success, they could consider developing expertise in "
            f"{', '.join(missing_skills) if missing_skills else 'additional specialized areas relevant to this role'}.\n"
        )
    else:
        assessment = (
            f"NOT FIT: This candidate currently shows limited alignment with the {job_title} position, "
            f"with an overall match score of {overall_score}%. "
            "While they demonstrate some relevant capabilities in "
            f"{', '.join(matched_skills) if matched_skills else 'a few areas'}, "
            "they would need to develop expertise in critical areas such as "
            f"{', '.join(missing_skills) if missing_skills else 'key technical requirements for this position'}. "
            "The candidate may become more competitive for this role by focusing on these skill gaps "
            "and gaining more relevant experience.\n"
        )

    return assessment


# Create the main header and interface
st.title("Resume-Job Fit Analyzer")
st.markdown("### Evaluate how well a resume matches a job description")

# Setup columns for input
col1, col2 = st.columns(2)

with col1:
    # Resume input
    st.subheader("Resume")
    resume_text = st.text_area(
        "Paste resume text here",
        height=300,
        placeholder="Paste the candidate's resume text here..."
    )

with col2:
    # Job description input
    st.subheader("Job Description")
    job_description = st.text_area(
        "Paste job description here",
        height=300,
        placeholder="Paste the job description here..."
    )

# Analysis button
analyze_button = st.button("Analyze Match", type="primary", use_container_width=True)

# Main analysis logic
if analyze_button:
    # Whitespace-only input is treated the same as empty input.
    if not (resume_text or "").strip() or not (job_description or "").strip():
        st.error("Please provide both a resume and a job description.")
    else:
        with st.spinner("Analyzing resume and job match..."):
            # Record start time
            start_time = time.time()

            # Load models (uses caching so only loads once)
            models = load_models()

            # Parse resume and job description
            resume_data = parse_resume(resume_text, models)
            job_data = parse_job_description(job_description)

            # Calculate match score
            match_scores = calculate_match_score(resume_data, job_data)

            # Generate assessment
            assessment, fit_status = generate_assessment(resume_data, job_data, match_scores, models)

            # Calculate execution time
            execution_time = time.time() - start_time

        # Display results
        st.success(f"Analysis complete in {execution_time:.2f} seconds")

        # Display fit status prominently
        st.markdown(f"## Overall Result: {fit_status}")

        # Display match score
        st.subheader("Match Score")
        score_col1, score_col2, score_col3 = st.columns(3)

        with score_col1:
            st.metric("Overall Match", f"{match_scores['overall']}%")
        with score_col2:
            st.metric("Technical Skills", f"{match_scores['technical_skills']['score']}%")
        with score_col3:
            st.metric("Experience Match", f"{match_scores['experience']['score']}%")

        # Show skills breakdown
        st.subheader("Skills Breakdown")
        skill_col1, skill_col2 = st.columns(2)

        with skill_col1:
            st.markdown("##### Matched Skills")
            if match_scores["technical_skills"]["matched"]:
                for skill in match_scores["technical_skills"]["matched"]:
                    st.markdown(f"✅ {skill}")
            else:
                st.markdown("No matched skills found")

        with skill_col2:
            st.markdown("##### Missing Skills")
            if match_scores["technical_skills"]["missing"]:
                for skill in match_scores["technical_skills"]["missing"]:
                    st.markdown(f"❌ {skill}")
            else:
                st.markdown("No missing skills detected")

        # Show experience comparison
        st.subheader("Experience")
        exp_col1, exp_col2 = st.columns(2)

        with exp_col1:
            # 0 means the job text stated no requirement; do not show "0 years".
            required_years = job_data['requirements']['years_experience']
            required_label = f"{required_years} years" if required_years else "Not specified"
            st.markdown(f"**Required**: {required_label}")
        with exp_col2:
            st.markdown(f"**Candidate has**: {resume_data['experience']['years']} years")

        # Display detailed assessment
        st.subheader("Expert Assessment")
        st.markdown(assessment)

        # Show parsed data (expandable)
        with st.expander("View Parsed Data"):
            data_col1, data_col2 = st.columns(2)

            with data_col1:
                st.subheader("Resume Data")
                st.json(resume_data)
            with data_col2:
                st.subheader("Job Requirements")
                st.json(job_data)