"""Resume parsing and LLM-based resume-vs-job-listing analysis helpers.

PDFs are read with PyPDF2; scoring is delegated to a Groq-backed agent
(``agents.agents.get_agent_groq``), optionally with structured output
validated against the :class:`ResumeAnalysis` TypedDict schema.
"""

import json
import re
import time

from PyPDF2 import PdfReader
from typing_extensions import Annotated, TypedDict

from agents import prompts
from agents.agents import get_agent_groq


def parse_resume(path):
    """Return the concatenated text of every page of the PDF at *path*.

    Pages for which ``extract_text()`` yields ``None`` (e.g. image-only
    pages) contribute an empty string instead of raising ``TypeError``.
    """
    reader = PdfReader(path)
    print(len(reader.pages))  # debug: page count
    # str.join avoids quadratic += concatenation across many pages.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def parse_resumes(resumes_list):
    """Return a list with the extracted text of each PDF in *resumes_list*.

    Unlike :func:`parse_resume`, this does not print per-file page counts.
    """
    texts = []
    for resume in resumes_list:
        reader = PdfReader(resume)
        texts.append(''.join(page.extract_text() or '' for page in reader.pages))
    return texts


def parse_(resumes_list):
    """Variant of :func:`parse_resumes` that delegates to :func:`parse_resume`
    (and therefore prints each file's page count)."""
    return [parse_resume(resume) for resume in resumes_list]


# Schema for structured LLM output. The Annotated description strings are
# part of the schema sent to the model — do not edit them casually.
class ResumeAnalysis(TypedDict):
    candidate_name: Annotated[str, ..., "Name of the candidate with the highest score"]
    overall_match_score: Annotated[int, ..., "sum of scores for skills_keywords_score, experience_score, education_certifications_score, and preferred_qualifications_score (Whole Number)"]
    skills_keywords_score: Annotated[int, ..., "Score for Skills and Keywords (0-40)"]
    skills_keywords_explanation: Annotated[str, ..., "Explanation for Skills and Keywords"]
    experience_score: Annotated[int, ..., "Score for Experience (0-30)"]
    experience_explanation: Annotated[str, ..., "Explanation for Experience"]
    education_certifications_score: Annotated[int, ..., "Score for Education & Certifications (0-20)"]
    education_certifications_explanation: Annotated[str, ..., "Explanation for Education & Certifications"]
    preferred_qualifications_score: Annotated[int, ..., "Score for Preferred Qualifications (0-10)"]
    preferred_qualifications_explanation: Annotated[str, ..., "Explanation for Preferred Qualifications"]
    score_interpretation: Annotated[str, ..., "donot mention any numbers here, just Interpretation in words of the overall_match_score"]


def generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume against a job listing using structured LLM output.

    Returns a :class:`ResumeAnalysis`-shaped dict. The overall score is
    recomputed locally as the sum of the four sub-scores rather than
    trusting the model's arithmetic.
    """
    agent = get_agent_groq().with_structured_output(ResumeAnalysis)
    response = agent.invoke(
        prompt_template.format(
            resume=resume_text,
            job_listing=job_listing_text,
            job_title_text=job_title_text,
            must_have=must_have,
        )
    )
    # The model sometimes mis-sums; recompute deterministically.
    response['overall_match_score'] = (
        response['skills_keywords_score']
        + response['education_certifications_score']
        + response['experience_score']
        + response['preferred_qualifications_score']
    )
    print(response)
    return response


def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume via a free-text LLM response, parsed by :func:`extract`.

    Expects the model to wrap its JSON answer in a ``` fenced block.
    """
    agent = get_agent_groq()
    resp = agent.invoke(
        prompt_template.format(
            resume=resume_text,
            job_listing=job_listing_text,
            job_title_text=job_title_text,
            must_have=must_have,
        )
    )
    return extract(resp.content)


def generate_sel_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Run the per-resume structured analysis, then a selection prompt.

    NOTE(review): the result of generate_individual_analysis is discarded;
    the call is kept for its side effects (printing / pacing) — confirm this
    is intentional before removing it.
    """
    generate_individual_analysis(
        resume_text, job_listing_text, job_title_text, must_have,
        prompts.prompt_template_modern,
    )
    agent = get_agent_groq()
    response = agent.invoke(
        prompt_template.format(
            resume=resume_text,
            job_listing=job_listing_text,
            job_title_text=job_title_text,
            must_have=must_have,
        )
    )
    return extract_sel(response.content)


def generate_individual_analysis(resumes, job_listing_text, job_title_text, must_have, prompt_template, delay=10):
    """Analyze each resume individually, pausing *delay* seconds between
    calls to stay under the provider's tokens-per-minute limit.

    Returns the list of structured results (possibly empty).
    """
    all_results = []
    for resume_text in resumes:
        structured_response = generate_analysis_new(
            resume_text, job_listing_text, job_title_text, must_have, prompt_template
        )
        if structured_response:
            all_results.append(structured_response)
        time.sleep(delay)  # rate-limit pacing between LLM calls
    # Guard: max() on an empty sequence raises ValueError.
    best_match = max(all_results, key=lambda r: r.get("overall_match_score", 0)) if all_results else None
    print('best_match', best_match)
    print('all_results', all_results)
    return all_results


def extract(content):
    """Parse the JSON inside the first ``` fenced block of *content*.

    Raises:
        ValueError: if no fenced block is present.
        json.JSONDecodeError: if the block is not valid JSON.
    """
    match = re.search(r'```\n(.*?)\n```', content, re.DOTALL)
    if match is None:
        # Explicit error instead of AttributeError on None.group(1).
        raise ValueError("no fenced JSON block found in model response")
    data = json.loads(match.group(1))
    for key, value in data.items():
        print(f"{key}: {value}")  # debug: dump parsed fields
    return dict(data)


def extract_mist(json_string):
    """Parse *json_string* as JSON, printing each key/value pair."""
    data = json.loads(json_string)
    for key, value in data.items():
        print(f"{key}: {value}")
    return dict(data)


def extract_sel(content):
    """Split *content* on bolded candidate names and parse each candidate's
    trailing JSON section; returns a list of dicts (empty on parse failure).
    """
    try:
        # re.split with a capture group alternates: [pre, name, body, name, body, ...]
        parts = re.split(r'\*\*(.*?)\*\*', content)
        candidate_json_list = []
        # Stop before len-1 so parts[i+1] is always in range (a trailing
        # bold name with no JSON body would otherwise raise IndexError).
        for i in range(1, len(parts) - 1, 2):
            candidate_json_list.append(json.loads(parts[i + 1].strip()))
        return candidate_json_list
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []


def generate_adv(job_listing_text, job_title_text, prompt_template):
    """Generate a job advertisement from the listing via the Groq agent."""
    agent = get_agent_groq()
    resp = agent.invoke(
        prompt_template.format(job_listing=job_listing_text, job_title_text=job_title_text)
    )
    text = resp.content
    print(text)
    return text