Spaces:

datasciencedojo
/

SmartHire-Assistant

Sleeping

App Files Files Community

SmartHire-Assistant / utils /utils.py

datasciencedojo

Update utils/utils.py

416760a verified about 1 month ago

raw

history blame

3.37 kB

	from PyPDF2 import PdfReader
	from agents.agents import get_agent_groq
	import json
	import re


	def parse_resume(path):
	    """Return the concatenated text of every page of the PDF at *path*.

	    Args:
	        path: Anything accepted by ``PdfReader`` (e.g. a file path or stream).

	    Returns:
	        A single string made of each page's ``extract_text()`` output, in
	        page order, with nothing inserted between pages.
	    """
	    reader = PdfReader(path)
	    # Page count is echoed to stdout before extraction starts.
	    print(len(reader.pages))
	    return ''.join(page.extract_text() for page in reader.pages)
	def parse_resumes(resumes_list):
	    """Extract the full text of each PDF in *resumes_list*.

	    Args:
	        resumes_list: Iterable of inputs accepted by ``PdfReader``.

	    Returns:
	        A list with one string per input, in the same order; each string is
	        the concatenation of that document's per-page ``extract_text()``
	        output.
	    """
	    return [
	        ''.join(page.extract_text() for page in PdfReader(document).pages)
	        for document in resumes_list
	    ]


	def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
	    """Fill *prompt_template*, send it to the Groq agent and return the parsed reply.

	    Args:
	        resume_text: Value substituted for ``{resume}`` in the template.
	        job_listing_text: Value substituted for ``{job_listing}``.
	        job_title_text: Value substituted for ``{job_title_text}``.
	        must_have: Value substituted for ``{must_have}``.
	        prompt_template: String-like object whose ``format`` accepts the
	            four keyword fields above.

	    Returns:
	        Whatever ``extract`` returns for the reply's ``content``; the value
	        is also printed to stdout.
	    """
	    agent = get_agent_groq()
	    filled_prompt = prompt_template.format(
	        resume=resume_text,
	        job_listing=job_listing_text,
	        job_title_text=job_title_text,
	        must_have=must_have,
	    )
	    reply = agent.invoke(filled_prompt)
	    parsed = extract(reply.content)
	    print(parsed)
	    return parsed

	def generate_sel_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
	    """Fill *prompt_template*, query the Groq agent and parse the reply with ``extract_sel``.

	    Args:
	        resume_text: Value substituted for ``{resume}`` in the template.
	        job_listing_text: Value substituted for ``{job_listing}``.
	        job_title_text: Value substituted for ``{job_title_text}``.
	        must_have: Value substituted for ``{must_have}``.
	        prompt_template: String-like object whose ``format`` accepts the
	            four keyword fields above.

	    Returns:
	        The result of ``extract_sel`` applied to the reply's ``content``.
	        Both the raw content and the parsed result are printed to stdout.
	    """
	    agent = get_agent_groq()
	    filled_prompt = prompt_template.format(
	        resume=resume_text,
	        job_listing=job_listing_text,
	        job_title_text=job_title_text,
	        must_have=must_have,
	    )
	    reply = agent.invoke(filled_prompt)
	    print(reply.content)
	    selections = extract_sel(reply.content)
	    print(selections)
	    return selections

	def extract(content):
	    """Parse the JSON object embedded in *content* and return it as a new dict.

	    The JSON is looked up inside the first triple-backtick fenced block. The
	    opening fence may carry a language tag (e.g. ``json``). When no fenced
	    block is present, the whole of *content* is treated as the JSON payload
	    rather than failing on a missing regex match.

	    Args:
	        content: Text containing a JSON object, optionally fenced.

	    Returns:
	        A dict holding the decoded key/value pairs. Each pair is also
	        printed to stdout as ``key: value``.

	    Raises:
	        json.JSONDecodeError: If the selected text is not valid JSON.
	        AttributeError: If the decoded JSON is not an object (no ``items``).
	    """
	    # Optional alphabetic tag and trailing blanks after the opening fence,
	    # then the payload up to the first closing fence on its own line start.
	    fenced = re.search(r'```[A-Za-z]*[ \t]*\n(.*?)\n```', content, re.DOTALL)
	    json_string = fenced.group(1) if fenced else content.strip()

	    data = json.loads(json_string)
	    for key, value in data.items():
	        print(f"{key}: {value}")
	    # Return a shallow copy so callers never share the decoder's dict.
	    return dict(data)
	def extract_mist(json_string):
	    """Decode *json_string* as a JSON object and return its pairs in a new dict.

	    Args:
	        json_string: Text containing a JSON object.

	    Returns:
	        A dict with the decoded key/value pairs, in document order. Each
	        pair is also printed to stdout as ``key: value``.
	    """
	    pairs = list(json.loads(json_string).items())
	    for name, val in pairs:
	        print(f"{name}: {val}")
	    return dict(pairs)


	def extract_sel(content):
	    """Parse the per-candidate JSON sections that follow bolded candidate names.

	    *content* is expected to look like ``**Name** {json} **Name** {json} ...``.
	    The text is split on the ``**...**`` markers and every section that
	    follows a marker is decoded as JSON. Text before the first marker is
	    ignored.

	    Args:
	        content: Response text with one JSON section per bolded name.

	    Returns:
	        A list with one decoded JSON value per candidate, in order of
	        appearance. An empty list is returned when no bolded name is found
	        or when any section fails to decode (the error is printed).
	    """
	    # The capturing group makes re.split return
	    # [preamble, name, section, name, section, ...]; the previous pattern
	    # matched backslashes instead of the ``**`` bold markers.
	    parts = re.split(r'\*\*(.*?)\*\*', content)
	    try:
	        # Sections sit at the even indices after the preamble.
	        return [json.loads(section.strip()) for section in parts[2::2]]
	    except json.JSONDecodeError as e:
	        print(f"Error decoding JSON: {e}")
	        return []

	def generate_adv(job_listing_text, job_title_text, prompt_template):
	    """Fill *prompt_template* with the job details and return the Groq agent's reply text.

	    Args:
	        job_listing_text: Value substituted for ``{job_listing}``.
	        job_title_text: Value substituted for ``{job_title_text}``.
	        prompt_template: String-like object whose ``format`` accepts the
	            two keyword fields above.

	    Returns:
	        The ``content`` attribute of the agent's response; it is also
	        printed to stdout.
	    """
	    agent = get_agent_groq()
	    filled_prompt = prompt_template.format(
	        job_listing=job_listing_text,
	        job_title_text=job_title_text,
	    )
	    generated = agent.invoke(filled_prompt).content
	    print(generated)
	    return generated