Spaces:
Sleeping
Sleeping
datasciencedojo
committed on
Commit
•
416760a
1
Parent(s):
4c56ef8
Update utils/utils.py
Browse files- utils/utils.py +97 -0
utils/utils.py
CHANGED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PyPDF2 import PdfReader
|
2 |
+
from agents.agents import get_agent_groq
|
3 |
+
import json
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
def parse_resume(path):
    """Extract the text of every page of a PDF resume as one string.

    Args:
        path: The PDF source, passed straight to ``PdfReader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    reader = PdfReader(path)
    pages = reader.pages
    print(len(pages))
    # Join once instead of growing a string page by page. A page whose
    # extraction yields nothing contributes an empty string rather than
    # breaking the concatenation.
    return "".join(page.extract_text() or "" for page in pages)
|
14 |
+
def parse_resumes(resumes_list):
    """Extract the full text of each PDF resume in a collection.

    Args:
        resumes_list: Iterable of PDF sources; each item is passed to
            ``PdfReader``.

    Returns:
        A list with one string per resume, in input order. Each string is
        that resume's page texts concatenated without a separator.
    """
    resumes_text = []
    for resume in resumes_list:
        pages = PdfReader(resume).pages
        # A page whose extraction yields nothing contributes an empty string
        # instead of breaking the concatenation.
        resumes_text.append(
            "".join(page.extract_text() or "" for page in pages)
        )
    return resumes_text
|
24 |
+
|
25 |
+
|
26 |
+
def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Ask the Groq agent to analyse one resume and return the parsed reply.

    The template's ``format`` method is called with the fields ``resume``,
    ``job_listing``, ``job_title_text`` and ``must_have``. The ``content`` of
    the agent's reply is then handed to ``extract``.

    Returns:
        The value produced by ``extract`` for the reply; it is also printed.
    """
    groq_agent = get_agent_groq()
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    reply = groq_agent.invoke(prompt)
    analysis = extract(reply.content)
    print(analysis)
    return analysis
|
35 |
+
|
36 |
+
def generate_sel_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Ask the Groq agent for a selection analysis and return the parsed reply.

    The template's ``format`` method is called with the fields ``resume``,
    ``job_listing``, ``job_title_text`` and ``must_have``. The raw reply
    ``content`` is printed, parsed with ``extract_sel``, and the parsed value
    is printed as well.

    Returns:
        The value produced by ``extract_sel`` for the reply.
    """
    groq_agent = get_agent_groq()
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    raw_reply = groq_agent.invoke(prompt).content
    print(raw_reply)
    selection = extract_sel(raw_reply)
    print(selection)
    return selection
|
44 |
+
|
45 |
+
def extract(content):
    """Parse the JSON object embedded in a model reply into a dict.

    The JSON is taken from the first triple-backtick fence in the reply; the
    fence may carry a language tag such as ``json``. When the reply contains
    no fence at all, the whole reply is parsed as JSON instead of failing on
    the missing match.

    Args:
        content: The reply text.

    Returns:
        A new dict holding the top-level key/value pairs. Each pair is also
        printed as ``key: value``.

    Raises:
        json.JSONDecodeError: If the selected text is not valid JSON.
        ValueError: If the JSON decodes to something other than an object.
    """
    fenced = re.search(r"```[A-Za-z]*[ \t]*\n(.*?)\n```", content, re.DOTALL)
    json_string = fenced.group(1) if fenced else content

    data = json.loads(json_string)
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a JSON object, got {type(data).__name__}"
        )

    for key, value in data.items():
        print(f"{key}: {value}")
    return dict(data)
|
58 |
+
def extract_mist(json_string):
    """Decode a JSON object string into a dict, printing each entry.

    Args:
        json_string: Text containing a single JSON object.

    Returns:
        A new dict holding the top-level key/value pairs. Each pair is also
        printed as ``key: value``.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
        ValueError: If the JSON decodes to something other than an object.
    """
    data = json.loads(json_string)
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a JSON object, got {type(data).__name__}"
        )

    for key, value in data.items():
        print(f"{key}: {value}")
    return dict(data)
|
67 |
+
|
68 |
+
|
69 |
+
def extract_sel(content):
    """Parse a multi-candidate model reply into a list of decoded JSON values.

    The reply is split on bolded headings (``**...**``), which the reply uses
    for candidate names. The headings act only as delimiters and are not part
    of the result. The text after each heading is decoded as JSON; a section
    wrapped in a triple-backtick fence (optionally tagged, e.g. ``json``) is
    unwrapped first so fenced output is not rejected.

    Args:
        content: The reply text.

    Returns:
        The decoded JSON value of every section, in reply order. The list is
        empty when the reply has no bolded heading, or when any section
        fails to decode (the decode error is printed).
    """
    # With one capturing group, re.split returns
    # [preamble, heading, section, heading, section, ...], so the sections
    # sit at the even indices starting from 2.
    parts = re.split(r'\*\*(.*?)\*\*', content)

    candidate_json_list = []
    try:
        for section in parts[2::2]:
            json_string = section.strip()
            fenced = re.fullmatch(
                r"```[A-Za-z]*\s*(.*?)\s*```", json_string, re.DOTALL
            )
            if fenced:
                json_string = fenced.group(1)
            candidate_json_list.append(json.loads(json_string))
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []

    return candidate_json_list
|
90 |
+
|
91 |
+
def generate_adv(job_listing_text, job_title_text, prompt_template):
    """Ask the Groq agent to respond to a job-listing prompt.

    The template's ``format`` method is called with the fields
    ``job_listing`` and ``job_title_text``.

    Returns:
        The ``content`` of the agent's reply, unmodified; it is also printed.
    """
    groq_agent = get_agent_groq()
    prompt = prompt_template.format(
        job_listing=job_listing_text,
        job_title_text=job_title_text,
    )
    reply_text = groq_agent.invoke(prompt).content
    print(reply_text)
    return reply_text
|