Commit 71fdb6d (parent: ca85905)

Add utility and service packages, enhance profile model, and implement PDF handling
Files changed:
- __pycache__/agentProfile.cpython-311.pyc (+0, -0, binary)
- __pycache__/api.cpython-311.pyc (+0, -0, binary)
- __pycache__/models.cpython-311.pyc (+0, -0, binary)
- agentProfile.py (+326, -321)
- agents/__init__.py (+12, -0)
- agents/grammar_corrector.py (+57, -0)
- agents/profile_extractor.py (+346, -0)
- api.py (+30, -10)
- app.py (+167, -162)
- config.py (+43, -0)
- models.py (+1, -0)
- services/__init__.py (+6, -0)
- services/storage_service.py (+97, -0)
- temp.pdf (+0, -0, binary)
- utils/__init__.py (+6, -0)
- utils/pdf_utils.py (+58, -0)
__pycache__/agentProfile.cpython-311.pyc: CHANGED
Binary files a/__pycache__/agentProfile.cpython-311.pyc and b/__pycache__/agentProfile.cpython-311.pyc differ

__pycache__/api.cpython-311.pyc: CHANGED
Binary files a/__pycache__/api.cpython-311.pyc and b/__pycache__/api.cpython-311.pyc differ

__pycache__/models.cpython-311.pyc: CHANGED
Binary files a/__pycache__/models.cpython-311.pyc and b/__pycache__/models.cpython-311.pyc differ
agentProfile.py: CHANGED
Hunks: @@ -1,199 +1,198 @@ and @@ -211,168 +210,174 @@ def fill_missing_information(profile: Profile, pdf_text: str, llm) -> Profile:

Removed: the previous module-level implementation. It built a ChatGroq client inline
(groq_api_key=GROQ_API_KEY, model_name="qwen-2.5-32b", temperature=0.5, max_tokens=2048),
created a PydanticOutputParser for the Profile model, embedded the resume-extraction
format instructions directly in the function body, and defined
fill_missing_information(profile, pdf_text, llm), extract_profile_information_fallback(pdf_text)
with one prompt per field and manual parsing of the project, skill, and education strings,
and correct_grammar(text) (model "qwen-2.5-32b", temperature 0.3, max_tokens 1024) as free
functions, each reading the module-level GROQ_API_KEY and printing errors directly.

Added: the same extraction and grammar-correction logic reorganized into ProfileExtractor
and GrammarCorrector classes configured from config.get_settings(). New contents of the
file, with the two long helper bodies summarized in comments:

import groq
from pdfextractor import extract_text_from_pdf
from models import Profile, SocialMedia, Project, Skill, Education
from typing import List, Dict, Any, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import json
from config import get_settings

settings = get_settings()


class ProfileExtractor:
    """Class for extracting profile information from resume text"""

    def __init__(self):
        self.groq_api_key = settings.GROQ_API_KEY
        self.model_name = settings.MODEL_NAME
        self.temperature = settings.TEMPERATURE
        self.max_tokens = settings.MAX_TOKENS
        self.llm = self._initialize_llm()

    def _initialize_llm(self) -> ChatGroq:
        """Initialize the language model client"""
        return ChatGroq(
            groq_api_key=self.groq_api_key,
            model_name=self.model_name,
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )

    def extract_profile(self, pdf_text: str) -> Profile:
        """
        Main method to extract profile information from PDF text.

        Args:
            pdf_text: Text extracted from a resume PDF

        Returns:
            Profile object with extracted information
        """
        try:
            return self._extract_with_langchain(pdf_text)
        except Exception as e:
            if settings.DEBUG:
                print(f"LangChain extraction failed: {e}")
            return self._extract_with_fallback(pdf_text)

    def _extract_with_langchain(self, pdf_text: str) -> Profile:
        """Extract profile with the structured LangChain approach"""
        # format_instructions keeps the same prompt text as the old version: extract the
        # full name, professional title, email, bio (50-100 words), tagline (5-10 words),
        # social media links (LinkedIn, GitHub, Instagram), projects (title, description,
        # techStack), skills, and education history (school, degree, fieldOfStudy,
        # startDate, endDate), returned as a single JSON object with null for any field
        # that is not available.
        format_instructions = "..."  # unchanged prompt text, omitted here for brevity

        template = """
        You are a professional resume parser. Extract structured information from the following resume:

        {pdf_text}

        {format_instructions}
        """

        prompt = PromptTemplate(
            template=template,
            input_variables=["pdf_text"],
            partial_variables={"format_instructions": format_instructions}
        )

        # Get the structured information from the LLM
        chain = prompt | self.llm
        result = chain.invoke({"pdf_text": pdf_text})
        response_text = result.content

        # Extract JSON from the response text (in case the LLM adds extra text)
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start >= 0 and json_end > json_start:
            json_str = response_text[json_start:json_end]
            profile_dict = json.loads(json_str)

            # Create a Profile object from the dictionary
            profile = Profile.model_validate(profile_dict)

            # Check for missing information and try to extract it if necessary
            profile = self._fill_missing_information(profile, pdf_text)
            return profile
        else:
            raise ValueError("No JSON found in the response")

    def _fill_missing_information(self, profile: Profile, pdf_text: str) -> Profile:
        """Attempts to fill in any missing information in the profile"""
        # For each of name, title, email, and bio that is empty or "N/A", the LLM is
        # asked for just that field (name and title use only the first
        # settings.CHUNK_SIZE characters of the text) and the answer is kept when it is
        # non-empty, not "N/A", and, for email, contains "@". If profile.educations is
        # empty, the LLM is asked for the education history as a list of JSON objects,
        # the [...] block is parsed with json.loads, and each entry is appended as an
        # Education. Errors are printed only when settings.DEBUG is set.
        return profile

    def _extract_with_fallback(self, pdf_text: str) -> Profile:
        """Fallback method for profile extraction using direct API calls"""
        client = groq.Groq(api_key=self.groq_api_key)

        def get_llm_response(prompt: str) -> str:
            """Helper function to get a response from the LLM."""
            try:
                chat_completion = client.chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.model_name,
                    temperature=settings.FALLBACK_TEMPERATURE,
                    max_tokens=settings.MAX_TOKENS
                )
                return chat_completion.choices[0].message.content
            except Exception as e:
                if settings.DEBUG:
                    print(f"Error during LLM call: {e}")
                return ""  # Return empty string on failure

        # One prompt per field, with the same wording as the old fallback: name, title,
        # email, bio, tagline, LinkedIn/GitHub/Instagram URLs, projects in
        # "Project Title: Project Description: Tech Stack:" form, comma-separated skills,
        # and education as "School: Degree: Field: StartDate: EndDate" lines. The
        # responses are parsed into SocialMedia, Project, Skill, and Education objects
        # (skipping empty skills and reporting malformed lines only when settings.DEBUG
        # is set), and a Profile is returned with social set only when at least one link
        # was found and chatbot, profileImg, and heroImg left as None.
        return profile

The file also gains a GrammarCorrector class identical to the one in
agents/grammar_corrector.py (added below), module-level instances
profile_extractor = ProfileExtractor() and grammar_corrector = GrammarCorrector(),
and legacy wrappers kept for backward compatibility:

def extract_profile_information(pdf_text: str) -> Profile:
    """Legacy function for backward compatibility"""
    return profile_extractor.extract_profile(pdf_text)

def correct_grammar(text: str) -> str:
    """Legacy function for backward compatibility"""
    return grammar_corrector.correct_grammar(text)
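config.py (+43 lines) is part of this commit but its contents are not visible in this view. The call sites above, and in api.py further down, only require get_settings() to expose GROQ_API_KEY, MODEL_NAME, TEMPERATURE, FALLBACK_TEMPERATURE, GRAMMAR_CORRECTION_TEMPERATURE, MAX_TOKENS, CHUNK_SIZE, DEBUG, MONGODB_URI, MONGODB_TIMEOUT_MS, MONGODB_DB, and MONGODB_COLLECTION. A minimal sketch of such a module, assuming environment-variable-backed defaults; the field names come from the call sites, the non-MongoDB defaults echo the constants removed from agentProfile.py, and everything else (structure, MongoDB values) is illustrative, not the real config.py:

# config.py sketch (hypothetical; the actual file is not shown in this diff)
import os
from dataclasses import dataclass
from functools import lru_cache

@dataclass(frozen=True)
class Settings:
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    MODEL_NAME: str = os.getenv("MODEL_NAME", "qwen-2.5-32b")
    TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.5"))
    FALLBACK_TEMPERATURE: float = float(os.getenv("FALLBACK_TEMPERATURE", "0.5"))
    GRAMMAR_CORRECTION_TEMPERATURE: float = float(os.getenv("GRAMMAR_CORRECTION_TEMPERATURE", "0.3"))
    MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))
    CHUNK_SIZE: int = int(os.getenv("CHUNK_SIZE", "1000"))
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
    # MongoDB values are purely illustrative placeholders
    MONGODB_URI: str = os.getenv("MONGODB_URI", "mongodb://localhost:27017")
    MONGODB_TIMEOUT_MS: int = int(os.getenv("MONGODB_TIMEOUT_MS", "5000"))
    MONGODB_DB: str = os.getenv("MONGODB_DB", "portfolio")
    MONGODB_COLLECTION: str = os.getenv("MONGODB_COLLECTION", "profiles")

@lru_cache()
def get_settings() -> Settings:
    """Return a cached Settings instance."""
    return Settings()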
agents/__init__.py: ADDED
@@ -0,0 +1,12 @@

"""
Agents package for specialized AI tasks
"""
from .profile_extractor import ProfileExtractor, extract_profile_information
from .grammar_corrector import GrammarCorrector, correct_grammar

__all__ = [
    'ProfileExtractor',
    'GrammarCorrector',
    'extract_profile_information',
    'correct_grammar'
]
agents/grammar_corrector.py: ADDED
@@ -0,0 +1,57 @@

"""
Agent for correcting grammar in text
"""
import groq
from config import get_settings

settings = get_settings()

class GrammarCorrector:
    """Class for correcting grammar in text using LLM"""

    def __init__(self):
        self.groq_api_key = settings.GROQ_API_KEY
        self.model_name = settings.MODEL_NAME
        self.temperature = settings.GRAMMAR_CORRECTION_TEMPERATURE

    def correct_grammar(self, text: str) -> str:
        """
        Corrects grammar in user input using Groq's LLM.

        Args:
            text: The text to correct

        Returns:
            The corrected text
        """
        if not text:
            return text

        client = groq.Groq(api_key=self.groq_api_key)

        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Correct any grammar, spelling, or punctuation errors in the following text, but keep the meaning exactly the same: '{text}'"
                    }
                ],
                model=self.model_name,
                temperature=self.temperature,
                max_tokens=settings.MAX_TOKENS
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            if settings.DEBUG:
                print(f"Error during grammar correction: {e}")
            return text  # Return original text if correction fails


# Create module-level instance for easier imports
grammar_corrector = GrammarCorrector()

# Export function for backward compatibility
def correct_grammar(text: str) -> str:
    """Legacy function for backward compatibility"""
    return grammar_corrector.correct_grammar(text)
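A quick usage sketch for the new agent, assuming the agents package is importable from the app's working directory and GROQ_API_KEY is configured; the sample sentence is illustrative only:

# Example usage of the grammar-correction agent (illustrative input text)
from agents import correct_grammar

fixed = correct_grammar("me and him has went to the library yesterday")
print(fixed)  # corrected sentence from the LLM, or the original text if the call fails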
agents/profile_extractor.py: ADDED
@@ -0,0 +1,346 @@

"""
Agent for extracting profile information from resumes
"""
import groq
from models import Profile, SocialMedia, Project, Skill, Education
from typing import List, Dict, Any, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import json
from config import get_settings
import logging

settings = get_settings()

# Configure logging
logging.basicConfig(
    level=logging.DEBUG if settings.DEBUG else logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

The ProfileExtractor class defined here is the same as the one added to agentProfile.py
above (same __init__, _initialize_llm, extract_profile, _extract_with_langchain,
_fill_missing_information, and _extract_with_fallback, including the identical
format_instructions prompt and field-by-field fallback prompts), with two differences:

- every step logs through the module logger (logger.debug/info/error) alongside the
  DEBUG-gated prints, for example logger.info("Extracting profile information") at the
  start of extract_profile and logger.error(f"LangChain extraction failed: {e}") before
  falling back;
- _fill_missing_information additionally populates profile.topSkills: when
  profile.skills is non-empty, the LLM is asked for the top 8 skills from the
  already-extracted list and each returned skill is appended as a Skill:

        if profile.skills:
            try:
                response = self.llm.invoke(
                    "Extract a top 8 of skills from this resume text, separated by commas. "
                    "Respond with just the skills: "
                    + ", ".join([skill.name for skill in profile.skills])
                )
                skills = response.content.split(",")
                for skill in skills:
                    if skill:
                        profile.topSkills.append(Skill(name=skill.strip()))
                        logger.debug(f"Added skill: {skill}")
            except Exception as e:
                logger.error(f"Error extracting skills: {e}")

The module ends with:

# Create module-level instance for easier imports
profile_extractor = ProfileExtractor()

# Export function for backward compatibility
def extract_profile_information(pdf_text: str) -> Profile:
    """Legacy function for backward compatibility"""
    return profile_extractor.extract_profile(pdf_text)

# Export the class and the function
__all__ = ['ProfileExtractor', 'extract_profile_information']
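An end-to-end sketch of the new extraction path, assuming pdfextractor.extract_text_from_pdf (imported in agentProfile.py above) returns the plain text of the resume; the file name is illustrative:

# PDF text -> Profile via the backward-compatible wrapper ("resume.pdf" is illustrative)
from pdfextractor import extract_text_from_pdf
from agents import extract_profile_information

pdf_text = extract_text_from_pdf("resume.pdf")
profile = extract_profile_information(pdf_text)
print(profile.name, profile.title, len(profile.skills))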
api.py
CHANGED
@@ -1,9 +1,21 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
from pymongo import MongoClient
|
3 |
from bson.objectid import ObjectId
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
import json
|
6 |
from bson import json_util
|
|
|
|
|
|
|
|
|
|
|
|
 app = FastAPI(title="Profile API", description="API to retrieve profile information")

@@ -18,17 +30,21 @@ app.add_middleware(

# MongoDB connection configuration
def get_db_connection():
    try:
-        client = MongoClient(
        # Test the connection
        client.server_info()
-        return client[
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Database connection error: {str(e)}")

@app.get("/api/profile/{profile_id}")
-async def get_profile(profile_id: str):
    """
    Retrieve a profile by its MongoDB ID

@@ -41,12 +57,13 @@ async def get_profile(profile_id: str):
    try:
        # Connect to MongoDB
        db = get_db_connection()
-        collection = db[

        # Try to parse the profile_id as an ObjectId
        try:
            obj_id = ObjectId(profile_id)
-        except:
            raise HTTPException(status_code=400, detail=f"Invalid profile ID format: {profile_id}")

        # Find the profile by ID
@@ -54,10 +71,12 @@ async def get_profile(profile_id: str):

        # Check if profile exists
        if not profile:
            raise HTTPException(status_code=404, detail=f"Profile with ID {profile_id} not found")

        # Convert MongoDB document to JSON serializable format
        profile_json = json.loads(json_util.dumps(profile))

        return profile_json

@@ -66,11 +85,11 @@ async def get_profile(profile_id: str):
        raise
    except Exception as e:
        # Handle any other exceptions
        raise HTTPException(status_code=500, detail=f"Error retrieving profile: {str(e)}")

-# Add new endpoint for profile images
@app.get("/api/profile/{profile_id}/image")
-async def get_profile_image(profile_id: str):
    """
    Retrieve just the profile image for a given profile ID

@@ -82,7 +101,7 @@ async def get_profile_image(profile_id: str):
    """
    try:
        db = get_db_connection()
-        collection = db[

        try:
            obj_id = ObjectId(profile_id)
@@ -106,6 +125,7 @@ async def get_profile_image(profile_id: str):
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error retrieving profile image: {str(e)}")

if __name__ == "__main__":

+from fastapi import FastAPI, HTTPException, Depends
from pymongo import MongoClient
from bson.objectid import ObjectId
from fastapi.middleware.cors import CORSMiddleware
import json
from bson import json_util
+from config import get_settings
+from typing import Dict, Any
+import logging
+
+settings = get_settings()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG if settings.DEBUG else logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)

app = FastAPI(title="Profile API", description="API to retrieve profile information")

# MongoDB connection configuration
def get_db_connection():
+    """Get MongoDB database connection"""
    try:
+        client = MongoClient(
+            settings.MONGODB_URI,
+            serverSelectionTimeoutMS=settings.MONGODB_TIMEOUT_MS
+        )
        # Test the connection
        client.server_info()
+        return client[settings.MONGODB_DB]
    except Exception as e:
+        logger.error(f"Error connecting to MongoDB: {e}")
        raise HTTPException(status_code=500, detail=f"Database connection error: {str(e)}")

@app.get("/api/profile/{profile_id}")
+async def get_profile(profile_id: str) -> Dict[str, Any]:
    """
    Retrieve a profile by its MongoDB ID

    try:
        # Connect to MongoDB
        db = get_db_connection()
+        collection = db[settings.MONGODB_COLLECTION]

        # Try to parse the profile_id as an ObjectId
        try:
            obj_id = ObjectId(profile_id)
+        except Exception as id_error:
+            logger.error(f"Invalid profile ID: {profile_id}, error: {id_error}")
            raise HTTPException(status_code=400, detail=f"Invalid profile ID format: {profile_id}")

        # Find the profile by ID

        # Check if profile exists
        if not profile:
+            logger.warning(f"Profile not found: {profile_id}")
            raise HTTPException(status_code=404, detail=f"Profile with ID {profile_id} not found")

        # Convert MongoDB document to JSON serializable format
        profile_json = json.loads(json_util.dumps(profile))
+        logger.debug(f"Retrieved profile: {profile_id}")

        return profile_json

        raise
    except Exception as e:
        # Handle any other exceptions
+        logger.error(f"Error retrieving profile {profile_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Error retrieving profile: {str(e)}")

@app.get("/api/profile/{profile_id}/image")
+async def get_profile_image(profile_id: str) -> Dict[str, Any]:
    """
    Retrieve just the profile image for a given profile ID

    """
    try:
        db = get_db_connection()
+        collection = db[settings.MONGODB_COLLECTION]

        try:
            obj_id = ObjectId(profile_id)

    except HTTPException:
        raise
    except Exception as e:
+        logger.error(f"Error retrieving profile image {profile_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Error retrieving profile image: {str(e)}")

if __name__ == "__main__":
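A quick client-side sketch of calling the two endpoints above. This is not part of the commit: it assumes the FastAPI app is served locally on port 8000 (the URL the previous app.py printed), that the `requests` package is installed, and the profile ID shown is a placeholder.

    import requests

    BASE_URL = "http://localhost:8000"            # assumed local dev server
    profile_id = "65f0c0ffee0123456789abcd"       # placeholder ObjectId string

    # Fetch the full profile document (JSON-serialized with bson.json_util on the server)
    resp = requests.get(f"{BASE_URL}/api/profile/{profile_id}")
    resp.raise_for_status()
    profile = resp.json()
    print(profile.get("name"), profile.get("title"))

    # Fetch only the profile image payload
    img_resp = requests.get(f"{BASE_URL}/api/profile/{profile_id}/image")
    print(img_resp.status_code)
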
app.py
CHANGED
@@ -1,65 +1,32 @@
+"""
+Streamlit web application for resume profile extraction
+"""
import streamlit as st
-import os
+import os
import json
import traceback
import base64
-from
+import logging
+from typing import Dict, Any
+
+# Import from our refactored modules
+from agents import profile_extractor as pe, grammar_corrector as gc
+from utils import extract_text_from_pdf, save_temp_pdf
+from services import storage_service
from models import Skill, Project, Education, SocialMedia
+from config import get_settings

-        "name": profile.name,
-        "title": profile.title,
-        "email": profile.email,
-        "bio": profile.bio,
-        "tagline": profile.tagline if profile.tagline else "",
-        "social": {
-            "linkedin": profile.social.linkedin if profile.social and profile.social.linkedin else "",
-            "github": profile.social.github if profile.social and profile.social.github else "",
-            "instagram": profile.social.instagram if profile.social and profile.social.instagram else ""
-        },
-        "profileImg": profile.profileImg if profile.profileImg else "",
-        "projects": [
-            {
-                "title": project.title,
-                "description": project.description,
-                "techStack": project.techStack if project.techStack else "",
-                "githubUrl": project.githubUrl if project.githubUrl else "",
-                "demoUrl": project.demoUrl if project.demoUrl else ""
-            } for project in profile.projects
-        ] if profile.projects else [],
-        "skills": [skill.name for skill in profile.skills] if profile.skills else [],
-        "educations": [
-            {
-                "school": edu.school,
-                "degree": edu.degree,
-                "fieldOfStudy": edu.fieldOfStudy,
-                "startDate": edu.startDate,
-                "endDate": edu.endDate
-            } for edu in profile.educations
-        ] if profile.educations else []
-    }
-
-def store_profile(profile_dict):
-    try:
-        client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)
-        # Test the connection
-        client.server_info()
-
-        db = client["profileDB"]
-        collection = db["profiles"]
-        result = collection.insert_one(profile_dict)
-        return str(result.inserted_id)
-    except Exception as e:
-        st.error(f"Erreur de connexion à MongoDB: {str(e)}")
-        # Fallback: save to JSON file
-        file_path = f"profile_{profile_dict['name'].replace(' ', '_')}.json"
-        with open(file_path, 'w') as f:
-            json.dump(profile_dict, f, indent=2)
-        return f"Sauvegardé dans le fichier {file_path}"
+# Get settings
+settings = get_settings()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG if settings.DEBUG else logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+profile_extractor=pe.ProfileExtractor()
+grammar_corrector=gc.GrammarCorrector()
def collect_missing_data(profile):
    """
    Collects missing data from user input when automatic extraction fails.
@@ -92,7 +59,7 @@ def collect_missing_data(profile):

    if profile.bio and profile.bio != "N/A":
        if st.button("Improve Bio Grammar"):
-            profile.bio = correct_grammar(profile.bio)
+            profile.bio = grammar_corrector.correct_grammar(profile.bio)
            st.success("Grammar corrected!")

    # Optional information
@@ -135,7 +102,7 @@ def collect_missing_data(profile):
    num_new_edu = st.number_input("Number of additional education entries:", min_value=1, max_value=5, value=1)
    offset = len(profile.educations) if profile.educations else 0

-    for i in range(num_new_edu):
+    for i in range(int(num_new_edu)):
        st.write(f"Additional Education #{i+1}")
        school = st.text_input(f"School:", key=f"new_school_{offset+i}")
        degree = st.text_input(f"Degree:", key=f"new_degree_{offset+i}")
@@ -187,7 +154,7 @@ def collect_missing_data(profile):
    num_new_proj = st.number_input("Number of additional projects:", min_value=1, max_value=5, value=1)
    offset = len(profile.projects) if profile.projects else 0

-    for i in range(num_new_proj):
+    for i in range(int(num_new_proj)):
        st.write(f"Additional Project #{i+1}")
        title = st.text_input(f"Title:", key=f"new_proj_title_{offset+i}")
        description = st.text_area(f"Description:", key=f"new_proj_desc_{offset+i}")
@@ -196,8 +163,9 @@ def collect_missing_data(profile):
        demo_url = st.text_input(f"Demo URL (optional):", key=f"new_proj_demo_{offset+i}")

        if title and description:  # Only add if title and description are provided
+            correct_grammar_btn = st.button(f"Correct Grammar for Project #{i+1}")
+            if correct_grammar_btn:
+                description = grammar_corrector.correct_grammar(description)
                st.success("Grammar corrected!")

            project_data.append({
@@ -233,10 +201,81 @@ def collect_missing_data(profile):

    return profile

+def display_profile(profile):
+    """
+    Displays a profile in the Streamlit UI
+
+    Args:
+        profile: The Profile object to display
+    """
+    st.header("Your Complete Profile")
+
+    # Display profile image if available
+    if profile.profileImg:
+        st.image(profile.profileImg, width=150)
+
+    # Display basic info in a table
+    basic_data = {
+        "Field": ["Name", "Title", "Email", "Bio", "Tagline"],
+        "Value": [
+            profile.name,
+            profile.title,
+            profile.email,
+            profile.bio,
+            profile.tagline if profile.tagline else ""
+        ]
+    }
+    st.table(basic_data)
+
+    # Display social media if available
+    if profile.social:
+        social_data = {
+            "Platform": ["LinkedIn", "GitHub", "Instagram"],
+            "URL": [
+                profile.social.linkedin if profile.social.linkedin else "",
+                profile.social.github if profile.social.github else "",
+                profile.social.instagram if profile.social.instagram else ""
+            ]
+        }
+        st.subheader("Social Media")
+        st.table(social_data)
+
+    # Display education in a table if available
+    if profile.educations:
+        education_data = {
+            "School": [edu.school for edu in profile.educations],
+            "Degree": [edu.degree for edu in profile.educations],
+            "Field of Study": [edu.fieldOfStudy for edu in profile.educations],
+            "Start Date": [edu.startDate for edu in profile.educations],
+            "End Date": [edu.endDate for edu in profile.educations]
+        }
+        st.subheader("Education")
+        st.table(education_data)
+
+    # Display projects in a table if available
+    if profile.projects:
+        projects_data = {
+            "Title": [project.title for project in profile.projects],
+            "Description": [project.description for project in profile.projects],
+            "Tech Stack": [project.techStack if project.techStack else "" for project in profile.projects],
+            "GitHub": [project.githubUrl if project.githubUrl else "" for project in profile.projects],
+            "Demo": [project.demoUrl if project.demoUrl else "" for project in profile.projects]
+        }
+        st.subheader("Projects")
+        st.table(projects_data)
+
+    # Display skills as a comma separated list if available
+    if profile.skills:
+        st.subheader("Skills")
+        st.write(", ".join([skill.name for skill in profile.skills]))
+
def main():
+    """Main application function"""
+    st.set_page_config(page_title="Resume Profile Extractor", page_icon="📄", layout="wide")
+    st.title("Professional Profile Extractor")
+    st.write("Upload a resume PDF to extract professional profile information")

-    #
+    # Initialize session state variables
    if 'profile' not in st.session_state:
        st.session_state.profile = None
    if 'extraction_complete' not in st.session_state:
@@ -246,135 +285,101 @@ def main():
    if 'profile_saved' not in st.session_state:
        st.session_state.profile_saved = False

-    # Step 1: Upload PDF
+    # Step 1: Upload PDF and Extract Profile
    if not st.session_state.extraction_complete:
-        uploaded_file = st.file_uploader("Upload a PDF
+        uploaded_file = st.file_uploader("Upload a PDF resume", type="pdf")

        if uploaded_file is not None:
+            try:
+                # Save the uploaded file to a temporary location
+                pdf_path = save_temp_pdf(uploaded_file.getvalue())
+
+                # Extract text from the PDF
+                pdf_text = extract_text_from_pdf(pdf_path)
+
+                if not pdf_text:
+                    st.error("Could not extract text from the PDF. The file might be scanned or protected.")
+                else:
+                    with st.spinner("Extracting profile information..."):
+                        # Extract profile information using the profile extractor agent
+                        profile = profile_extractor.extract_profile(pdf_text)
                        st.session_state.profile = profile
                        st.session_state.extraction_complete = True
                        st.experimental_rerun()

-            os.remove("temp.pdf")
+                # Clean up temporary file
+                if os.path.exists(pdf_path):
+                    os.remove(pdf_path)
+
+            except Exception as e:
+                logger.error(f"Error during profile extraction: {e}")
+                st.error(f"An error occurred during profile extraction: {str(e)}")
+                if "403" in str(e):
+                    st.error("Authorization error (403 Forbidden). Please check your API key and permissions.")
+                with st.expander("Technical Details"):
+                    st.code(traceback.format_exc())

-    # Step 2:
+    # Step 2: Allow User to Edit/Complete the Profile
    elif not st.session_state.user_input_complete:
+        st.info("We've extracted information from your resume. Please review and complete any missing details.")
-        profile = collect_missing_data(profile)
+        # Call the function to collect and complete missing data
+        profile = collect_missing_data(st.session_state.profile)
+
+        # Add buttons for submitting or starting over
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("Save Profile"):
+                st.session_state.profile = profile
+                st.session_state.user_input_complete = True
+                st.experimental_rerun()
+        with col2:
+            if st.button("Start Over"):
+                st.session_state.profile = None
+                st.session_state.extraction_complete = False
+                st.experimental_rerun()

-    # Step 3:
+    # Step 3: Save Profile and Display Results
    elif not st.session_state.profile_saved:
        profile = st.session_state.profile

        try:
-            #
+            # Store the profile using the storage service
+            inserted_id = storage_service.store_profile(
+                profile,
+                error_handler=st.error
+            )

-            # Show API access information
-            st.info(f"Access this profile via API: http://localhost:8000/api/profile/{inserted_id}")
+            st.success(f"Profile saved successfully with ID: {inserted_id}")

-            st.
+            # Display the Portfolio URL
+            st.info(f"Access to your portfolio: [Portfolio URL](http://localhost:3000/{inserted_id})")

-            if profile.profileImg:
-                st.image(profile.profileImg, width=150)
-
-            #
-                "Field": ["Name", "Title", "Email", "Bio", "Tagline"],
-                "Value": [
-                    profile.name,
-                    profile.title,
-                    profile.email,
-                    profile.bio,
-                    profile.tagline if profile.tagline else ""
-                ]
-            }
-            st.table(basic_data)
-
-            # Display social media if available
-            if profile.social:
-                social_data = {
-                    "Platform": ["LinkedIn", "GitHub", "Instagram"],
-                    "URL": [
-                        profile.social.linkedin if profile.social.linkedin else "",
-                        profile.social.github if profile.social.github else "",
-                        profile.social.instagram if profile.social.instagram else ""
-                    ]
-                }
-                st.subheader("Social Media")
-                st.table(social_data)
-
-            # Display education in a table if available
-            if profile.educations:
-                education_data = {
-                    "School": [edu.school for edu in profile.educations],
-                    "Degree": [edu.degree for edu in profile.educations],
-                    "Field of Study": [edu.fieldOfStudy for edu in profile.educations],
-                    "Start Date": [edu.startDate for edu in profile.educations],
-                    "End Date": [edu.endDate for edu in profile.educations]
-                }
-                st.subheader("Education")
-                st.table(education_data)
-
-            # Display
-                projects_data = {
-                    "Title": [project.title for project in profile.projects],
-                    "Description": [project.description for project in profile.projects],
-                    "Tech Stack": [project.techStack if project.techStack else "" for project in profile.projects],
-                    "GitHub": [project.githubUrl if project.githubUrl else "" for project in profile.projects],
-                    "Demo": [project.demoUrl if project.demoUrl else "" for project in profile.projects]
-                }
-                st.subheader("Projects")
-                st.table(projects_data)
-
-            # Display skills as a comma separated list if available
-            if profile.skills:
-                st.subheader("Skills")
-                st.write(", ".join([skill.name for skill in profile.skills]))
+            # Mark as saved in session state
+            st.session_state.profile_saved = True
+
+            # Display the complete profile
+            display_profile(profile)

        except Exception as e:
+            logger.error(f"Error saving profile: {e}")
            st.error(f"Error saving profile: {str(e)}")
-            with st.expander("Technical
+            with st.expander("Technical Details"):
                st.code(traceback.format_exc())

-    #
+    # Final state - allow extracting another profile
    else:
+        st.success("Profile extraction complete!")
+
+        # Show options to extract another profile or view the current one
        if st.button("Extract Another Profile"):
+            # Reset session state
            for key in ['profile', 'extraction_complete', 'user_input_complete', 'profile_saved']:
                st.session_state[key] = False
            st.experimental_rerun()
+        else:
+            # Show the profile again
+            display_profile(st.session_state.profile)

if __name__ == "__main__":
    main()
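The refactored modules above also compose outside Streamlit. A rough sketch of the same pipeline run headless, assuming the imports work as shown in this commit and that a readable resume.pdf exists in the working directory:

    from agents import profile_extractor as pe
    from utils import extract_text_from_pdf, save_temp_pdf
    from services import storage_service

    # Read a local PDF and push it through the same steps the app uses
    with open("resume.pdf", "rb") as f:          # assumed input file
        pdf_path = save_temp_pdf(f.read())

    pdf_text = extract_text_from_pdf(pdf_path)
    if pdf_text:
        profile = pe.ProfileExtractor().extract_profile(pdf_text)
        result = storage_service.store_profile(profile, error_handler=print)
        print(f"Stored profile: {result}")
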
config.py
ADDED
@@ -0,0 +1,43 @@
from pydantic_settings import BaseSettings
from typing import Optional, Dict, Any
import os
from functools import lru_cache

class Settings(BaseSettings):
    """
    Application settings loaded from environment variables or .env file
    """
    # API keys
    GROQ_API_KEY: str

    # LLM settings
    MODEL_NAME: str = "qwen-2.5-32b"
    MAX_TOKENS: int = 2048
    TEMPERATURE: float = 0.5
    FALLBACK_TEMPERATURE: float = 0.7
    GRAMMAR_CORRECTION_TEMPERATURE: float = 0.3

    # MongoDB settings
    MONGODB_URI: str = "mongodb://localhost:27017/"
    MONGODB_DB: str = "profileDB"
    MONGODB_COLLECTION: str = "profiles"
    MONGODB_TIMEOUT_MS: int = 5000

    # Application settings
    CACHE_SIZE: int = 100
    CHUNK_SIZE: int = 1000
    DEBUG: bool = False

    # File settings
    TEMP_FILE_DIR: str = "./"

    class Config:
        env_file = ".env"
        case_sensitive = True

@lru_cache()
def get_settings() -> Settings:
    """
    Get cached settings instance
    """
    return Settings()
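A minimal sketch of how these settings might be supplied and read. The .env values below are placeholders, not real credentials; only GROQ_API_KEY has no default, so it must be provided via the environment or the .env file.

    # .env (example placeholder values)
    # GROQ_API_KEY=gsk_your_key_here
    # MONGODB_URI=mongodb://localhost:27017/
    # DEBUG=true

    from config import get_settings

    settings = get_settings()            # cached via lru_cache, so repeated calls reuse one instance
    print(settings.MODEL_NAME)           # "qwen-2.5-32b" unless overridden
    print(settings.MONGODB_DB, settings.MONGODB_COLLECTION)
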
models.py
CHANGED
@@ -41,4 +41,5 @@ class Profile(BaseModel):
    chatbot: Optional[Chatbot] = None
    projects: List[Project] = []
    skills: List[Skill] = []
+    topSkills: List[str] = []
    educations: List[Education] = []
services/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Services package for backend operations
"""
from .storage_service import StorageService, storage_service

__all__ = ['StorageService', 'storage_service']
services/storage_service.py
ADDED
@@ -0,0 +1,97 @@
"""
Service for storing and retrieving profile data
"""
from pymongo import MongoClient
from models import Profile
from config import get_settings
import json
import logging
from typing import Dict, Any, Optional

settings = get_settings()
logger = logging.getLogger(__name__)

class StorageService:
    """Service for storing and retrieving profile data"""

    def __init__(self):
        self.mongo_uri = settings.MONGODB_URI
        self.db_name = settings.MONGODB_DB
        self.collection_name = settings.MONGODB_COLLECTION
        self.timeout_ms = settings.MONGODB_TIMEOUT_MS

    def profile_to_dict(self, profile: Profile) -> Dict[str, Any]:
        """Convert Profile object to dictionary for MongoDB storage"""
        return {
            "name": profile.name,
            "title": profile.title,
            "email": profile.email,
            "bio": profile.bio,
            "tagline": profile.tagline if profile.tagline else "",
            "social": {
                "linkedin": profile.social.linkedin if profile.social and profile.social.linkedin else "",
                "github": profile.social.github if profile.social and profile.social.github else "",
                "instagram": profile.social.instagram if profile.social and profile.social.instagram else ""
            },
            "profileImg": profile.profileImg if profile.profileImg else "",
            "projects": [
                {
                    "title": project.title,
                    "description": project.description,
                    "techStack": project.techStack if project.techStack else "",
                    "githubUrl": project.githubUrl if project.githubUrl else "",
                    "demoUrl": project.demoUrl if project.demoUrl else ""
                } for project in profile.projects
            ] if profile.projects else [],
            "skills": [skill.name for skill in profile.skills] if profile.skills else [],
            "educations": [
                {
                    "school": edu.school,
                    "degree": edu.degree,
                    "fieldOfStudy": edu.fieldOfStudy,
                    "startDate": edu.startDate,
                    "endDate": edu.endDate
                } for edu in profile.educations
            ] if profile.educations else []
        }

    def store_profile(self, profile: Profile, error_handler=None) -> str:
        """
        Store profile data in MongoDB or fallback to JSON file

        Args:
            profile: The Profile object to store
            error_handler: Optional function to handle errors (useful for framework-specific error handling)

        Returns:
            String ID of the stored profile or path to JSON file
        """
        profile_dict = self.profile_to_dict(profile)

        try:
            client = MongoClient(
                self.mongo_uri,
                serverSelectionTimeoutMS=self.timeout_ms
            )
            # Test the connection
            client.server_info()

            db = client[self.db_name]
            collection = db[self.collection_name]
            result = collection.insert_one(profile_dict)
            return str(result.inserted_id)

        except Exception as e:
            logger.error(f"MongoDB connection error: {e}")
            if error_handler:
                error_handler(f"Error connecting to MongoDB: {str(e)}")

            # Fallback: save to JSON file
            file_path = f"profile_{profile_dict['name'].replace(' ', '_')}.json"
            with open(file_path, 'w') as f:
                json.dump(profile_dict, f, indent=2)
            return f"Saved to file {file_path}"


# Create a global instance
storage_service = StorageService()
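A usage sketch for the storage service. The field names are taken from profile_to_dict above, but whether Profile accepts exactly this constructor call depends on which fields models.py marks as required, so treat the example data as placeholders:

    from models import Profile, Skill
    from services import storage_service

    profile = Profile(
        name="Jane Doe",                      # placeholder data
        title="Backend Engineer",
        email="jane@example.com",
        bio="Engineer focused on Python services.",
        skills=[Skill(name="Python")],
    )

    # Returns the MongoDB ObjectId string, or a "Saved to file ..." message on fallback
    result = storage_service.store_profile(profile, error_handler=print)
    print(result)
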
temp.pdf
CHANGED
Binary files a/temp.pdf and b/temp.pdf differ
utils/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Utilities package for helper functions
"""
from .pdf_utils import extract_text_from_pdf, save_temp_pdf

__all__ = ['extract_text_from_pdf', 'save_temp_pdf']
utils/pdf_utils.py
ADDED
@@ -0,0 +1,58 @@
"""
Utilities for working with PDF files
"""
import PyPDF2
import io
import os
from config import get_settings
import logging

settings = get_settings()
logger = logging.getLogger(__name__)

def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file.

    Args:
        pdf_path (str): The path to the PDF file.

    Returns:
        str: The extracted text. Returns an empty string if extraction fails.
    """
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page_num in range(len(reader.pages)):
                page = reader.pages[page_num]
                text += page.extract_text()

        if not text.strip():
            logger.warning(f"Extracted empty text from PDF: {pdf_path}")
        logger.info(f"Extracted text are {text}")
        return text

    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        return ""  # Return empty string on failure

def save_temp_pdf(file_data, filename="temp.pdf"):
    """
    Save uploaded file data to a temporary PDF file

    Args:
        file_data: The binary data of the file
        filename: The name to save the file as

    Returns:
        Path to the saved file
    """
    filepath = os.path.join(settings.TEMP_FILE_DIR, filename)
    try:
        with open(filepath, 'wb') as f:
            f.write(file_data)
        return filepath
    except Exception as e:
        logger.error(f"Error saving temporary PDF: {e}")
        raise
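A quick usage sketch for these two helpers, assuming a local resume.pdf as input; cleanup of the temporary file stays with the caller, as app.py does above:

    import os
    from utils import extract_text_from_pdf, save_temp_pdf

    with open("resume.pdf", "rb") as f:       # assumed input file
        path = save_temp_pdf(f.read())        # writes TEMP_FILE_DIR/temp.pdf by default

    text = extract_text_from_pdf(path)
    print(len(text), "characters extracted")

    if os.path.exists(path):
        os.remove(path)                       # mirror the cleanup done in app.py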