Soufianesejjari committed on
Commit
71fdb6d
·
1 Parent(s): ca85905

Add utility and service packages, enhance profile model, and implement PDF handling

Browse files
__pycache__/agentProfile.cpython-311.pyc CHANGED
Binary files a/__pycache__/agentProfile.cpython-311.pyc and b/__pycache__/agentProfile.cpython-311.pyc differ
 
__pycache__/api.cpython-311.pyc CHANGED
Binary files a/__pycache__/api.cpython-311.pyc and b/__pycache__/api.cpython-311.pyc differ
 
__pycache__/models.cpython-311.pyc CHANGED
Binary files a/__pycache__/models.cpython-311.pyc and b/__pycache__/models.cpython-311.pyc differ
 
agentProfile.py CHANGED
@@ -1,199 +1,198 @@
1
  import groq
2
  from pdfextractor import extract_text_from_pdf
3
  from models import Profile, SocialMedia, Project, Skill, Education
4
- from typing import List, Dict, Any
5
  from langchain.output_parsers import PydanticOutputParser
6
  from langchain.prompts import PromptTemplate
7
  from langchain_groq import ChatGroq
8
  import json
9
- import os
10
 
11
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
12
 
13
- def extract_profile_information(pdf_text: str) -> Profile:
14
- """
15
- Extracts profile information from the PDF text using Groq's LLM with LangChain.
16
-
17
- Args:
18
- pdf_text (str): The text extracted from the PDF.
19
-
20
- Returns:
21
- Profile: A Profile object populated with the extracted information.
22
  """
23
- # Initialize the Groq client through LangChain
24
- llm = ChatGroq(
25
- groq_api_key=GROQ_API_KEY,
26
- model_name="qwen-2.5-32b",
27
- temperature=0.5,
28
- max_tokens=2048
29
- )
30
-
31
- # Create a parser for Profile data structure
32
- parser = PydanticOutputParser(pydantic_object=Profile)
33
-
34
- # Define the format instructions for the LLM
35
- format_instructions = """
36
- Extract the following information from the resume:
37
- 1. Full name
38
- 2. Professional title
39
- 3. Email address
40
- 4. Bio (a 50-100 word professional summary)
41
- 5. Tagline (a short 5-10 word catchy phrase summarizing professional identity)
42
- 6. Social media links (LinkedIn, GitHub, Instagram)
43
- 7. Projects (with title, description, and tech stack)
44
- 8. Skills
45
- 9. Education history (with school, degree, field of study, start date and end date)
46
-
47
- Return the information in the following JSON format:
48
- {
49
- "name": "Full Name",
50
- "title": "Professional Title",
51
- "email": "email@example.com",
52
- "bio": "Professional biography...",
53
- "tagline": "Catchy professional tagline",
54
- "social": {
55
- "linkedin": "LinkedIn URL or null",
56
- "github": "GitHub URL or null",
57
- "instagram": "Instagram URL or null"
58
- },
59
- "projects": [
60
- {
61
- "title": "Project Title",
62
- "description": "Project Description",
63
- "techStack": "Technologies used"
64
- }
65
- ],
66
- "skills": [
67
- {"name": "Skill 1"},
68
- {"name": "Skill 2"}
69
- ],
70
- "educations": [
71
- {
72
- "school": "University Name",
73
- "degree": "Degree Type (e.g., Bachelor's, Master's)",
74
- "fieldOfStudy": "Major or Field",
75
- "startDate": "Start Year",
76
- "endDate": "End Year or Present"
77
- }
78
- ]
79
- }
80
-
81
- If any information is not available, use null for that field.
82
  """
 
 
 
 
 
 
83
 
84
- # Create the prompt template
85
- template = """
86
- You are a professional resume parser. Extract structured information from the following resume:
87
-
88
- {pdf_text}
89
-
90
- {format_instructions}
91
- """
92
 
93
- prompt = PromptTemplate(
94
- template=template,
95
- input_variables=["pdf_text"],
96
- partial_variables={"format_instructions": format_instructions}
97
- )
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- # Get the structured information from the LLM
100
- try:
101
- chain = prompt | llm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  result = chain.invoke({"pdf_text": pdf_text})
103
  response_text = result.content
104
 
105
- # Try to parse the JSON response
106
- try:
107
- # Extract JSON from the response text (in case the LLM adds extra text)
108
- json_start = response_text.find('{')
109
- json_end = response_text.rfind('}') + 1
110
- if json_start >= 0 and json_end > json_start:
111
- json_str = response_text[json_start:json_end]
112
- profile_dict = json.loads(json_str)
113
-
114
- # Create a Profile object from the dictionary
115
- profile = Profile.model_validate(profile_dict)
116
-
117
- # Check for missing information and try to extract it if necessary
118
- profile = fill_missing_information(profile, pdf_text, llm)
119
-
120
- return profile
121
 
122
- else:
123
- raise ValueError("No JSON found in the response")
124
-
125
- except Exception as json_error:
126
- print(f"Error parsing JSON response: {json_error}")
127
- print(f"Raw response: {response_text}")
128
- # Fallback to the old method
129
- return extract_profile_information_fallback(pdf_text)
130
 
131
- except Exception as e:
132
- print(f"Error during LangChain extraction: {e}")
133
- # Fallback to the old method
134
- return extract_profile_information_fallback(pdf_text)
135
-
136
- def fill_missing_information(profile: Profile, pdf_text: str, llm) -> Profile:
137
- """
138
- Attempts to fill in any missing information in the profile by asking
139
- specific questions to the LLM.
140
 
141
- Args:
142
- profile (Profile): The partially filled profile.
143
- pdf_text (str): The text extracted from the PDF.
144
- llm: The LLM instance.
 
 
 
 
 
 
 
 
 
 
145
 
146
- Returns:
147
- Profile: The updated profile object.
148
- """
149
- # Check and fill name if missing
150
- if not profile.name or profile.name == "N/A":
151
- try:
152
- response = llm.invoke("Extract only the full name from this resume text. Respond with just the name: " + pdf_text[:1000])
153
- name = response.content.strip()
154
- if name and name != "N/A":
155
- profile.name = name
156
- except Exception as e:
157
- print(f"Error extracting name: {e}")
158
-
159
- # Check and fill title if missing
160
- if not profile.title or profile.title == "N/A":
161
- try:
162
- response = llm.invoke("Extract only the professional title from this resume text. Respond with just the title: " + pdf_text[:1000])
163
- title = response.content.strip()
164
- if title and title != "N/A":
165
- profile.title = title
166
- except Exception as e:
167
- print(f"Error extracting title: {e}")
168
-
169
- # Check and fill email if missing
170
- if not profile.email or profile.email == "N/A":
171
- try:
172
- response = llm.invoke("Extract only the email address from this resume text. Respond with just the email: " + pdf_text)
173
- email = response.content.strip()
174
- if email and email != "N/A" and "@" in email:
175
- profile.email = email
176
- except Exception as e:
177
- print(f"Error extracting email: {e}")
178
-
179
- # Similar checks for other fields
180
- if not profile.bio or profile.bio == "N/A":
181
- try:
182
- response = llm.invoke("Create a short professional biography (around 50-100 words) based on this resume. Focus on skills and experience: " + pdf_text)
183
- bio = response.content.strip()
184
- if bio and bio != "N/A":
185
- profile.bio = bio
186
- except Exception as e:
187
- print(f"Error creating bio: {e}")
188
-
189
- # Check for education
190
- if not profile.educations:
191
- try:
192
- education_prompt = "Extract education history from this resume. For each education entry, provide the school name, degree type, field of study, start date, and end date. Format the response as a list of JSON objects."
193
- response = llm.invoke(education_prompt + "\n\n" + pdf_text)
194
- education_text = response.content.strip()
195
-
196
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  # Try to extract JSON from the response
198
  json_start = education_text.find('[')
199
  json_end = education_text.rfind(']') + 1
@@ -211,168 +210,174 @@ def fill_missing_information(profile: Profile, pdf_text: str, llm) -> Profile:
211
  endDate=edu.get("endDate", "")
212
  )
213
  profile.educations.append(education)
214
- except Exception as edu_error:
215
- print(f"Error parsing education JSON: {edu_error}")
216
- except Exception as e:
217
- print(f"Error extracting education: {e}")
 
218
 
219
- return profile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- def extract_profile_information_fallback(pdf_text: str) -> Profile:
222
- """
223
- Fallback method to extract profile information using the original approach.
224
- This is used if the LangChain extraction fails.
225
- """
226
- client = groq.Groq(api_key=GROQ_API_KEY)
227
 
228
- def get_llm_response(prompt: str) -> str:
229
- """Helper function to get a response from the LLM."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  try:
231
  chat_completion = client.chat.completions.create(
232
  messages=[
233
  {
234
  "role": "user",
235
- "content": prompt
236
  }
237
  ],
238
- model="qwen-2.5-32b",
239
- temperature=0.7,
240
- max_tokens=1024
241
  )
242
  return chat_completion.choices[0].message.content
243
  except Exception as e:
244
- print(f"Error during LLM call: {e}")
245
- return "" # Return empty string on failure
246
-
247
- # Prompts for each field
248
- name_prompt = f"Extract the full name from the following text. If no name is present, respond with 'N/A'. Only respond with the name: {pdf_text}"
249
- title_prompt = f"Extract the professional title from the following text. If no title is present, respond with 'N/A'. Only respond with the title: {pdf_text}"
250
- email_prompt = f"Extract the email address from the following text. If no email is present, respond with 'N/A'. Only respond with the email: {pdf_text}"
251
- bio_prompt = f"Create a short professional biography (around 50-100 words) based on the following text. Focus on skills and experience. If no bio is possible, respond with 'N/A'. Provide only the biography itself: {pdf_text}"
252
- tagline_prompt = f"Create a short and catchy tagline (around 5-10 words) that summarizes the person's professional identity from the following text. If no tagline is possible, respond with 'N/A'. Provide only the tagline: {pdf_text}"
253
-
254
- linkedin_prompt = f"Extract the LinkedIn profile URL from the following text. If no LinkedIn URL is present, respond with 'N/A'. Only respond with the LinkedIn URL: {pdf_text}"
255
- github_prompt = f"Extract the GitHub profile URL from the following text. If no GitHub URL is present, respond with 'N/A'. Only respond with the GitHub URL: {pdf_text}"
256
- instagram_prompt = f"Extract the Instagram profile URL from the following text. If no Instagram URL is present, respond with 'N/A'. Only respond with the Instagram URL: {pdf_text}"
257
-
258
- # Extract information using the LLM
259
- name = get_llm_response(name_prompt).strip()
260
- title = get_llm_response(title_prompt).strip()
261
- email = get_llm_response(email_prompt).strip()
262
- bio = get_llm_response(bio_prompt).strip()
263
- tagline = get_llm_response(tagline_prompt).strip()
264
-
265
- linkedin = get_llm_response(linkedin_prompt).strip()
266
- github = get_llm_response(github_prompt).strip()
267
- instagram = get_llm_response(instagram_prompt).strip()
268
 
269
- project_prompt = f"Extract information about projects from the following text in this format Project Title: Project Description: Tech Stack:. If no projects are present, respond with 'N/A': {pdf_text}"
270
- skill_prompt = f"Extract a list of skills from the following text, separated by commas. If no skills are present, respond with 'N/A'. Only respond with the skills: {pdf_text}"
271
 
272
- project_info = get_llm_response(project_prompt).strip()
273
- skills_info = get_llm_response(skill_prompt).strip()
 
274
 
275
- # Education prompt
276
- education_prompt = f"Extract education history from the following resume. For each education entry, provide the school name, degree type, field of study, start date, and end date. Format as 'School: Degree: Field: StartDate: EndDate' with each education on a new line. If no education is found, respond with 'N/A': {pdf_text}"
277
- education_info = get_llm_response(education_prompt).strip()
278
-
279
- # Create SocialMedia object
280
- social_media = SocialMedia(linkedin=linkedin if linkedin != 'N/A' else None,
281
- github=github if github != 'N/A' else None,
282
- instagram=instagram if instagram != 'N/A' else None)
283
-
284
- # Process Projects
285
- projects: List[Project] = []
286
- if project_info != "N/A":
287
- project_lines = project_info.split("\n")
288
- for line in project_lines:
289
- if ":" in line:
290
- try:
291
- project_title, project_description_techstack = line.split(":", 1)
292
- project_description, tech_stack = project_description_techstack.split("Tech Stack:", 1)
293
-
294
- # Create a Project instance
295
- project = Project(title=project_title.strip(), description=project_description.strip(), techStack=tech_stack.strip())
296
- projects.append(project)
297
- except ValueError as e:
298
- print(f"Error parsing project: {line}. Error: {e}")
299
-
300
- # Process Skills
301
- skills: List[Skill] = []
302
- if skills_info != "N/A":
303
- skill_list = [skill.strip() for skill in skills_info.split(",")]
304
- for skill_name in skill_list:
305
- skills.append(Skill(name=skill_name))
306
-
307
- # Process Education
308
- educations: List[Education] = []
309
- if education_info != "N/A":
310
- education_lines = education_info.split("\n")
311
- for line in education_lines:
312
- if ":" in line:
313
- try:
314
- parts = line.split(":")
315
- if len(parts) >= 5:
316
- school = parts[0].strip()
317
- degree = parts[1].strip()
318
- field = parts[2].strip()
319
- start_date = parts[3].strip()
320
- end_date = parts[4].strip()
321
-
322
- education = Education(
323
- school=school,
324
- degree=degree,
325
- fieldOfStudy=field,
326
- startDate=start_date,
327
- endDate=end_date
328
- )
329
- educations.append(education)
330
- except Exception as e:
331
- print(f"Error parsing education: {line}. Error: {e}")
332
-
333
- # Create and return the Profile object
334
- profile = Profile(
335
- name=name if name != 'N/A' else "N/A",
336
- title=title if title != 'N/A' else "N/A",
337
- email=email if email != 'N/A' else "N/A",
338
- bio=bio if bio != 'N/A' else "N/A",
339
- tagline=tagline if tagline != 'N/A' else None,
340
- social = social_media if (social_media.github != None or social_media.instagram != None or social_media.linkedin != None ) else None,
341
- chatbot = None,
342
- profileImg = None,
343
- heroImg = None,
344
- projects = projects,
345
- skills = skills,
346
- educations = educations
347
- )
348
-
349
- return profile
350
 
351
  def correct_grammar(text: str) -> str:
352
- """
353
- Corrects grammar in user input using Groq's LLM.
354
-
355
- Args:
356
- text (str): The text to correct.
357
-
358
- Returns:
359
- str: The corrected text.
360
- """
361
- client = groq.Groq(api_key=GROQ_API_KEY)
362
-
363
- try:
364
- chat_completion = client.chat.completions.create(
365
- messages=[
366
- {
367
- "role": "user",
368
- "content": f"Correct any grammar, spelling, or punctuation errors in the following text, but keep the meaning exactly the same: '{text}'"
369
- }
370
- ],
371
- model="qwen-2.5-32b",
372
- temperature=0.3,
373
- max_tokens=1024
374
- )
375
- return chat_completion.choices[0].message.content
376
- except Exception as e:
377
- print(f"Error during grammar correction: {e}")
378
- return text # Return original text if correction fails
 
1
  import groq
2
  from pdfextractor import extract_text_from_pdf
3
  from models import Profile, SocialMedia, Project, Skill, Education
4
+ from typing import List, Dict, Any, Optional
5
  from langchain.output_parsers import PydanticOutputParser
6
  from langchain.prompts import PromptTemplate
7
  from langchain_groq import ChatGroq
8
  import json
9
+ from config import get_settings
10
 
11
+ settings = get_settings()
12
 
13
+ class ProfileExtractor:
 
 
 
 
 
 
 
 
14
  """
15
+ Class for extracting profile information from resume text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
+ def __init__(self):
18
+ self.groq_api_key = settings.GROQ_API_KEY
19
+ self.model_name = settings.MODEL_NAME
20
+ self.temperature = settings.TEMPERATURE
21
+ self.max_tokens = settings.MAX_TOKENS
22
+ self.llm = self._initialize_llm()
23
 
24
+ def _initialize_llm(self) -> ChatGroq:
25
+ """Initialize the language model client"""
26
+ return ChatGroq(
27
+ groq_api_key=self.groq_api_key,
28
+ model_name=self.model_name,
29
+ temperature=self.temperature,
30
+ max_tokens=self.max_tokens
31
+ )
32
 
33
+ def extract_profile(self, pdf_text: str) -> Profile:
34
+ """
35
+ Main method to extract profile information from PDF text
36
+
37
+ Args:
38
+ pdf_text: Text extracted from a resume PDF
39
+
40
+ Returns:
41
+ Profile object with extracted information
42
+ """
43
+ try:
44
+ profile = self._extract_with_langchain(pdf_text)
45
+ return profile
46
+ except Exception as e:
47
+ if settings.DEBUG:
48
+ print(f"LangChain extraction failed: {e}")
49
+ return self._extract_with_fallback(pdf_text)
50
 
51
+ def _extract_with_langchain(self, pdf_text: str) -> Profile:
52
+ """Extract profile with structured LangChain approach"""
53
+ # Define the format instructions for the LLM
54
+ format_instructions = """
55
+ Extract the following information from the resume:
56
+ 1. Full name
57
+ 2. Professional title
58
+ 3. Email address
59
+ 4. Bio (a 50-100 word professional summary)
60
+ 5. Tagline (a short 5-10 word catchy phrase summarizing professional identity)
61
+ 6. Social media links (LinkedIn, GitHub, Instagram)
62
+ 7. Projects (with title, description, and tech stack)
63
+ 8. Skills
64
+ 9. Education history (with school, degree, field of study, start date and end date)
65
+
66
+ Return the information in the following JSON format:
67
+ {
68
+ "name": "Full Name",
69
+ "title": "Professional Title",
70
+ "email": "email@example.com",
71
+ "bio": "Professional biography...",
72
+ "tagline": "Catchy professional tagline",
73
+ "social": {
74
+ "linkedin": "LinkedIn URL or null",
75
+ "github": "GitHub URL or null",
76
+ "instagram": "Instagram URL or null"
77
+ },
78
+ "projects": [
79
+ {
80
+ "title": "Project Title",
81
+ "description": "Project Description",
82
+ "techStack": "Technologies used"
83
+ }
84
+ ],
85
+ "skills": [
86
+ {"name": "Skill 1"},
87
+ {"name": "Skill 2"}
88
+ ],
89
+ "educations": [
90
+ {
91
+ "school": "University Name",
92
+ "degree": "Degree Type (e.g., Bachelor's, Master's)",
93
+ "fieldOfStudy": "Major or Field",
94
+ "startDate": "Start Year",
95
+ "endDate": "End Year or Present"
96
+ }
97
+ ]
98
+ }
99
+
100
+ If any information is not available, use null for that field.
101
+ """
102
+
103
+ # Create the prompt template
104
+ template = """
105
+ You are a professional resume parser. Extract structured information from the following resume:
106
+
107
+ {pdf_text}
108
+
109
+ {format_instructions}
110
+ """
111
+
112
+ prompt = PromptTemplate(
113
+ template=template,
114
+ input_variables=["pdf_text"],
115
+ partial_variables={"format_instructions": format_instructions}
116
+ )
117
+
118
+ # Get the structured information from the LLM
119
+ chain = prompt | self.llm
120
  result = chain.invoke({"pdf_text": pdf_text})
121
  response_text = result.content
122
 
123
+ # Extract JSON from the response text (in case the LLM adds extra text)
124
+ json_start = response_text.find('{')
125
+ json_end = response_text.rfind('}') + 1
126
+
127
+ if json_start >= 0 and json_end > json_start:
128
+ json_str = response_text[json_start:json_end]
129
+ profile_dict = json.loads(json_str)
 
 
 
 
 
 
 
 
 
130
 
131
+ # Create a Profile object from the dictionary
132
+ profile = Profile.model_validate(profile_dict)
 
 
 
 
 
 
133
 
134
+ # Check for missing information and try to extract it if necessary
135
+ profile = self._fill_missing_information(profile, pdf_text)
136
+
137
+ return profile
138
+ else:
139
+ raise ValueError("No JSON found in the response")
 
 
 
140
 
141
+ def _fill_missing_information(self, profile: Profile, pdf_text: str) -> Profile:
142
+ """
143
+ Attempts to fill in any missing information in the profile
144
+ """
145
+ # Check and fill name if missing
146
+ if not profile.name or profile.name == "N/A":
147
+ try:
148
+ response = self.llm.invoke("Extract only the full name from this resume text. Respond with just the name: " + pdf_text[:settings.CHUNK_SIZE])
149
+ name = response.content.strip()
150
+ if name and name != "N/A":
151
+ profile.name = name
152
+ except Exception as e:
153
+ if settings.DEBUG:
154
+ print(f"Error extracting name: {e}")
155
 
156
+ # Check and fill title if missing
157
+ if not profile.title or profile.title == "N/A":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  try:
159
+ response = self.llm.invoke("Extract only the professional title from this resume text. Respond with just the title: " + pdf_text[:settings.CHUNK_SIZE])
160
+ title = response.content.strip()
161
+ if title and title != "N/A":
162
+ profile.title = title
163
+ except Exception as e:
164
+ if settings.DEBUG:
165
+ print(f"Error extracting title: {e}")
166
+
167
+ # Check and fill email if missing
168
+ if not profile.email or profile.email == "N/A":
169
+ try:
170
+ response = self.llm.invoke("Extract only the email address from this resume text. Respond with just the email: " + pdf_text)
171
+ email = response.content.strip()
172
+ if email and email != "N/A" and "@" in email:
173
+ profile.email = email
174
+ except Exception as e:
175
+ if settings.DEBUG:
176
+ print(f"Error extracting email: {e}")
177
+
178
+ # Check and fill bio if missing
179
+ if not profile.bio or profile.bio == "N/A":
180
+ try:
181
+ response = self.llm.invoke("Create a short professional biography (around 50-100 words) based on this resume. Focus on skills and experience: " + pdf_text)
182
+ bio = response.content.strip()
183
+ if bio and bio != "N/A":
184
+ profile.bio = bio
185
+ except Exception as e:
186
+ if settings.DEBUG:
187
+ print(f"Error creating bio: {e}")
188
+
189
+ # Check for education if missing
190
+ if not profile.educations:
191
+ try:
192
+ education_prompt = "Extract education history from this resume. For each education entry, provide the school name, degree type, field of study, start date, and end date. Format the response as a list of JSON objects."
193
+ response = self.llm.invoke(education_prompt + "\n\n" + pdf_text)
194
+ education_text = response.content.strip()
195
+
196
  # Try to extract JSON from the response
197
  json_start = education_text.find('[')
198
  json_end = education_text.rfind(']') + 1
 
210
  endDate=edu.get("endDate", "")
211
  )
212
  profile.educations.append(education)
213
+ except Exception as e:
214
+ if settings.DEBUG:
215
+ print(f"Error extracting education: {e}")
216
+
217
+ return profile
218
 
219
+ def _extract_with_fallback(self, pdf_text: str) -> Profile:
220
+ """Fallback method for profile extraction using direct API calls"""
221
+ client = groq.Groq(api_key=self.groq_api_key)
222
+
223
+ def get_llm_response(prompt: str) -> str:
224
+ """Helper function to get a response from the LLM."""
225
+ try:
226
+ chat_completion = client.chat.completions.create(
227
+ messages=[{"role": "user", "content": prompt}],
228
+ model=self.model_name,
229
+ temperature=settings.FALLBACK_TEMPERATURE,
230
+ max_tokens=settings.MAX_TOKENS
231
+ )
232
+ return chat_completion.choices[0].message.content
233
+ except Exception as e:
234
+ if settings.DEBUG:
235
+ print(f"Error during LLM call: {e}")
236
+ return "" # Return empty string on failure
237
+
238
+ # Extract basic information
239
+ name = get_llm_response(f"Extract the full name from the following text. If no name is present, respond with 'N/A'. Only respond with the name: {pdf_text}").strip()
240
+ title = get_llm_response(f"Extract the professional title from the following text. If no title is present, respond with 'N/A'. Only respond with the title: {pdf_text}").strip()
241
+ email = get_llm_response(f"Extract the email address from the following text. If no email is present, respond with 'N/A'. Only respond with the email: {pdf_text}").strip()
242
+ bio = get_llm_response(f"Create a short professional biography (around 50-100 words) based on the following text. Focus on skills and experience. If no bio is possible, respond with 'N/A'. Provide only the biography itself: {pdf_text}").strip()
243
+ tagline = get_llm_response(f"Create a short and catchy tagline (around 5-10 words) that summarizes the person's professional identity from the following text. If no tagline is possible, respond with 'N/A'. Provide only the tagline: {pdf_text}").strip()
244
+
245
+ # Extract social media
246
+ linkedin = get_llm_response(f"Extract the LinkedIn profile URL from the following text. If no LinkedIn URL is present, respond with 'N/A'. Only respond with the LinkedIn URL: {pdf_text}").strip()
247
+ github = get_llm_response(f"Extract the GitHub profile URL from the following text. If no GitHub URL is present, respond with 'N/A'. Only respond with the GitHub URL: {pdf_text}").strip()
248
+ instagram = get_llm_response(f"Extract the Instagram profile URL from the following text. If no Instagram URL is present, respond with 'N/A'. Only respond with the Instagram URL: {pdf_text}").strip()
249
+
250
+ # Extract projects and skills
251
+ project_info = get_llm_response(f"Extract information about projects from the following text in this format Project Title: Project Description: Tech Stack:. If no projects are present, respond with 'N/A': {pdf_text}").strip()
252
+ skills_info = get_llm_response(f"Extract a list of skills from the following text, separated by commas. If no skills are present, respond with 'N/A'. Only respond with the skills: {pdf_text}").strip()
253
+
254
+ # Extract education
255
+ education_info = get_llm_response(f"Extract education history from the following resume. For each education entry, provide the school name, degree type, field of study, start date, and end date. Format as 'School: Degree: Field: StartDate: EndDate' with each education on a new line. If no education is found, respond with 'N/A': {pdf_text}").strip()
256
+
257
+ # Process the extracted information
258
+ social_media = SocialMedia(
259
+ linkedin=linkedin if linkedin != 'N/A' else None,
260
+ github=github if github != 'N/A' else None,
261
+ instagram=instagram if instagram != 'N/A' else None
262
+ )
263
+
264
+ # Process projects
265
+ projects = []
266
+ if project_info != "N/A":
267
+ project_lines = project_info.split("\n")
268
+ for line in project_lines:
269
+ if ":" in line:
270
+ try:
271
+ project_title, project_description_techstack = line.split(":", 1)
272
+ project_description, tech_stack = project_description_techstack.split("Tech Stack:", 1)
273
+
274
+ projects.append(Project(
275
+ title=project_title.strip(),
276
+ description=project_description.strip(),
277
+ techStack=tech_stack.strip()
278
+ ))
279
+ except ValueError as e:
280
+ if settings.DEBUG:
281
+ print(f"Error parsing project: {line}. Error: {e}")
282
+
283
+ # Process skills
284
+ skills = []
285
+ if skills_info != "N/A":
286
+ skill_list = [skill.strip() for skill in skills_info.split(",")]
287
+ for skill_name in skill_list:
288
+ if skill_name:
289
+ skills.append(Skill(name=skill_name))
290
+
291
+ # Process education
292
+ educations = []
293
+ if education_info != "N/A":
294
+ education_lines = education_info.split("\n")
295
+ for line in education_lines:
296
+ if ":" in line:
297
+ try:
298
+ parts = line.split(":")
299
+ if len(parts) >= 5:
300
+ educations.append(Education(
301
+ school=parts[0].strip(),
302
+ degree=parts[1].strip(),
303
+ fieldOfStudy=parts[2].strip(),
304
+ startDate=parts[3].strip(),
305
+ endDate=parts[4].strip()
306
+ ))
307
+ except Exception as e:
308
+ if settings.DEBUG:
309
+ print(f"Error parsing education: {line}. Error: {e}")
310
+
311
+ # Create the profile object
312
+ profile = Profile(
313
+ name=name if name != 'N/A' else "N/A",
314
+ title=title if title != 'N/A' else "N/A",
315
+ email=email if email != 'N/A' else "N/A",
316
+ bio=bio if bio != 'N/A' else "N/A",
317
+ tagline=tagline if tagline != 'N/A' else None,
318
+ social=social_media if (social_media.github or social_media.instagram or social_media.linkedin) else None,
319
+ chatbot=None,
320
+ profileImg=None,
321
+ heroImg=None,
322
+ projects=projects,
323
+ skills=skills,
324
+ educations=educations
325
+ )
326
+
327
+ return profile
328
 
 
 
 
 
 
 
329
 
330
+ class GrammarCorrector:
331
+ """Class for correcting grammar in text using LLM"""
332
+
333
+ def __init__(self):
334
+ self.groq_api_key = settings.GROQ_API_KEY
335
+ self.model_name = settings.MODEL_NAME
336
+ self.temperature = settings.GRAMMAR_CORRECTION_TEMPERATURE
337
+
338
+ def correct_grammar(self, text: str) -> str:
339
+ """
340
+ Corrects grammar in user input using Groq's LLM.
341
+
342
+ Args:
343
+ text: The text to correct
344
+
345
+ Returns:
346
+ The corrected text
347
+ """
348
+ if not text:
349
+ return text
350
+
351
+ client = groq.Groq(api_key=self.groq_api_key)
352
+
353
  try:
354
  chat_completion = client.chat.completions.create(
355
  messages=[
356
  {
357
  "role": "user",
358
+ "content": f"Correct any grammar, spelling, or punctuation errors in the following text, but keep the meaning exactly the same: '{text}'"
359
  }
360
  ],
361
+ model=self.model_name,
362
+ temperature=self.temperature,
363
+ max_tokens=settings.MAX_TOKENS
364
  )
365
  return chat_completion.choices[0].message.content
366
  except Exception as e:
367
+ if settings.DEBUG:
368
+ print(f"Error during grammar correction: {e}")
369
+ return text # Return original text if correction fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
 
 
371
 
372
+ # Create module-level instances for easier imports
373
+ profile_extractor = ProfileExtractor()
374
+ grammar_corrector = GrammarCorrector()
375
 
376
+ # Export functions for backward compatibility
377
+ def extract_profile_information(pdf_text: str) -> Profile:
378
+ """Legacy function for backward compatibility"""
379
+ return profile_extractor.extract_profile(pdf_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
  def correct_grammar(text: str) -> str:
382
+ """Legacy function for backward compatibility"""
383
+ return grammar_corrector.correct_grammar(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agents/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agents package for specialized AI tasks
3
+ """
4
+ from .profile_extractor import ProfileExtractor, extract_profile_information
5
+ from .grammar_corrector import GrammarCorrector, correct_grammar
6
+
7
+ __all__ = [
8
+ 'ProfileExtractor',
9
+ 'GrammarCorrector',
10
+ 'extract_profile_information',
11
+ 'correct_grammar'
12
+ ]
agents/grammar_corrector.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent for correcting grammar in text
3
+ """
4
+ import groq
5
+ from config import get_settings
6
+
7
+ settings = get_settings()
8
+
9
+ class GrammarCorrector:
10
+ """Class for correcting grammar in text using LLM"""
11
+
12
+ def __init__(self):
13
+ self.groq_api_key = settings.GROQ_API_KEY
14
+ self.model_name = settings.MODEL_NAME
15
+ self.temperature = settings.GRAMMAR_CORRECTION_TEMPERATURE
16
+
17
+ def correct_grammar(self, text: str) -> str:
18
+ """
19
+ Corrects grammar in user input using Groq's LLM.
20
+
21
+ Args:
22
+ text: The text to correct
23
+
24
+ Returns:
25
+ The corrected text
26
+ """
27
+ if not text:
28
+ return text
29
+
30
+ client = groq.Groq(api_key=self.groq_api_key)
31
+
32
+ try:
33
+ chat_completion = client.chat.completions.create(
34
+ messages=[
35
+ {
36
+ "role": "user",
37
+ "content": f"Correct any grammar, spelling, or punctuation errors in the following text, but keep the meaning exactly the same: '{text}'"
38
+ }
39
+ ],
40
+ model=self.model_name,
41
+ temperature=self.temperature,
42
+ max_tokens=settings.MAX_TOKENS
43
+ )
44
+ return chat_completion.choices[0].message.content
45
+ except Exception as e:
46
+ if settings.DEBUG:
47
+ print(f"Error during grammar correction: {e}")
48
+ return text # Return original text if correction fails
49
+
50
+
51
+ # Create module-level instance for easier imports
52
+ grammar_corrector = GrammarCorrector()
53
+
54
+ # Export function for backward compatibility
55
+ def correct_grammar(text: str) -> str:
56
+ """Legacy function for backward compatibility"""
57
+ return grammar_corrector.correct_grammar(text)
agents/profile_extractor.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent for extracting profile information from resumes
3
+ """
4
+ import groq
5
+ from models import Profile, SocialMedia, Project, Skill, Education
6
+ from typing import List, Dict, Any, Optional
7
+ from langchain.output_parsers import PydanticOutputParser
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain_groq import ChatGroq
10
+ import json
11
+ from config import get_settings
12
+ import logging
13
+
14
+ settings = get_settings()
15
+
16
+ # Configure logging
17
+ logging.basicConfig(
18
+ level=logging.DEBUG if settings.DEBUG else logging.INFO,
19
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
20
+ )
21
+ logger = logging.getLogger(__name__)
22
+
23
class ProfileExtractor:
    """
    Class for extracting profile information from resume text

    The primary path (``_extract_with_langchain``) asks the model for one JSON
    document and validates it against ``Profile``. If that path raises,
    ``_extract_with_fallback`` queries every field with a separate prompt.
    """

    # Sentinel the prompts ask the model to answer with when a value is absent.
    _MISSING = "N/A"

    def __init__(self):
        logger.debug("Initializing ProfileExtractor")
        self.groq_api_key = settings.GROQ_API_KEY
        self.model_name = settings.MODEL_NAME
        self.temperature = settings.TEMPERATURE
        self.max_tokens = settings.MAX_TOKENS
        self.llm = self._initialize_llm()

    def _initialize_llm(self) -> "ChatGroq":
        """Initialize the language model client"""
        logger.debug("Initializing language model client")
        return ChatGroq(
            groq_api_key=self.groq_api_key,
            model_name=self.model_name,
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )

    # ------------------------------------------------------------------
    # Pure helpers (no I/O)
    # ------------------------------------------------------------------
    @staticmethod
    def _slice_json(text: str, opener: str, closer: str) -> Optional[str]:
        """Return the span from the first ``opener`` to the last ``closer``.

        Returns ``None`` when either delimiter is absent or they are out of
        order, i.e. when no candidate JSON payload is present in ``text``.
        """
        start = text.find(opener)
        end = text.rfind(closer) + 1
        if start >= 0 and end > start:
            return text[start:end]
        return None

    @staticmethod
    def _split_names(raw: str) -> List[str]:
        """Split a comma-separated reply into stripped, non-empty names."""
        return [part.strip() for part in raw.split(",") if part.strip()]

    @classmethod
    def _or_none(cls, value: Optional[str]) -> Optional[str]:
        """Return ``value`` stripped, or ``None`` if it is None, blank or 'N/A'."""
        if value is None:
            return None
        cleaned = value.strip()
        if not cleaned or cleaned == cls._MISSING:
            return None
        return cleaned

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def extract_profile(self, pdf_text: str) -> "Profile":
        """
        Main method to extract profile information from PDF text

        Args:
            pdf_text: Text extracted from a resume PDF

        Returns:
            Profile object with extracted information
        """
        logger.info("Extracting profile information")
        try:
            profile = self._extract_with_langchain(pdf_text)
            logger.info("Profile extracted successfully with LangChain")
            return profile
        except Exception as e:
            # Keep the traceback: this is the only place the primary-path
            # failure is visible before the fallback takes over.
            logger.exception(f"LangChain extraction failed: {e}")
            return self._extract_with_fallback(pdf_text)

    # ------------------------------------------------------------------
    # Primary path
    # ------------------------------------------------------------------
    def _extract_with_langchain(self, pdf_text: str) -> "Profile":
        """Extract profile with structured LangChain approach

        Raises:
            ValueError: if the model reply contains no ``{...}`` span.
            Exception: JSON decoding or ``Profile`` validation errors
                propagate to the caller, which switches to the fallback.
        """
        logger.debug("Extracting profile with LangChain")
        format_instructions = """
        Extract the following information from the resume:
        1. Full name
        2. Professional title
        3. Email address
        4. Bio (a 50-100 word professional summary)
        5. Tagline (a short 5-10 word catchy phrase summarizing professional identity)
        6. Social media links (LinkedIn, GitHub, Instagram)
        7. Projects (with title, description, and tech stack)
        8. Skills
        9. Education history (with school, degree, field of study, start date and end date)

        Return the information in the following JSON format:
        {
        "name": "Full Name",
        "title": "Professional Title",
        "email": "email@example.com",
        "bio": "Professional biography...",
        "tagline": "Catchy professional tagline",
        "social": {
        "linkedin": "LinkedIn URL or null",
        "github": "GitHub URL or null",
        "instagram": "Instagram URL or null"
        },
        "projects": [
        {
        "title": "Project Title",
        "description": "Project Description",
        "techStack": "Technologies used"
        }
        ],
        "skills": [
        {"name": "Skill 1"},
        {"name": "Skill 2"}
        ],
        "educations": [
        {
        "school": "University Name",
        "degree": "Degree Type (e.g., Bachelor's, Master's)",
        "fieldOfStudy": "Major or Field",
        "startDate": "Start Year",
        "endDate": "End Year or Present"
        }
        ]
        }

        If any information is not available, use null for that field.
        """

        template = """
        You are a professional resume parser. Extract structured information from the following resume:

        {pdf_text}

        {format_instructions}
        """

        # format_instructions is passed as a partial variable so that its
        # literal JSON braces are inserted as a value, not parsed as fields.
        prompt = PromptTemplate(
            template=template,
            input_variables=["pdf_text"],
            partial_variables={"format_instructions": format_instructions}
        )

        chain = prompt | self.llm
        result = chain.invoke({"pdf_text": pdf_text})
        response_text = result.content

        # The model may wrap the JSON in prose; keep only the outermost object.
        json_str = self._slice_json(response_text, "{", "}")
        if json_str is None:
            logger.error("No JSON found in the response")
            raise ValueError("No JSON found in the response")

        profile_dict = json.loads(json_str)
        profile = Profile.model_validate(profile_dict)
        profile = self._fill_missing_information(profile, pdf_text)
        logger.debug("Profile extracted and validated")
        return profile

    def _fill_missing_information(self, profile: "Profile", pdf_text: str) -> "Profile":
        """
        Attempts to fill in any missing information in the profile

        Each field is handled independently and best-effort: a failure while
        filling one field is logged and does not prevent the others.

        Args:
            profile: Profile produced by the primary extraction; mutated in place.
            pdf_text: Full resume text used to build the follow-up prompts.

        Returns:
            The same ``profile`` object.
        """
        logger.debug("Filling missing information in the profile")
        if not profile.name or profile.name == "N/A":
            try:
                response = self.llm.invoke("Extract only the full name from this resume text. Respond with just the name: " + pdf_text[:settings.CHUNK_SIZE])
                name = self._or_none(response.content)
                if name:
                    profile.name = name
                    logger.debug(f"Extracted name: {name}")
            except Exception as e:
                logger.error(f"Error extracting name: {e}")

        if not profile.title or profile.title == "N/A":
            try:
                response = self.llm.invoke("Extract only the professional title from this resume text. Respond with just the title: " + pdf_text[:settings.CHUNK_SIZE])
                title = self._or_none(response.content)
                if title:
                    profile.title = title
                    logger.debug(f"Extracted title: {title}")
            except Exception as e:
                logger.error(f"Error extracting title: {e}")

        if not profile.email or profile.email == "N/A":
            try:
                response = self.llm.invoke("Extract only the email address from this resume text. Respond with just the email: " + pdf_text)
                email = self._or_none(response.content)
                # Minimal sanity check so a chatty reply is not stored as an email.
                if email and "@" in email:
                    profile.email = email
                    logger.debug(f"Extracted email: {email}")
            except Exception as e:
                logger.error(f"Error extracting email: {e}")

        if not profile.bio or profile.bio == "N/A":
            try:
                response = self.llm.invoke("Create a short professional biography (around 50-100 words) based on this resume. Focus on skills and experience: " + pdf_text)
                bio = self._or_none(response.content)
                if bio:
                    profile.bio = bio
                    logger.debug(f"Created bio: {bio}")
            except Exception as e:
                logger.error(f"Error creating bio: {e}")

        if not profile.educations:
            try:
                education_prompt = "Extract education history from this resume. For each education entry, provide the school name, degree type, field of study, start date, and end date. Format the response as a list of JSON objects."
                response = self.llm.invoke(education_prompt + "\n\n" + pdf_text)
                education_text = response.content.strip()

                edu_json = self._slice_json(education_text, "[", "]")
                if edu_json is not None:
                    for edu in json.loads(edu_json):
                        education = Education(
                            school=edu.get("school", "Unknown"),
                            degree=edu.get("degree", ""),
                            fieldOfStudy=edu.get("fieldOfStudy", ""),
                            startDate=edu.get("startDate", ""),
                            endDate=edu.get("endDate", "")
                        )
                        profile.educations.append(education)
                        logger.debug(f"Added education: {education}")
            except Exception as e:
                logger.error(f"Error extracting education: {e}")

        if profile.skills:
            try:
                response = self.llm.invoke("Extract a top 8 of skills from this resume text, separated by commas. Respond with just the skills: " + ", ".join([skill.name for skill in profile.skills]))
                # Strip before the emptiness test so " " never becomes Skill(name="").
                for skill_name in self._split_names(response.content):
                    profile.topSkills.append(Skill(name=skill_name))
                    logger.debug(f"Added skill: {skill_name}")
            except Exception as e:
                logger.error(f"Error extracting skills: {e}")

        return profile

    # ------------------------------------------------------------------
    # Fallback path
    # ------------------------------------------------------------------
    def _extract_with_fallback(self, pdf_text: str) -> "Profile":
        """Fallback method for profile extraction using direct API calls

        Issues one prompt per field. A failed or empty reply is treated the
        same as an explicit 'N/A': required text fields become "N/A" and
        optional ones become ``None``.
        """
        logger.debug("Extracting profile with fallback method")
        client = groq.Groq(api_key=self.groq_api_key)

        def get_llm_response(prompt: str) -> str:
            """Helper function to get a response from the LLM."""
            try:
                chat_completion = client.chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.model_name,
                    temperature=settings.FALLBACK_TEMPERATURE,
                    max_tokens=settings.MAX_TOKENS
                )
                # Guard against a None completion so callers can always .strip().
                return chat_completion.choices[0].message.content or ""
            except Exception as e:
                logger.error(f"Error during LLM call: {e}")
                return ""  # Return empty string on failure

        name = get_llm_response(f"Extract the full name from the following text. If no name is present, respond with 'N/A'. Only respond with the name: {pdf_text}").strip()
        title = get_llm_response(f"Extract the professional title from the following text. If no title is present, respond with 'N/A'. Only respond with the title: {pdf_text}").strip()
        email = get_llm_response(f"Extract the email address from the following text. If no email is present, respond with 'N/A'. Only respond with the email: {pdf_text}").strip()
        bio = get_llm_response(f"Create a short professional biography (around 50-100 words) based on the following text. Focus on skills and experience. If no bio is possible, respond with 'N/A'. Provide only the biography itself: {pdf_text}").strip()
        tagline = get_llm_response(f"Create a short and catchy tagline (around 5-10 words) that summarizes the person's professional identity from the following text. If no tagline is possible, respond with 'N/A'. Provide only the tagline: {pdf_text}").strip()

        linkedin = get_llm_response(f"Extract the LinkedIn profile URL from the following text. If no LinkedIn URL is present, respond with 'N/A'. Only respond with the LinkedIn URL: {pdf_text}").strip()
        github = get_llm_response(f"Extract the GitHub profile URL from the following text. If no GitHub URL is present, respond with 'N/A'. Only respond with the GitHub URL: {pdf_text}").strip()
        instagram = get_llm_response(f"Extract the Instagram profile URL from the following text. If no Instagram URL is present, respond with 'N/A'. Only respond with the Instagram URL: {pdf_text}").strip()

        project_info = get_llm_response(f"Extract information about projects from the following text in this format Project Title: Project Description: Tech Stack:. If no projects are present, respond with 'N/A': {pdf_text}").strip()
        skills_info = get_llm_response(f"Extract a list of skills from the following text, separated by commas. If no skills are present, respond with 'N/A'. Only respond with the skills: {pdf_text}").strip()

        education_info = get_llm_response(f"Extract education history from the following resume. For each education entry, provide the school name, degree type, field of study, start date, and end date. Format as 'School: Degree: Field: StartDate: EndDate' with each education on a new line. If no education is found, respond with 'N/A': {pdf_text}").strip()

        social_media = SocialMedia(
            linkedin=self._or_none(linkedin),
            github=self._or_none(github),
            instagram=self._or_none(instagram)
        )

        projects = []
        if self._or_none(project_info) is not None:
            for line in project_info.split("\n"):
                if ":" not in line:
                    continue
                try:
                    project_title, project_description_techstack = line.split(":", 1)
                    # Raises ValueError (handled below) when the marker is absent.
                    project_description, tech_stack = project_description_techstack.split("Tech Stack:", 1)

                    projects.append(Project(
                        title=project_title.strip(),
                        description=project_description.strip(),
                        techStack=tech_stack.strip()
                    ))
                    logger.debug(f"Added project: {project_title.strip()}")
                except ValueError as e:
                    logger.error(f"Error parsing project: {line}. Error: {e}")

        skills = []
        if self._or_none(skills_info) is not None:
            for skill_name in self._split_names(skills_info):
                skills.append(Skill(name=skill_name))
                logger.debug(f"Added skill: {skill_name}")

        educations = []
        if self._or_none(education_info) is not None:
            for line in education_info.split("\n"):
                if ":" not in line:
                    continue
                try:
                    parts = line.split(":")
                    # Lines with fewer than the five requested fields are ignored.
                    if len(parts) >= 5:
                        educations.append(Education(
                            school=parts[0].strip(),
                            degree=parts[1].strip(),
                            fieldOfStudy=parts[2].strip(),
                            startDate=parts[3].strip(),
                            endDate=parts[4].strip()
                        ))
                        logger.debug(f"Added education: {parts[0].strip()}")
                except Exception as e:
                    logger.error(f"Error parsing education: {line}. Error: {e}")

        has_social = social_media.github or social_media.instagram or social_media.linkedin
        profile = Profile(
            # Required text fields fall back to the "N/A" sentinel, including
            # when the LLM call failed and produced an empty string.
            name=self._or_none(name) or self._MISSING,
            title=self._or_none(title) or self._MISSING,
            email=self._or_none(email) or self._MISSING,
            bio=self._or_none(bio) or self._MISSING,
            tagline=self._or_none(tagline),
            social=social_media if has_social else None,
            chatbot=None,
            profileImg=None,
            heroImg=None,
            projects=projects,
            skills=skills,
            educations=educations
        )

        logger.info("Profile extracted successfully with fallback method")
        return profile
335
+
336
+
337
+ # Create module-level instance for easier imports
338
+ profile_extractor = ProfileExtractor()
339
+
340
+ # Export function for backward compatibility
341
def extract_profile_information(pdf_text: str) -> Profile:
    """Function-style entry point kept for backward compatibility.

    Forwards ``pdf_text`` to the shared module-level ``profile_extractor``
    instance and returns the ``Profile`` produced by its ``extract_profile``.
    """
    extracted = profile_extractor.extract_profile(pdf_text)
    return extracted
344
+
345
+ # Export the class and the function
346
+ __all__ = ['ProfileExtractor', 'extract_profile_information']
api.py CHANGED
@@ -1,9 +1,21 @@
1
- from fastapi import FastAPI, HTTPException
2
  from pymongo import MongoClient
3
  from bson.objectid import ObjectId
4
  from fastapi.middleware.cors import CORSMiddleware
5
  import json
6
  from bson import json_util
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  app = FastAPI(title="Profile API", description="API to retrieve profile information")
9
 
@@ -18,17 +30,21 @@ app.add_middleware(
18
 
19
  # MongoDB connection configuration
20
  def get_db_connection():
 
21
  try:
22
- client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)
 
 
 
23
  # Test the connection
24
  client.server_info()
25
- return client["profileDB"]
26
  except Exception as e:
27
- print(f"Error connecting to MongoDB: {e}")
28
  raise HTTPException(status_code=500, detail=f"Database connection error: {str(e)}")
29
 
30
  @app.get("/api/profile/{profile_id}")
31
- async def get_profile(profile_id: str):
32
  """
33
  Retrieve a profile by its MongoDB ID
34
 
@@ -41,12 +57,13 @@ async def get_profile(profile_id: str):
41
  try:
42
  # Connect to MongoDB
43
  db = get_db_connection()
44
- collection = db["profiles"]
45
 
46
  # Try to parse the profile_id as an ObjectId
47
  try:
48
  obj_id = ObjectId(profile_id)
49
- except:
 
50
  raise HTTPException(status_code=400, detail=f"Invalid profile ID format: {profile_id}")
51
 
52
  # Find the profile by ID
@@ -54,10 +71,12 @@ async def get_profile(profile_id: str):
54
 
55
  # Check if profile exists
56
  if not profile:
 
57
  raise HTTPException(status_code=404, detail=f"Profile with ID {profile_id} not found")
58
 
59
  # Convert MongoDB document to JSON serializable format
60
  profile_json = json.loads(json_util.dumps(profile))
 
61
 
62
  return profile_json
63
 
@@ -66,11 +85,11 @@ async def get_profile(profile_id: str):
66
  raise
67
  except Exception as e:
68
  # Handle any other exceptions
 
69
  raise HTTPException(status_code=500, detail=f"Error retrieving profile: {str(e)}")
70
 
71
- # Add new endpoint for profile images
72
  @app.get("/api/profile/{profile_id}/image")
73
- async def get_profile_image(profile_id: str):
74
  """
75
  Retrieve just the profile image for a given profile ID
76
 
@@ -82,7 +101,7 @@ async def get_profile_image(profile_id: str):
82
  """
83
  try:
84
  db = get_db_connection()
85
- collection = db["profiles"]
86
 
87
  try:
88
  obj_id = ObjectId(profile_id)
@@ -106,6 +125,7 @@ async def get_profile_image(profile_id: str):
106
  except HTTPException:
107
  raise
108
  except Exception as e:
 
109
  raise HTTPException(status_code=500, detail=f"Error retrieving profile image: {str(e)}")
110
 
111
  if __name__ == "__main__":
 
1
+ from fastapi import FastAPI, HTTPException, Depends
2
  from pymongo import MongoClient
3
  from bson.objectid import ObjectId
4
  from fastapi.middleware.cors import CORSMiddleware
5
  import json
6
  from bson import json_util
7
+ from config import get_settings
8
+ from typing import Dict, Any
9
+ import logging
10
+
11
+ settings = get_settings()
12
+
13
+ # Configure logging
14
+ logging.basicConfig(
15
+ level=logging.DEBUG if settings.DEBUG else logging.INFO,
16
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
17
+ )
18
+ logger = logging.getLogger(__name__)
19
 
20
  app = FastAPI(title="Profile API", description="API to retrieve profile information")
21
 
 
30
 
31
  # MongoDB connection configuration
32
  def get_db_connection():
33
+ """Get MongoDB database connection"""
34
  try:
35
+ client = MongoClient(
36
+ settings.MONGODB_URI,
37
+ serverSelectionTimeoutMS=settings.MONGODB_TIMEOUT_MS
38
+ )
39
  # Test the connection
40
  client.server_info()
41
+ return client[settings.MONGODB_DB]
42
  except Exception as e:
43
+ logger.error(f"Error connecting to MongoDB: {e}")
44
  raise HTTPException(status_code=500, detail=f"Database connection error: {str(e)}")
45
 
46
  @app.get("/api/profile/{profile_id}")
47
+ async def get_profile(profile_id: str) -> Dict[str, Any]:
48
  """
49
  Retrieve a profile by its MongoDB ID
50
 
 
57
  try:
58
  # Connect to MongoDB
59
  db = get_db_connection()
60
+ collection = db[settings.MONGODB_COLLECTION]
61
 
62
  # Try to parse the profile_id as an ObjectId
63
  try:
64
  obj_id = ObjectId(profile_id)
65
+ except Exception as id_error:
66
+ logger.error(f"Invalid profile ID: {profile_id}, error: {id_error}")
67
  raise HTTPException(status_code=400, detail=f"Invalid profile ID format: {profile_id}")
68
 
69
  # Find the profile by ID
 
71
 
72
  # Check if profile exists
73
  if not profile:
74
+ logger.warning(f"Profile not found: {profile_id}")
75
  raise HTTPException(status_code=404, detail=f"Profile with ID {profile_id} not found")
76
 
77
  # Convert MongoDB document to JSON serializable format
78
  profile_json = json.loads(json_util.dumps(profile))
79
+ logger.debug(f"Retrieved profile: {profile_id}")
80
 
81
  return profile_json
82
 
 
85
  raise
86
  except Exception as e:
87
  # Handle any other exceptions
88
+ logger.error(f"Error retrieving profile {profile_id}: {e}")
89
  raise HTTPException(status_code=500, detail=f"Error retrieving profile: {str(e)}")
90
 
 
91
  @app.get("/api/profile/{profile_id}/image")
92
+ async def get_profile_image(profile_id: str) -> Dict[str, Any]:
93
  """
94
  Retrieve just the profile image for a given profile ID
95
 
 
101
  """
102
  try:
103
  db = get_db_connection()
104
+ collection = db[settings.MONGODB_COLLECTION]
105
 
106
  try:
107
  obj_id = ObjectId(profile_id)
 
125
  except HTTPException:
126
  raise
127
  except Exception as e:
128
+ logger.error(f"Error retrieving profile image {profile_id}: {e}")
129
  raise HTTPException(status_code=500, detail=f"Error retrieving profile image: {str(e)}")
130
 
131
  if __name__ == "__main__":
app.py CHANGED
@@ -1,65 +1,32 @@
 
 
 
1
  import streamlit as st
2
- from io import BytesIO
3
- import os
4
  import json
5
  import traceback
6
  import base64
7
- from pymongo import MongoClient
8
- from agentProfile import extract_text_from_pdf, extract_profile_information, correct_grammar
 
 
 
 
 
9
  from models import Skill, Project, Education, SocialMedia
 
10
 
11
- def profile_to_dict(profile):
12
- return {
13
- "name": profile.name,
14
- "title": profile.title,
15
- "email": profile.email,
16
- "bio": profile.bio,
17
- "tagline": profile.tagline if profile.tagline else "",
18
- "social": {
19
- "linkedin": profile.social.linkedin if profile.social and profile.social.linkedin else "",
20
- "github": profile.social.github if profile.social and profile.social.github else "",
21
- "instagram": profile.social.instagram if profile.social and profile.social.instagram else ""
22
- },
23
- "profileImg": profile.profileImg if profile.profileImg else "",
24
- "projects": [
25
- {
26
- "title": project.title,
27
- "description": project.description,
28
- "techStack": project.techStack if project.techStack else "",
29
- "githubUrl": project.githubUrl if project.githubUrl else "",
30
- "demoUrl": project.demoUrl if project.demoUrl else ""
31
- } for project in profile.projects
32
- ] if profile.projects else [],
33
- "skills": [skill.name for skill in profile.skills] if profile.skills else [],
34
- "educations": [
35
- {
36
- "school": edu.school,
37
- "degree": edu.degree,
38
- "fieldOfStudy": edu.fieldOfStudy,
39
- "startDate": edu.startDate,
40
- "endDate": edu.endDate
41
- } for edu in profile.educations
42
- ] if profile.educations else []
43
- }
44
-
45
- def store_profile(profile_dict):
46
- try:
47
- client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)
48
- # Test the connection
49
- client.server_info()
50
-
51
- db = client["profileDB"]
52
- collection = db["profiles"]
53
- result = collection.insert_one(profile_dict)
54
- return str(result.inserted_id)
55
- except Exception as e:
56
- st.error(f"Erreur de connexion à MongoDB: {str(e)}")
57
- # Fallback: save to JSON file
58
- file_path = f"profile_{profile_dict['name'].replace(' ', '_')}.json"
59
- with open(file_path, 'w') as f:
60
- json.dump(profile_dict, f, indent=2)
61
- return f"Sauvegardé dans le fichier {file_path}"
62
 
 
 
 
 
 
 
 
 
63
  def collect_missing_data(profile):
64
  """
65
  Collects missing data from user input when automatic extraction fails.
@@ -92,7 +59,7 @@ def collect_missing_data(profile):
92
 
93
  if profile.bio and profile.bio != "N/A":
94
  if st.button("Improve Bio Grammar"):
95
- profile.bio = correct_grammar(profile.bio)
96
  st.success("Grammar corrected!")
97
 
98
  # Optional information
@@ -135,7 +102,7 @@ def collect_missing_data(profile):
135
  num_new_edu = st.number_input("Number of additional education entries:", min_value=1, max_value=5, value=1)
136
  offset = len(profile.educations) if profile.educations else 0
137
 
138
- for i in range(num_new_edu):
139
  st.write(f"Additional Education #{i+1}")
140
  school = st.text_input(f"School:", key=f"new_school_{offset+i}")
141
  degree = st.text_input(f"Degree:", key=f"new_degree_{offset+i}")
@@ -187,7 +154,7 @@ def collect_missing_data(profile):
187
  num_new_proj = st.number_input("Number of additional projects:", min_value=1, max_value=5, value=1)
188
  offset = len(profile.projects) if profile.projects else 0
189
 
190
- for i in range(num_new_proj):
191
  st.write(f"Additional Project #{i+1}")
192
  title = st.text_input(f"Title:", key=f"new_proj_title_{offset+i}")
193
  description = st.text_area(f"Description:", key=f"new_proj_desc_{offset+i}")
@@ -196,8 +163,9 @@ def collect_missing_data(profile):
196
  demo_url = st.text_input(f"Demo URL (optional):", key=f"new_proj_demo_{offset+i}")
197
 
198
  if title and description: # Only add if title and description are provided
199
- if st.button(f"Correct Grammar for Project #{i+1}"):
200
- description = correct_grammar(description)
 
201
  st.success("Grammar corrected!")
202
 
203
  project_data.append({
@@ -233,10 +201,81 @@ def collect_missing_data(profile):
233
 
234
  return profile
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  def main():
237
- st.title("Profile Extractor from PDF")
 
 
 
238
 
239
- # State management
240
  if 'profile' not in st.session_state:
241
  st.session_state.profile = None
242
  if 'extraction_complete' not in st.session_state:
@@ -246,135 +285,101 @@ def main():
246
  if 'profile_saved' not in st.session_state:
247
  st.session_state.profile_saved = False
248
 
249
- # Step 1: Upload PDF
250
  if not st.session_state.extraction_complete:
251
- uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
252
 
253
  if uploaded_file is not None:
254
- # Read file as bytes and save to a temporary file
255
- bytes_data = uploaded_file.read()
256
- with open("temp.pdf", "wb") as f:
257
- f.write(bytes_data)
258
-
259
- pdf_text = extract_text_from_pdf("temp.pdf")
260
- if pdf_text:
261
- try:
262
- with st.spinner("Extracting information..."):
263
- profile = extract_profile_information(pdf_text)
 
 
 
264
  st.session_state.profile = profile
265
  st.session_state.extraction_complete = True
266
  st.experimental_rerun()
267
 
268
- except Exception as e:
269
- st.error(f"Erreur lors de l'extraction du profil: {str(e)}")
270
- if "403" in str(e):
271
- st.error("Erreur d'autorisation (403 Forbidden). Vérifiez les clés API et les autorisations.")
272
- with st.expander("Détails techniques"):
273
- st.code(traceback.format_exc())
274
- else:
275
- st.error("Could not extract text from the PDF.")
276
-
277
- # Clean up temporary file
278
- if os.path.exists("temp.pdf"):
279
- os.remove("temp.pdf")
280
 
281
- # Step 2: Collect missing data from user
282
  elif not st.session_state.user_input_complete:
283
- profile = st.session_state.profile
284
- profile = collect_missing_data(profile)
285
 
286
- submit = st.button("Save Profile")
287
- if submit:
288
- st.session_state.profile = profile
289
- st.session_state.user_input_complete = True
290
- st.experimental_rerun()
 
 
 
 
 
 
 
 
 
 
291
 
292
- # Step 3: Display and save final profile
293
  elif not st.session_state.profile_saved:
294
  profile = st.session_state.profile
295
 
296
  try:
297
- # Convert profile to dictionary and store in MongoDB
298
- profile_dict = profile_to_dict(profile)
299
- inserted_id = store_profile(profile_dict)
300
- st.success(f"Le profil a été enregistré avec succès avec l'ID : {inserted_id}")
301
-
302
- # Show API access information
303
- st.info(f"Access this profile via API: http://localhost:8000/api/profile/{inserted_id}")
304
 
305
- st.session_state.profile_saved = True
306
 
307
- st.header("Your Complete Profile")
308
- # Display profile image if available
309
- if profile.profileImg:
310
- st.image(profile.profileImg, width=150)
311
 
312
- # Display basic info in a table
313
- basic_data = {
314
- "Field": ["Name", "Title", "Email", "Bio", "Tagline"],
315
- "Value": [
316
- profile.name,
317
- profile.title,
318
- profile.email,
319
- profile.bio,
320
- profile.tagline if profile.tagline else ""
321
- ]
322
- }
323
- st.table(basic_data)
324
-
325
- # Display social media if available
326
- if profile.social:
327
- social_data = {
328
- "Platform": ["LinkedIn", "GitHub", "Instagram"],
329
- "URL": [
330
- profile.social.linkedin if profile.social.linkedin else "",
331
- profile.social.github if profile.social.github else "",
332
- profile.social.instagram if profile.social.instagram else ""
333
- ]
334
- }
335
- st.subheader("Social Media")
336
- st.table(social_data)
337
-
338
- # Display education in a table if available
339
- if profile.educations:
340
- education_data = {
341
- "School": [edu.school for edu in profile.educations],
342
- "Degree": [edu.degree for edu in profile.educations],
343
- "Field of Study": [edu.fieldOfStudy for edu in profile.educations],
344
- "Start Date": [edu.startDate for edu in profile.educations],
345
- "End Date": [edu.endDate for edu in profile.educations]
346
- }
347
- st.subheader("Education")
348
- st.table(education_data)
349
 
350
- # Display projects in a table if available
351
- if profile.projects:
352
- projects_data = {
353
- "Title": [project.title for project in profile.projects],
354
- "Description": [project.description for project in profile.projects],
355
- "Tech Stack": [project.techStack if project.techStack else "" for project in profile.projects],
356
- "GitHub": [project.githubUrl if project.githubUrl else "" for project in profile.projects],
357
- "Demo": [project.demoUrl if project.demoUrl else "" for project in profile.projects]
358
- }
359
- st.subheader("Projects")
360
- st.table(projects_data)
361
-
362
- # Display skills as a comma separated list if available
363
- if profile.skills:
364
- st.subheader("Skills")
365
- st.write(", ".join([skill.name for skill in profile.skills]))
366
 
367
  except Exception as e:
 
368
  st.error(f"Error saving profile: {str(e)}")
369
- with st.expander("Technical details"):
370
  st.code(traceback.format_exc())
371
 
372
- # Reset button (available after profile is saved)
373
  else:
 
 
 
374
  if st.button("Extract Another Profile"):
 
375
  for key in ['profile', 'extraction_complete', 'user_input_complete', 'profile_saved']:
376
  st.session_state[key] = False
377
  st.experimental_rerun()
 
 
 
378
 
379
  if __name__ == "__main__":
380
  main()
 
1
+ """
2
+ Streamlit web application for resume profile extraction
3
+ """
4
  import streamlit as st
5
+ import os
 
6
  import json
7
  import traceback
8
  import base64
9
+ import logging
10
+ from typing import Dict, Any
11
+
12
+ # Import from our refactored modules
13
+ from agents import profile_extractor as pe, grammar_corrector as gc
14
+ from utils import extract_text_from_pdf, save_temp_pdf
15
+ from services import storage_service
16
  from models import Skill, Project, Education, SocialMedia
17
+ from config import get_settings
18
 
19
+ # Get settings
20
+ settings = get_settings()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
# Logging setup: DEBUG verbosity when settings.DEBUG is truthy, INFO otherwise
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=(logging.DEBUG if settings.DEBUG else logging.INFO),
)
logger = logging.getLogger(__name__)

# Module-level agent instances shared by the functions defined below
profile_extractor = pe.ProfileExtractor()
grammar_corrector = gc.GrammarCorrector()
30
  def collect_missing_data(profile):
31
  """
32
  Collects missing data from user input when automatic extraction fails.
 
59
 
60
  if profile.bio and profile.bio != "N/A":
61
  if st.button("Improve Bio Grammar"):
62
+ profile.bio = grammar_corrector.correct_grammar(profile.bio)
63
  st.success("Grammar corrected!")
64
 
65
  # Optional information
 
102
  num_new_edu = st.number_input("Number of additional education entries:", min_value=1, max_value=5, value=1)
103
  offset = len(profile.educations) if profile.educations else 0
104
 
105
+ for i in range(int(num_new_edu)):
106
  st.write(f"Additional Education #{i+1}")
107
  school = st.text_input(f"School:", key=f"new_school_{offset+i}")
108
  degree = st.text_input(f"Degree:", key=f"new_degree_{offset+i}")
 
154
  num_new_proj = st.number_input("Number of additional projects:", min_value=1, max_value=5, value=1)
155
  offset = len(profile.projects) if profile.projects else 0
156
 
157
+ for i in range(int(num_new_proj)):
158
  st.write(f"Additional Project #{i+1}")
159
  title = st.text_input(f"Title:", key=f"new_proj_title_{offset+i}")
160
  description = st.text_area(f"Description:", key=f"new_proj_desc_{offset+i}")
 
163
  demo_url = st.text_input(f"Demo URL (optional):", key=f"new_proj_demo_{offset+i}")
164
 
165
  if title and description: # Only add if title and description are provided
166
+ correct_grammar_btn = st.button(f"Correct Grammar for Project #{i+1}")
167
+ if correct_grammar_btn:
168
+ description = grammar_corrector.correct_grammar(description)
169
  st.success("Grammar corrected!")
170
 
171
  project_data.append({
 
201
 
202
  return profile
203
 
204
def display_profile(profile):
    """
    Render each populated section of a profile in the Streamlit UI.

    Sections (image, social media, education, projects, skills) are shown
    only when the corresponding attribute on the profile is truthy; the
    basic-information table is always shown.

    Args:
        profile: The Profile object to display
    """
    st.header("Your Complete Profile")

    # Profile picture, when one is set
    if profile.profileImg:
        st.image(profile.profileImg, width=150)

    # Basic information as a Field/Value table
    st.table({
        "Field": ["Name", "Title", "Email", "Bio", "Tagline"],
        "Value": [
            profile.name,
            profile.title,
            profile.email,
            profile.bio,
            profile.tagline or "",
        ],
    })

    # Social media links; missing links are rendered as empty cells
    social = profile.social
    if social:
        links = (social.linkedin, social.github, social.instagram)
        social_table = {
            "Platform": ["LinkedIn", "GitHub", "Instagram"],
            "URL": [link or "" for link in links],
        }
        st.subheader("Social Media")
        st.table(social_table)

    # Education: one table column per attribute, one row per entry
    educations = profile.educations
    if educations:
        education_columns = {
            "School": "school",
            "Degree": "degree",
            "Field of Study": "fieldOfStudy",
            "Start Date": "startDate",
            "End Date": "endDate",
        }
        education_table = {
            header: [getattr(entry, attr) for entry in educations]
            for header, attr in education_columns.items()
        }
        st.subheader("Education")
        st.table(education_table)

    # Projects: the optional fields fall back to an empty string
    projects = profile.projects
    if projects:
        projects_table = {
            "Title": [item.title for item in projects],
            "Description": [item.description for item in projects],
        }
        optional_columns = (
            ("Tech Stack", "techStack"),
            ("GitHub", "githubUrl"),
            ("Demo", "demoUrl"),
        )
        for header, attr in optional_columns:
            projects_table[header] = [getattr(item, attr) or "" for item in projects]
        st.subheader("Projects")
        st.table(projects_table)

    # Skills as a single comma-separated line
    if profile.skills:
        st.subheader("Skills")
        st.write(", ".join(skill.name for skill in profile.skills))
271
+
272
  def main():
273
+ """Main application function"""
274
+ st.set_page_config(page_title="Resume Profile Extractor", page_icon="📄", layout="wide")
275
+ st.title("Professional Profile Extractor")
276
+ st.write("Upload a resume PDF to extract professional profile information")
277
 
278
+ # Initialize session state variables
279
  if 'profile' not in st.session_state:
280
  st.session_state.profile = None
281
  if 'extraction_complete' not in st.session_state:
 
285
  if 'profile_saved' not in st.session_state:
286
  st.session_state.profile_saved = False
287
 
288
+ # Step 1: Upload PDF and Extract Profile
289
  if not st.session_state.extraction_complete:
290
+ uploaded_file = st.file_uploader("Upload a PDF resume", type="pdf")
291
 
292
  if uploaded_file is not None:
293
+ try:
294
+ # Save the uploaded file to a temporary location
295
+ pdf_path = save_temp_pdf(uploaded_file.getvalue())
296
+
297
+ # Extract text from the PDF
298
+ pdf_text = extract_text_from_pdf(pdf_path)
299
+
300
+ if not pdf_text:
301
+ st.error("Could not extract text from the PDF. The file might be scanned or protected.")
302
+ else:
303
+ with st.spinner("Extracting profile information..."):
304
+ # Extract profile information using the profile extractor agent
305
+ profile = profile_extractor.extract_profile(pdf_text)
306
  st.session_state.profile = profile
307
  st.session_state.extraction_complete = True
308
  st.experimental_rerun()
309
 
310
+ # Clean up temporary file
311
+ if os.path.exists(pdf_path):
312
+ os.remove(pdf_path)
313
+
314
+ except Exception as e:
315
+ logger.error(f"Error during profile extraction: {e}")
316
+ st.error(f"An error occurred during profile extraction: {str(e)}")
317
+ if "403" in str(e):
318
+ st.error("Authorization error (403 Forbidden). Please check your API key and permissions.")
319
+ with st.expander("Technical Details"):
320
+ st.code(traceback.format_exc())
 
321
 
322
+ # Step 2: Allow User to Edit/Complete the Profile
323
  elif not st.session_state.user_input_complete:
324
+ st.info("We've extracted information from your resume. Please review and complete any missing details.")
 
325
 
326
+ # Call the function to collect and complete missing data
327
+ profile = collect_missing_data(st.session_state.profile)
328
+
329
+ # Add buttons for submitting or starting over
330
+ col1, col2 = st.columns(2)
331
+ with col1:
332
+ if st.button("Save Profile"):
333
+ st.session_state.profile = profile
334
+ st.session_state.user_input_complete = True
335
+ st.experimental_rerun()
336
+ with col2:
337
+ if st.button("Start Over"):
338
+ st.session_state.profile = None
339
+ st.session_state.extraction_complete = False
340
+ st.experimental_rerun()
341
 
342
+ # Step 3: Save Profile and Display Results
343
  elif not st.session_state.profile_saved:
344
  profile = st.session_state.profile
345
 
346
  try:
347
+ # Store the profile using the storage service
348
+ inserted_id = storage_service.store_profile(
349
+ profile,
350
+ error_handler=st.error
351
+ )
 
 
352
 
353
+ st.success(f"Profile saved successfully with ID: {inserted_id}")
354
 
355
+ # Display the Portfolio URL
356
+ st.info(f"Access to your portfolio: [Portfolio URL](http://localhost:3000/{inserted_id})")
 
 
357
 
358
+ # Mark as saved in session state
359
+ st.session_state.profile_saved = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
+ # Display the complete profile
362
+ display_profile(profile)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
  except Exception as e:
365
+ logger.error(f"Error saving profile: {e}")
366
  st.error(f"Error saving profile: {str(e)}")
367
+ with st.expander("Technical Details"):
368
  st.code(traceback.format_exc())
369
 
370
+ # Final state - allow extracting another profile
371
  else:
372
+ st.success("Profile extraction complete!")
373
+
374
+ # Show options to extract another profile or view the current one
375
  if st.button("Extract Another Profile"):
376
+ # Reset session state
377
  for key in ['profile', 'extraction_complete', 'user_input_complete', 'profile_saved']:
378
  st.session_state[key] = False
379
  st.experimental_rerun()
380
+ else:
381
+ # Show the profile again
382
+ display_profile(st.session_state.profile)
383
 
384
  if __name__ == "__main__":
385
  main()
config.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+ from typing import Optional, Dict, Any
3
+ import os
4
+ from functools import lru_cache
5
+
6
class Settings(BaseSettings):
    """
    Typed application configuration, loaded from environment variables or
    the .env file named in ``Config``.
    """

    # --- API keys ---
    GROQ_API_KEY: str  # declared without a default value

    # --- LLM settings ---
    MODEL_NAME: str = "qwen-2.5-32b"
    MAX_TOKENS: int = 2048
    TEMPERATURE: float = 0.5
    FALLBACK_TEMPERATURE: float = 0.7
    GRAMMAR_CORRECTION_TEMPERATURE: float = 0.3

    # --- MongoDB settings ---
    MONGODB_URI: str = "mongodb://localhost:27017/"
    MONGODB_DB: str = "profileDB"
    MONGODB_COLLECTION: str = "profiles"
    MONGODB_TIMEOUT_MS: int = 5000

    # --- Application settings ---
    CACHE_SIZE: int = 100
    CHUNK_SIZE: int = 1000
    DEBUG: bool = False

    # --- File settings ---
    TEMP_FILE_DIR: str = "./"  # directory used for temporary uploads

    class Config:
        # Read values from this dotenv file; variable names are case-sensitive
        env_file = ".env"
        case_sensitive = True
37
+
38
@lru_cache(maxsize=128)
def get_settings() -> Settings:
    """
    Return the application settings, building them on the first call only.

    The result is memoised by ``lru_cache``, so later calls return the same
    ``Settings`` instance.
    """
    return Settings()
models.py CHANGED
@@ -41,4 +41,5 @@ class Profile(BaseModel):
41
  chatbot: Optional[Chatbot] = None
42
  projects: List[Project] = []
43
  skills: List[Skill] = []
 
44
  educations: List[Education] = []
 
41
  chatbot: Optional[Chatbot] = None
42
  projects: List[Project] = []
43
  skills: List[Skill] = []
44
+ topSkills: List[str] = []
45
  educations: List[Education] = []
services/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ Services package for backend operations
3
+ """
4
+ from .storage_service import StorageService, storage_service
5
+
6
+ __all__ = ['StorageService', 'storage_service']
services/storage_service.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Service for storing and retrieving profile data
3
+ """
4
+ from pymongo import MongoClient
5
+ from models import Profile
6
+ from config import get_settings
7
+ import json
8
+ import logging
9
+ from typing import Dict, Any, Optional
10
+
11
+ settings = get_settings()
12
+ logger = logging.getLogger(__name__)
13
+
14
class StorageService:
    """Service for storing profile data.

    Profiles are written to MongoDB; when that fails for any reason the
    profile is written to a JSON file in the current working directory
    instead. Connection parameters are read from the module-level
    ``settings`` object when the service is constructed.
    """

    def __init__(self):
        self.mongo_uri = settings.MONGODB_URI
        self.db_name = settings.MONGODB_DB
        self.collection_name = settings.MONGODB_COLLECTION
        self.timeout_ms = settings.MONGODB_TIMEOUT_MS

    def profile_to_dict(self, profile: "Profile") -> Dict[str, Any]:
        """Convert a Profile object to a plain dictionary for storage.

        Optional string fields that are missing are stored as ``""`` and
        missing collections as ``[]``; skills are flattened to their names.

        Args:
            profile: The Profile object to convert

        Returns:
            A dictionary containing only built-in types
        """
        # NOTE(review): Profile.topSkills and Profile.chatbot are not
        # persisted here — confirm that this is intended.
        social = profile.social
        return {
            "name": profile.name,
            "title": profile.title,
            "email": profile.email,
            "bio": profile.bio,
            "tagline": profile.tagline or "",
            "social": {
                "linkedin": (social.linkedin or "") if social else "",
                "github": (social.github or "") if social else "",
                "instagram": (social.instagram or "") if social else "",
            },
            "profileImg": profile.profileImg or "",
            "projects": [
                {
                    "title": project.title,
                    "description": project.description,
                    "techStack": project.techStack or "",
                    "githubUrl": project.githubUrl or "",
                    "demoUrl": project.demoUrl or "",
                }
                for project in (profile.projects or [])
            ],
            "skills": [skill.name for skill in (profile.skills or [])],
            "educations": [
                {
                    "school": edu.school,
                    "degree": edu.degree,
                    "fieldOfStudy": edu.fieldOfStudy,
                    "startDate": edu.startDate,
                    "endDate": edu.endDate,
                }
                for edu in (profile.educations or [])
            ],
        }

    @staticmethod
    def _fallback_path(name) -> str:
        """Build a filesystem-safe JSON file name from a profile name."""
        import re  # local import: only this helper needs it

        # Collapse every run of characters that is not a word character or
        # "-" (spaces, dots, path separators, ...) into one underscore so the
        # extracted name can never point outside the working directory.
        safe = re.sub(r"[^\w-]+", "_", str(name or "")).strip("_")
        return f"profile_{safe or 'unknown'}.json"

    def store_profile(self, profile: "Profile", error_handler=None) -> str:
        """
        Store profile data in MongoDB or fallback to JSON file

        Args:
            profile: The Profile object to store
            error_handler: Optional function called with an error message
                when MongoDB storage fails (useful for framework-specific
                error handling)

        Returns:
            String ID of the stored profile, or a "Saved to file <path>"
            message when the JSON fallback was used
        """
        profile_dict = self.profile_to_dict(profile)

        try:
            # The context manager closes the client's connections on exit,
            # whether the insert succeeds or raises.
            with MongoClient(
                self.mongo_uri,
                serverSelectionTimeoutMS=self.timeout_ms,
            ) as client:
                # Test the connection
                client.server_info()

                collection = client[self.db_name][self.collection_name]
                # Insert a copy: insert_one adds an ObjectId "_id" to the
                # document it receives, which would make profile_dict
                # non-JSON-serializable if the fallback below has to run.
                result = collection.insert_one(dict(profile_dict))
                return str(result.inserted_id)

        except Exception as e:
            logger.error(f"MongoDB connection error: {e}")
            if error_handler:
                error_handler(f"Error connecting to MongoDB: {str(e)}")

            # Fallback: save to JSON file
            file_path = self._fallback_path(profile_dict["name"])
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(profile_dict, f, indent=2)
            return f"Saved to file {file_path}"
94
+
95
+
96
+ # Create a global instance
97
+ storage_service = StorageService()
temp.pdf CHANGED
Binary files a/temp.pdf and b/temp.pdf differ
 
utils/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ Utilities package for helper functions
3
+ """
4
+ from .pdf_utils import extract_text_from_pdf, save_temp_pdf
5
+
6
+ __all__ = ['extract_text_from_pdf', 'save_temp_pdf']
utils/pdf_utils.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utilities for working with PDF files
3
+ """
4
+ import PyPDF2
5
+ import io
6
+ import os
7
+ from config import get_settings
8
+ import logging
9
+
10
+ settings = get_settings()
11
+ logger = logging.getLogger(__name__)
12
+
13
def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file.

    The text of all pages is concatenated in page order. Any error while
    opening or parsing the file is logged and reported as an empty result
    rather than raised.

    Args:
        pdf_path (str): The path to the PDF file.

    Returns:
        str: The extracted text. Returns an empty string if extraction fails.
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # NOTE(review): guard against extract_text() returning None for
            # a page without a text layer, so that one such page does not
            # discard the text of the whole document — confirm against the
            # installed PyPDF2 version.
            text = "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        return ""  # Return empty string on failure

    if not text.strip():
        logger.warning(f"Extracted empty text from PDF: {pdf_path}")
    # Log only the size: the extracted resume text is personal data and
    # should not end up in application logs.
    logger.info("Extracted %d characters from PDF: %s", len(text), pdf_path)
    return text
39
+
40
def save_temp_pdf(file_data, filename="temp.pdf"):
    """
    Write uploaded bytes to a PDF file inside settings.TEMP_FILE_DIR.

    Args:
        file_data: The binary data of the file
        filename: The name to save the file as

    Returns:
        Path to the saved file

    Raises:
        Exception: Any error raised while writing is logged and re-raised.
    """
    filepath = os.path.join(settings.TEMP_FILE_DIR, filename)
    try:
        with open(filepath, 'wb') as out:
            out.write(file_data)
    except Exception as e:
        logger.error(f"Error saving temporary PDF: {e}")
        raise
    else:
        return filepath