Commit 71fdb6d (parent: ca85905)

Add utility and service packages, enhance profile model, and implement PDF handling
Files changed:
- __pycache__/agentProfile.cpython-311.pyc (+0, -0, binary)
- __pycache__/api.cpython-311.pyc (+0, -0, binary)
- __pycache__/models.cpython-311.pyc (+0, -0, binary)
- agentProfile.py (+326, -321)
- agents/__init__.py (+12, -0)
- agents/grammar_corrector.py (+57, -0)
- agents/profile_extractor.py (+346, -0)
- api.py (+30, -10)
- app.py (+167, -162)
- config.py (+43, -0)
- models.py (+1, -0)
- services/__init__.py (+6, -0)
- services/storage_service.py (+97, -0)
- temp.pdf (+0, -0, binary)
- utils/__init__.py (+6, -0)
- utils/pdf_utils.py (+58, -0)
__pycache__/agentProfile.cpython-311.pyc: CHANGED
Binary files a/__pycache__/agentProfile.cpython-311.pyc and b/__pycache__/agentProfile.cpython-311.pyc differ

__pycache__/api.cpython-311.pyc: CHANGED
Binary files a/__pycache__/api.cpython-311.pyc and b/__pycache__/api.cpython-311.pyc differ

__pycache__/models.cpython-311.pyc: CHANGED
Binary files a/__pycache__/models.cpython-311.pyc and b/__pycache__/models.cpython-311.pyc differ
agentProfile.py: CHANGED
Hunks: @@ -1,199 +1,198 @@ and @@ -211,168 +210,174 @@ def fill_missing_information(profile: Profile, pdf_text: str, llm) -> Profile:

Removed: the previous module-level implementation. It built a ChatGroq client inline
(groq_api_key=GROQ_API_KEY, model_name="qwen-2.5-32b", temperature=0.5, max_tokens=2048),
created a PydanticOutputParser for the Profile model, embedded the resume-extraction
format instructions directly in the function body, and defined
fill_missing_information(profile, pdf_text, llm), extract_profile_information_fallback(pdf_text)
with one prompt per field and manual parsing of the project, skill, and education strings,
and correct_grammar(text) (model "qwen-2.5-32b", temperature 0.3, max_tokens 1024) as free
functions, each reading the module-level GROQ_API_KEY and printing errors directly.

Added: the same extraction and grammar-correction logic reorganized into ProfileExtractor
and GrammarCorrector classes configured from config.get_settings(). New contents of the
file, with the two long helper bodies summarized in comments:

import groq
from pdfextractor import extract_text_from_pdf
from models import Profile, SocialMedia, Project, Skill, Education
from typing import List, Dict, Any, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import json
from config import get_settings

settings = get_settings()


class ProfileExtractor:
    """Class for extracting profile information from resume text"""

    def __init__(self):
        self.groq_api_key = settings.GROQ_API_KEY
        self.model_name = settings.MODEL_NAME
        self.temperature = settings.TEMPERATURE
        self.max_tokens = settings.MAX_TOKENS
        self.llm = self._initialize_llm()

    def _initialize_llm(self) -> ChatGroq:
        """Initialize the language model client"""
        return ChatGroq(
            groq_api_key=self.groq_api_key,
            model_name=self.model_name,
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )

    def extract_profile(self, pdf_text: str) -> Profile:
        """
        Main method to extract profile information from PDF text.

        Args:
            pdf_text: Text extracted from a resume PDF

        Returns:
            Profile object with extracted information
        """
        try:
            return self._extract_with_langchain(pdf_text)
        except Exception as e:
            if settings.DEBUG:
                print(f"LangChain extraction failed: {e}")
            return self._extract_with_fallback(pdf_text)

    def _extract_with_langchain(self, pdf_text: str) -> Profile:
        """Extract profile with the structured LangChain approach"""
        # format_instructions keeps the same prompt text as the old version: extract the
        # full name, professional title, email, bio (50-100 words), tagline (5-10 words),
        # social media links (LinkedIn, GitHub, Instagram), projects (title, description,
        # techStack), skills, and education history (school, degree, fieldOfStudy,
        # startDate, endDate), returned as a single JSON object with null for any field
        # that is not available.
        format_instructions = "..."  # unchanged prompt text, omitted here for brevity

        template = """
        You are a professional resume parser. Extract structured information from the following resume:

        {pdf_text}

        {format_instructions}
        """

        prompt = PromptTemplate(
            template=template,
            input_variables=["pdf_text"],
            partial_variables={"format_instructions": format_instructions}
        )

        # Get the structured information from the LLM
        chain = prompt | self.llm
        result = chain.invoke({"pdf_text": pdf_text})
        response_text = result.content

        # Extract JSON from the response text (in case the LLM adds extra text)
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start >= 0 and json_end > json_start:
            json_str = response_text[json_start:json_end]
            profile_dict = json.loads(json_str)

            # Create a Profile object from the dictionary
            profile = Profile.model_validate(profile_dict)

            # Check for missing information and try to extract it if necessary
            profile = self._fill_missing_information(profile, pdf_text)
            return profile
        else:
            raise ValueError("No JSON found in the response")

    def _fill_missing_information(self, profile: Profile, pdf_text: str) -> Profile:
        """Attempts to fill in any missing information in the profile"""
        # For each of name, title, email, and bio that is empty or "N/A", the LLM is
        # asked for just that field (name and title use only the first
        # settings.CHUNK_SIZE characters of the text) and the answer is kept when it is
        # non-empty, not "N/A", and, for email, contains "@". If profile.educations is
        # empty, the LLM is asked for the education history as a list of JSON objects,
        # the [...] block is parsed with json.loads, and each entry is appended as an
        # Education. Errors are printed only when settings.DEBUG is set.
        return profile

    def _extract_with_fallback(self, pdf_text: str) -> Profile:
        """Fallback method for profile extraction using direct API calls"""
        client = groq.Groq(api_key=self.groq_api_key)

        def get_llm_response(prompt: str) -> str:
            """Helper function to get a response from the LLM."""
            try:
                chat_completion = client.chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.model_name,
                    temperature=settings.FALLBACK_TEMPERATURE,
                    max_tokens=settings.MAX_TOKENS
                )
                return chat_completion.choices[0].message.content
            except Exception as e:
                if settings.DEBUG:
                    print(f"Error during LLM call: {e}")
                return ""  # Return empty string on failure

        # One prompt per field, with the same wording as the old fallback: name, title,
        # email, bio, tagline, LinkedIn/GitHub/Instagram URLs, projects in
        # "Project Title: Project Description: Tech Stack:" form, comma-separated skills,
        # and education as "School: Degree: Field: StartDate: EndDate" lines. The
        # responses are parsed into SocialMedia, Project, Skill, and Education objects
        # (skipping empty skills and reporting malformed lines only when settings.DEBUG
        # is set), and a Profile is returned with social set only when at least one link
        # was found and chatbot, profileImg, and heroImg left as None.
        return profile

The file also gains a GrammarCorrector class identical to the one in
agents/grammar_corrector.py (added below), module-level instances
profile_extractor = ProfileExtractor() and grammar_corrector = GrammarCorrector(),
and legacy wrappers kept for backward compatibility:

def extract_profile_information(pdf_text: str) -> Profile:
    """Legacy function for backward compatibility"""
    return profile_extractor.extract_profile(pdf_text)

def correct_grammar(text: str) -> str:
    """Legacy function for backward compatibility"""
    return grammar_corrector.correct_grammar(text)
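config.py (+43 lines) is part of this commit but its contents are not visible in this view. The call sites above, and in api.py further down, only require get_settings() to expose GROQ_API_KEY, MODEL_NAME, TEMPERATURE, FALLBACK_TEMPERATURE, GRAMMAR_CORRECTION_TEMPERATURE, MAX_TOKENS, CHUNK_SIZE, DEBUG, MONGODB_URI, MONGODB_TIMEOUT_MS, MONGODB_DB, and MONGODB_COLLECTION. A minimal sketch of such a module, assuming environment-variable-backed defaults; the field names come from the call sites, the non-MongoDB defaults echo the constants removed from agentProfile.py, and everything else (structure, MongoDB values) is illustrative, not the real config.py:

# config.py sketch (hypothetical; the actual file is not shown in this diff)
import os
from dataclasses import dataclass
from functools import lru_cache

@dataclass(frozen=True)
class Settings:
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    MODEL_NAME: str = os.getenv("MODEL_NAME", "qwen-2.5-32b")
    TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.5"))
    FALLBACK_TEMPERATURE: float = float(os.getenv("FALLBACK_TEMPERATURE", "0.5"))
    GRAMMAR_CORRECTION_TEMPERATURE: float = float(os.getenv("GRAMMAR_CORRECTION_TEMPERATURE", "0.3"))
    MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))
    CHUNK_SIZE: int = int(os.getenv("CHUNK_SIZE", "1000"))
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
    # MongoDB values are purely illustrative placeholders
    MONGODB_URI: str = os.getenv("MONGODB_URI", "mongodb://localhost:27017")
    MONGODB_TIMEOUT_MS: int = int(os.getenv("MONGODB_TIMEOUT_MS", "5000"))
    MONGODB_DB: str = os.getenv("MONGODB_DB", "portfolio")
    MONGODB_COLLECTION: str = os.getenv("MONGODB_COLLECTION", "profiles")

@lru_cache()
def get_settings() -> Settings:
    """Return a cached Settings instance."""
    return Settings()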
agents/__init__.py: ADDED
@@ -0,0 +1,12 @@

"""
Agents package for specialized AI tasks
"""
from .profile_extractor import ProfileExtractor, extract_profile_information
from .grammar_corrector import GrammarCorrector, correct_grammar

__all__ = [
    'ProfileExtractor',
    'GrammarCorrector',
    'extract_profile_information',
    'correct_grammar'
]
agents/grammar_corrector.py: ADDED
@@ -0,0 +1,57 @@

"""
Agent for correcting grammar in text
"""
import groq
from config import get_settings

settings = get_settings()

class GrammarCorrector:
    """Class for correcting grammar in text using LLM"""

    def __init__(self):
        self.groq_api_key = settings.GROQ_API_KEY
        self.model_name = settings.MODEL_NAME
        self.temperature = settings.GRAMMAR_CORRECTION_TEMPERATURE

    def correct_grammar(self, text: str) -> str:
        """
        Corrects grammar in user input using Groq's LLM.

        Args:
            text: The text to correct

        Returns:
            The corrected text
        """
        if not text:
            return text

        client = groq.Groq(api_key=self.groq_api_key)

        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Correct any grammar, spelling, or punctuation errors in the following text, but keep the meaning exactly the same: '{text}'"
                    }
                ],
                model=self.model_name,
                temperature=self.temperature,
                max_tokens=settings.MAX_TOKENS
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            if settings.DEBUG:
                print(f"Error during grammar correction: {e}")
            return text  # Return original text if correction fails


# Create module-level instance for easier imports
grammar_corrector = GrammarCorrector()

# Export function for backward compatibility
def correct_grammar(text: str) -> str:
    """Legacy function for backward compatibility"""
    return grammar_corrector.correct_grammar(text)
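A quick usage sketch for the new agent, assuming the agents package is importable from the app's working directory and GROQ_API_KEY is configured; the sample sentence is illustrative only:

# Example usage of the grammar-correction agent (illustrative input text)
from agents import correct_grammar

fixed = correct_grammar("me and him has went to the library yesterday")
print(fixed)  # corrected sentence from the LLM, or the original text if the call fails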
agents/profile_extractor.py: ADDED
@@ -0,0 +1,346 @@

"""
Agent for extracting profile information from resumes
"""
import groq
from models import Profile, SocialMedia, Project, Skill, Education
from typing import List, Dict, Any, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import json
from config import get_settings
import logging

settings = get_settings()

# Configure logging
logging.basicConfig(
    level=logging.DEBUG if settings.DEBUG else logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

The ProfileExtractor class defined here is the same as the one added to agentProfile.py
above (same __init__, _initialize_llm, extract_profile, _extract_with_langchain,
_fill_missing_information, and _extract_with_fallback, including the identical
format_instructions prompt and field-by-field fallback prompts), with two differences:

- every step logs through the module logger (logger.debug/info/error) alongside the
  DEBUG-gated prints, for example logger.info("Extracting profile information") at the
  start of extract_profile and logger.error(f"LangChain extraction failed: {e}") before
  falling back;
- _fill_missing_information additionally populates profile.topSkills: when
  profile.skills is non-empty, the LLM is asked for the top 8 skills from the
  already-extracted list and each returned skill is appended as a Skill:

        if profile.skills:
            try:
                response = self.llm.invoke(
                    "Extract a top 8 of skills from this resume text, separated by commas. "
                    "Respond with just the skills: "
                    + ", ".join([skill.name for skill in profile.skills])
                )
                skills = response.content.split(",")
                for skill in skills:
                    if skill:
                        profile.topSkills.append(Skill(name=skill.strip()))
                        logger.debug(f"Added skill: {skill}")
            except Exception as e:
                logger.error(f"Error extracting skills: {e}")

The module ends with:

# Create module-level instance for easier imports
profile_extractor = ProfileExtractor()

# Export function for backward compatibility
def extract_profile_information(pdf_text: str) -> Profile:
    """Legacy function for backward compatibility"""
    return profile_extractor.extract_profile(pdf_text)

# Export the class and the function
__all__ = ['ProfileExtractor', 'extract_profile_information']
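An end-to-end sketch of the new extraction path, assuming pdfextractor.extract_text_from_pdf (imported in agentProfile.py above) returns the plain text of the resume; the file name is illustrative:

# PDF text -> Profile via the backward-compatible wrapper ("resume.pdf" is illustrative)
from pdfextractor import extract_text_from_pdf
from agents import extract_profile_information

pdf_text = extract_text_from_pdf("resume.pdf")
profile = extract_profile_information(pdf_text)
print(profile.name, profile.title, len(profile.skills))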
api.py
CHANGED
@@ -1,9 +1,21 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
from pymongo import MongoClient
|
3 |
from bson.objectid import ObjectId
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
import json
|
6 |
from bson import json_util
|
|
|
|
|
|
|
|
|
|
|
|
 app = FastAPI(title="Profile API", description="API to retrieve profile information")

@@ -18,17 +30,21 @@ app.add_middleware(

# MongoDB connection configuration
def get_db_connection():
    try:
-        client = MongoClient(
        # Test the connection
        client.server_info()
-        return client[
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Database connection error: {str(e)}")

@app.get("/api/profile/{profile_id}")
-async def get_profile(profile_id: str):
    """
    Retrieve a profile by its MongoDB ID

@@ -41,12 +57,13 @@ async def get_profile(profile_id: str):
    try:
        # Connect to MongoDB
        db = get_db_connection()
-        collection = db[

        # Try to parse the profile_id as an ObjectId
        try:
            obj_id = ObjectId(profile_id)
-        except:
            raise HTTPException(status_code=400, detail=f"Invalid profile ID format: {profile_id}")

        # Find the profile by ID
@@ -54,10 +71,12 @@ async def get_profile(profile_id: str):

        # Check if profile exists
        if not profile:
            raise HTTPException(status_code=404, detail=f"Profile with ID {profile_id} not found")

        # Convert MongoDB document to JSON serializable format
        profile_json = json.loads(json_util.dumps(profile))

        return profile_json

@@ -66,11 +85,11 @@ async def get_profile(profile_id: str):
        raise
    except Exception as e:
        # Handle any other exceptions
        raise HTTPException(status_code=500, detail=f"Error retrieving profile: {str(e)}")

-# Add new endpoint for profile images
@app.get("/api/profile/{profile_id}/image")
-async def get_profile_image(profile_id: str):
    """
    Retrieve just the profile image for a given profile ID

@@ -82,7 +101,7 @@ async def get_profile_image(profile_id: str):
    """
    try:
        db = get_db_connection()
-        collection = db[

        try:
            obj_id = ObjectId(profile_id)
@@ -106,6 +125,7 @@ async def get_profile_image(profile_id: str):
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error retrieving profile image: {str(e)}")

if __name__ == "__main__":

+from fastapi import FastAPI, HTTPException, Depends
from pymongo import MongoClient
from bson.objectid import ObjectId
from fastapi.middleware.cors import CORSMiddleware
import json
from bson import json_util
+from config import get_settings
+from typing import Dict, Any
+import logging
+
+settings = get_settings()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG if settings.DEBUG else logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)

app = FastAPI(title="Profile API", description="API to retrieve profile information")

# MongoDB connection configuration
def get_db_connection():
+    """Get MongoDB database connection"""
    try:
+        client = MongoClient(
+            settings.MONGODB_URI,
+            serverSelectionTimeoutMS=settings.MONGODB_TIMEOUT_MS
+        )
        # Test the connection
        client.server_info()
+        return client[settings.MONGODB_DB]
    except Exception as e:
+        logger.error(f"Error connecting to MongoDB: {e}")
        raise HTTPException(status_code=500, detail=f"Database connection error: {str(e)}")

@app.get("/api/profile/{profile_id}")
+async def get_profile(profile_id: str) -> Dict[str, Any]:
    """
    Retrieve a profile by its MongoDB ID

    try:
        # Connect to MongoDB
        db = get_db_connection()
+        collection = db[settings.MONGODB_COLLECTION]

        # Try to parse the profile_id as an ObjectId
        try:
            obj_id = ObjectId(profile_id)
+        except Exception as id_error:
+            logger.error(f"Invalid profile ID: {profile_id}, error: {id_error}")
            raise HTTPException(status_code=400, detail=f"Invalid profile ID format: {profile_id}")

        # Find the profile by ID

        # Check if profile exists
        if not profile:
+            logger.warning(f"Profile not found: {profile_id}")
            raise HTTPException(status_code=404, detail=f"Profile with ID {profile_id} not found")

        # Convert MongoDB document to JSON serializable format
        profile_json = json.loads(json_util.dumps(profile))
+        logger.debug(f"Retrieved profile: {profile_id}")

        return profile_json

        raise
    except Exception as e:
        # Handle any other exceptions
+        logger.error(f"Error retrieving profile {profile_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Error retrieving profile: {str(e)}")

@app.get("/api/profile/{profile_id}/image")
+async def get_profile_image(profile_id: str) -> Dict[str, Any]:
    """
    Retrieve just the profile image for a given profile ID

    """
    try:
        db = get_db_connection()
+        collection = db[settings.MONGODB_COLLECTION]

        try:
            obj_id = ObjectId(profile_id)

    except HTTPException:
        raise
    except Exception as e:
+        logger.error(f"Error retrieving profile image {profile_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Error retrieving profile image: {str(e)}")

if __name__ == "__main__":
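A quick client-side sketch of calling the two endpoints above. This is not part of the commit: it assumes the FastAPI app is served locally on port 8000 (the URL the previous app.py printed), that the `requests` package is installed, and the profile ID shown is a placeholder.

    import requests

    BASE_URL = "http://localhost:8000"            # assumed local dev server
    profile_id = "65f0c0ffee0123456789abcd"       # placeholder ObjectId string

    # Fetch the full profile document (JSON-serialized with bson.json_util on the server)
    resp = requests.get(f"{BASE_URL}/api/profile/{profile_id}")
    resp.raise_for_status()
    profile = resp.json()
    print(profile.get("name"), profile.get("title"))

    # Fetch only the profile image payload
    img_resp = requests.get(f"{BASE_URL}/api/profile/{profile_id}/image")
    print(img_resp.status_code)
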
app.py
CHANGED
@@ -1,65 +1,32 @@
+"""
+Streamlit web application for resume profile extraction
+"""
import streamlit as st
-import os
+import os
import json
import traceback
import base64
-from
+import logging
+from typing import Dict, Any
+
+# Import from our refactored modules
+from agents import profile_extractor as pe, grammar_corrector as gc
+from utils import extract_text_from_pdf, save_temp_pdf
+from services import storage_service
from models import Skill, Project, Education, SocialMedia
+from config import get_settings

-        "name": profile.name,
-        "title": profile.title,
-        "email": profile.email,
-        "bio": profile.bio,
-        "tagline": profile.tagline if profile.tagline else "",
-        "social": {
-            "linkedin": profile.social.linkedin if profile.social and profile.social.linkedin else "",
-            "github": profile.social.github if profile.social and profile.social.github else "",
-            "instagram": profile.social.instagram if profile.social and profile.social.instagram else ""
-        },
-        "profileImg": profile.profileImg if profile.profileImg else "",
-        "projects": [
-            {
-                "title": project.title,
-                "description": project.description,
-                "techStack": project.techStack if project.techStack else "",
-                "githubUrl": project.githubUrl if project.githubUrl else "",
-                "demoUrl": project.demoUrl if project.demoUrl else ""
-            } for project in profile.projects
-        ] if profile.projects else [],
-        "skills": [skill.name for skill in profile.skills] if profile.skills else [],
-        "educations": [
-            {
-                "school": edu.school,
-                "degree": edu.degree,
-                "fieldOfStudy": edu.fieldOfStudy,
-                "startDate": edu.startDate,
-                "endDate": edu.endDate
-            } for edu in profile.educations
-        ] if profile.educations else []
-    }
-
-def store_profile(profile_dict):
-    try:
-        client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)
-        # Test the connection
-        client.server_info()
-
-        db = client["profileDB"]
-        collection = db["profiles"]
-        result = collection.insert_one(profile_dict)
-        return str(result.inserted_id)
-    except Exception as e:
-        st.error(f"Erreur de connexion à MongoDB: {str(e)}")
-        # Fallback: save to JSON file
-        file_path = f"profile_{profile_dict['name'].replace(' ', '_')}.json"
-        with open(file_path, 'w') as f:
-            json.dump(profile_dict, f, indent=2)
-        return f"Sauvegardé dans le fichier {file_path}"
+# Get settings
+settings = get_settings()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG if settings.DEBUG else logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+profile_extractor=pe.ProfileExtractor()
+grammar_corrector=gc.GrammarCorrector()
def collect_missing_data(profile):
    """
    Collects missing data from user input when automatic extraction fails.
@@ -92,7 +59,7 @@ def collect_missing_data(profile):

    if profile.bio and profile.bio != "N/A":
        if st.button("Improve Bio Grammar"):
-            profile.bio = correct_grammar(profile.bio)
+            profile.bio = grammar_corrector.correct_grammar(profile.bio)
            st.success("Grammar corrected!")

    # Optional information
@@ -135,7 +102,7 @@ def collect_missing_data(profile):
    num_new_edu = st.number_input("Number of additional education entries:", min_value=1, max_value=5, value=1)
    offset = len(profile.educations) if profile.educations else 0

-    for i in range(num_new_edu):
+    for i in range(int(num_new_edu)):
        st.write(f"Additional Education #{i+1}")
        school = st.text_input(f"School:", key=f"new_school_{offset+i}")
        degree = st.text_input(f"Degree:", key=f"new_degree_{offset+i}")
@@ -187,7 +154,7 @@ def collect_missing_data(profile):
    num_new_proj = st.number_input("Number of additional projects:", min_value=1, max_value=5, value=1)
    offset = len(profile.projects) if profile.projects else 0

-    for i in range(num_new_proj):
+    for i in range(int(num_new_proj)):
        st.write(f"Additional Project #{i+1}")
        title = st.text_input(f"Title:", key=f"new_proj_title_{offset+i}")
        description = st.text_area(f"Description:", key=f"new_proj_desc_{offset+i}")
@@ -196,8 +163,9 @@ def collect_missing_data(profile):
        demo_url = st.text_input(f"Demo URL (optional):", key=f"new_proj_demo_{offset+i}")

        if title and description:  # Only add if title and description are provided
+            correct_grammar_btn = st.button(f"Correct Grammar for Project #{i+1}")
+            if correct_grammar_btn:
+                description = grammar_corrector.correct_grammar(description)
                st.success("Grammar corrected!")

            project_data.append({
@@ -233,10 +201,81 @@ def collect_missing_data(profile):

    return profile

+def display_profile(profile):
+    """
+    Displays a profile in the Streamlit UI
+
+    Args:
+        profile: The Profile object to display
+    """
+    st.header("Your Complete Profile")
+
+    # Display profile image if available
+    if profile.profileImg:
+        st.image(profile.profileImg, width=150)
+
+    # Display basic info in a table
+    basic_data = {
+        "Field": ["Name", "Title", "Email", "Bio", "Tagline"],
+        "Value": [
+            profile.name,
+            profile.title,
+            profile.email,
+            profile.bio,
+            profile.tagline if profile.tagline else ""
+        ]
+    }
+    st.table(basic_data)
+
+    # Display social media if available
+    if profile.social:
+        social_data = {
+            "Platform": ["LinkedIn", "GitHub", "Instagram"],
+            "URL": [
+                profile.social.linkedin if profile.social.linkedin else "",
+                profile.social.github if profile.social.github else "",
+                profile.social.instagram if profile.social.instagram else ""
+            ]
+        }
+        st.subheader("Social Media")
+        st.table(social_data)
+
+    # Display education in a table if available
+    if profile.educations:
+        education_data = {
+            "School": [edu.school for edu in profile.educations],
+            "Degree": [edu.degree for edu in profile.educations],
+            "Field of Study": [edu.fieldOfStudy for edu in profile.educations],
+            "Start Date": [edu.startDate for edu in profile.educations],
+            "End Date": [edu.endDate for edu in profile.educations]
+        }
+        st.subheader("Education")
+        st.table(education_data)
+
+    # Display projects in a table if available
+    if profile.projects:
+        projects_data = {
+            "Title": [project.title for project in profile.projects],
+            "Description": [project.description for project in profile.projects],
+            "Tech Stack": [project.techStack if project.techStack else "" for project in profile.projects],
+            "GitHub": [project.githubUrl if project.githubUrl else "" for project in profile.projects],
+            "Demo": [project.demoUrl if project.demoUrl else "" for project in profile.projects]
+        }
+        st.subheader("Projects")
+        st.table(projects_data)
+
+    # Display skills as a comma separated list if available
+    if profile.skills:
+        st.subheader("Skills")
+        st.write(", ".join([skill.name for skill in profile.skills]))
+
def main():
+    """Main application function"""
+    st.set_page_config(page_title="Resume Profile Extractor", page_icon="📄", layout="wide")
+    st.title("Professional Profile Extractor")
+    st.write("Upload a resume PDF to extract professional profile information")

-    #
+    # Initialize session state variables
    if 'profile' not in st.session_state:
        st.session_state.profile = None
    if 'extraction_complete' not in st.session_state:
@@ -246,135 +285,101 @@ def main():
    if 'profile_saved' not in st.session_state:
        st.session_state.profile_saved = False

-    # Step 1: Upload PDF
+    # Step 1: Upload PDF and Extract Profile
    if not st.session_state.extraction_complete:
-        uploaded_file = st.file_uploader("Upload a PDF
+        uploaded_file = st.file_uploader("Upload a PDF resume", type="pdf")

        if uploaded_file is not None:
+            try:
+                # Save the uploaded file to a temporary location
+                pdf_path = save_temp_pdf(uploaded_file.getvalue())
+
+                # Extract text from the PDF
+                pdf_text = extract_text_from_pdf(pdf_path)
+
+                if not pdf_text:
+                    st.error("Could not extract text from the PDF. The file might be scanned or protected.")
+                else:
+                    with st.spinner("Extracting profile information..."):
+                        # Extract profile information using the profile extractor agent
+                        profile = profile_extractor.extract_profile(pdf_text)
                        st.session_state.profile = profile
                        st.session_state.extraction_complete = True
                        st.experimental_rerun()

-            os.remove("temp.pdf")
+                # Clean up temporary file
+                if os.path.exists(pdf_path):
+                    os.remove(pdf_path)
+
+            except Exception as e:
+                logger.error(f"Error during profile extraction: {e}")
+                st.error(f"An error occurred during profile extraction: {str(e)}")
+                if "403" in str(e):
+                    st.error("Authorization error (403 Forbidden). Please check your API key and permissions.")
+                with st.expander("Technical Details"):
+                    st.code(traceback.format_exc())

-    # Step 2:
+    # Step 2: Allow User to Edit/Complete the Profile
    elif not st.session_state.user_input_complete:
+        st.info("We've extracted information from your resume. Please review and complete any missing details.")
-        profile = collect_missing_data(profile)
+        # Call the function to collect and complete missing data
+        profile = collect_missing_data(st.session_state.profile)
+
+        # Add buttons for submitting or starting over
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("Save Profile"):
+                st.session_state.profile = profile
+                st.session_state.user_input_complete = True
+                st.experimental_rerun()
+        with col2:
+            if st.button("Start Over"):
+                st.session_state.profile = None
+                st.session_state.extraction_complete = False
+                st.experimental_rerun()

-    # Step 3:
+    # Step 3: Save Profile and Display Results
    elif not st.session_state.profile_saved:
        profile = st.session_state.profile

        try:
-            #
+            # Store the profile using the storage service
+            inserted_id = storage_service.store_profile(
+                profile,
+                error_handler=st.error
+            )

-            # Show API access information
-            st.info(f"Access this profile via API: http://localhost:8000/api/profile/{inserted_id}")
+            st.success(f"Profile saved successfully with ID: {inserted_id}")

-            st.
+            # Display the Portfolio URL
+            st.info(f"Access to your portfolio: [Portfolio URL](http://localhost:3000/{inserted_id})")

-            if profile.profileImg:
-                st.image(profile.profileImg, width=150)
-
-            #
-                "Field": ["Name", "Title", "Email", "Bio", "Tagline"],
-                "Value": [
-                    profile.name,
-                    profile.title,
-                    profile.email,
-                    profile.bio,
-                    profile.tagline if profile.tagline else ""
-                ]
-            }
-            st.table(basic_data)
-
-            # Display social media if available
-            if profile.social:
-                social_data = {
-                    "Platform": ["LinkedIn", "GitHub", "Instagram"],
-                    "URL": [
-                        profile.social.linkedin if profile.social.linkedin else "",
-                        profile.social.github if profile.social.github else "",
-                        profile.social.instagram if profile.social.instagram else ""
-                    ]
-                }
-                st.subheader("Social Media")
-                st.table(social_data)
-
-            # Display education in a table if available
-            if profile.educations:
-                education_data = {
-                    "School": [edu.school for edu in profile.educations],
-                    "Degree": [edu.degree for edu in profile.educations],
-                    "Field of Study": [edu.fieldOfStudy for edu in profile.educations],
-                    "Start Date": [edu.startDate for edu in profile.educations],
-                    "End Date": [edu.endDate for edu in profile.educations]
-                }
-                st.subheader("Education")
-                st.table(education_data)
-
-            # Display
-                projects_data = {
-                    "Title": [project.title for project in profile.projects],
-                    "Description": [project.description for project in profile.projects],
-                    "Tech Stack": [project.techStack if project.techStack else "" for project in profile.projects],
-                    "GitHub": [project.githubUrl if project.githubUrl else "" for project in profile.projects],
-                    "Demo": [project.demoUrl if project.demoUrl else "" for project in profile.projects]
-                }
-                st.subheader("Projects")
-                st.table(projects_data)
-
-            # Display skills as a comma separated list if available
-            if profile.skills:
-                st.subheader("Skills")
-                st.write(", ".join([skill.name for skill in profile.skills]))
+            # Mark as saved in session state
+            st.session_state.profile_saved = True
+
+            # Display the complete profile
+            display_profile(profile)

        except Exception as e:
+            logger.error(f"Error saving profile: {e}")
            st.error(f"Error saving profile: {str(e)}")
-            with st.expander("Technical
+            with st.expander("Technical Details"):
                st.code(traceback.format_exc())

-    #
+    # Final state - allow extracting another profile
    else:
+        st.success("Profile extraction complete!")
+
+        # Show options to extract another profile or view the current one
        if st.button("Extract Another Profile"):
+            # Reset session state
            for key in ['profile', 'extraction_complete', 'user_input_complete', 'profile_saved']:
                st.session_state[key] = False
            st.experimental_rerun()
+        else:
+            # Show the profile again
+            display_profile(st.session_state.profile)

if __name__ == "__main__":
    main()
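The refactored modules above also compose outside Streamlit. A rough sketch of the same pipeline run headless, assuming the imports work as shown in this commit and that a readable resume.pdf exists in the working directory:

    from agents import profile_extractor as pe
    from utils import extract_text_from_pdf, save_temp_pdf
    from services import storage_service

    # Read a local PDF and push it through the same steps the app uses
    with open("resume.pdf", "rb") as f:          # assumed input file
        pdf_path = save_temp_pdf(f.read())

    pdf_text = extract_text_from_pdf(pdf_path)
    if pdf_text:
        profile = pe.ProfileExtractor().extract_profile(pdf_text)
        result = storage_service.store_profile(profile, error_handler=print)
        print(f"Stored profile: {result}")
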
config.py
ADDED
@@ -0,0 +1,43 @@
from pydantic_settings import BaseSettings
from typing import Optional, Dict, Any
import os
from functools import lru_cache

class Settings(BaseSettings):
    """
    Application settings loaded from environment variables or .env file
    """
    # API keys
    GROQ_API_KEY: str

    # LLM settings
    MODEL_NAME: str = "qwen-2.5-32b"
    MAX_TOKENS: int = 2048
    TEMPERATURE: float = 0.5
    FALLBACK_TEMPERATURE: float = 0.7
    GRAMMAR_CORRECTION_TEMPERATURE: float = 0.3

    # MongoDB settings
    MONGODB_URI: str = "mongodb://localhost:27017/"
    MONGODB_DB: str = "profileDB"
    MONGODB_COLLECTION: str = "profiles"
    MONGODB_TIMEOUT_MS: int = 5000

    # Application settings
    CACHE_SIZE: int = 100
    CHUNK_SIZE: int = 1000
    DEBUG: bool = False

    # File settings
    TEMP_FILE_DIR: str = "./"

    class Config:
        env_file = ".env"
        case_sensitive = True

@lru_cache()
def get_settings() -> Settings:
    """
    Get cached settings instance
    """
    return Settings()
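A minimal sketch of how these settings might be supplied and read. The .env values below are placeholders, not real credentials; only GROQ_API_KEY has no default, so it must be provided via the environment or the .env file.

    # .env (example placeholder values)
    # GROQ_API_KEY=gsk_your_key_here
    # MONGODB_URI=mongodb://localhost:27017/
    # DEBUG=true

    from config import get_settings

    settings = get_settings()            # cached via lru_cache, so repeated calls reuse one instance
    print(settings.MODEL_NAME)           # "qwen-2.5-32b" unless overridden
    print(settings.MONGODB_DB, settings.MONGODB_COLLECTION)
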
models.py
CHANGED
@@ -41,4 +41,5 @@ class Profile(BaseModel):
    chatbot: Optional[Chatbot] = None
    projects: List[Project] = []
    skills: List[Skill] = []
+    topSkills: List[str] = []
    educations: List[Education] = []
services/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Services package for backend operations
"""
from .storage_service import StorageService, storage_service

__all__ = ['StorageService', 'storage_service']
services/storage_service.py
ADDED
@@ -0,0 +1,97 @@
"""
Service for storing and retrieving profile data
"""
from pymongo import MongoClient
from models import Profile
from config import get_settings
import json
import logging
from typing import Dict, Any, Optional

settings = get_settings()
logger = logging.getLogger(__name__)

class StorageService:
    """Service for storing and retrieving profile data"""

    def __init__(self):
        self.mongo_uri = settings.MONGODB_URI
        self.db_name = settings.MONGODB_DB
        self.collection_name = settings.MONGODB_COLLECTION
        self.timeout_ms = settings.MONGODB_TIMEOUT_MS

    def profile_to_dict(self, profile: Profile) -> Dict[str, Any]:
        """Convert Profile object to dictionary for MongoDB storage"""
        return {
            "name": profile.name,
            "title": profile.title,
            "email": profile.email,
            "bio": profile.bio,
            "tagline": profile.tagline if profile.tagline else "",
            "social": {
                "linkedin": profile.social.linkedin if profile.social and profile.social.linkedin else "",
                "github": profile.social.github if profile.social and profile.social.github else "",
                "instagram": profile.social.instagram if profile.social and profile.social.instagram else ""
            },
            "profileImg": profile.profileImg if profile.profileImg else "",
            "projects": [
                {
                    "title": project.title,
                    "description": project.description,
                    "techStack": project.techStack if project.techStack else "",
                    "githubUrl": project.githubUrl if project.githubUrl else "",
                    "demoUrl": project.demoUrl if project.demoUrl else ""
                } for project in profile.projects
            ] if profile.projects else [],
            "skills": [skill.name for skill in profile.skills] if profile.skills else [],
            "educations": [
                {
                    "school": edu.school,
                    "degree": edu.degree,
                    "fieldOfStudy": edu.fieldOfStudy,
                    "startDate": edu.startDate,
                    "endDate": edu.endDate
                } for edu in profile.educations
            ] if profile.educations else []
        }

    def store_profile(self, profile: Profile, error_handler=None) -> str:
        """
        Store profile data in MongoDB or fallback to JSON file

        Args:
            profile: The Profile object to store
            error_handler: Optional function to handle errors (useful for framework-specific error handling)

        Returns:
            String ID of the stored profile or path to JSON file
        """
        profile_dict = self.profile_to_dict(profile)

        try:
            client = MongoClient(
                self.mongo_uri,
                serverSelectionTimeoutMS=self.timeout_ms
            )
            # Test the connection
            client.server_info()

            db = client[self.db_name]
            collection = db[self.collection_name]
            result = collection.insert_one(profile_dict)
            return str(result.inserted_id)

        except Exception as e:
            logger.error(f"MongoDB connection error: {e}")
            if error_handler:
                error_handler(f"Error connecting to MongoDB: {str(e)}")

            # Fallback: save to JSON file
            file_path = f"profile_{profile_dict['name'].replace(' ', '_')}.json"
            with open(file_path, 'w') as f:
                json.dump(profile_dict, f, indent=2)
            return f"Saved to file {file_path}"


# Create a global instance
storage_service = StorageService()
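A usage sketch for the storage service. The field names are taken from profile_to_dict above, but whether Profile accepts exactly this constructor call depends on which fields models.py marks as required, so treat the example data as placeholders:

    from models import Profile, Skill
    from services import storage_service

    profile = Profile(
        name="Jane Doe",                      # placeholder data
        title="Backend Engineer",
        email="jane@example.com",
        bio="Engineer focused on Python services.",
        skills=[Skill(name="Python")],
    )

    # Returns the MongoDB ObjectId string, or a "Saved to file ..." message on fallback
    result = storage_service.store_profile(profile, error_handler=print)
    print(result)
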
temp.pdf
CHANGED
Binary files a/temp.pdf and b/temp.pdf differ
utils/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Utilities package for helper functions
"""
from .pdf_utils import extract_text_from_pdf, save_temp_pdf

__all__ = ['extract_text_from_pdf', 'save_temp_pdf']
utils/pdf_utils.py
ADDED
@@ -0,0 +1,58 @@
"""
Utilities for working with PDF files
"""
import PyPDF2
import io
import os
from config import get_settings
import logging

settings = get_settings()
logger = logging.getLogger(__name__)

def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file.

    Args:
        pdf_path (str): The path to the PDF file.

    Returns:
        str: The extracted text. Returns an empty string if extraction fails.
    """
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page_num in range(len(reader.pages)):
                page = reader.pages[page_num]
                text += page.extract_text()

        if not text.strip():
            logger.warning(f"Extracted empty text from PDF: {pdf_path}")
        logger.info(f"Extracted text are {text}")
        return text

    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        return ""  # Return empty string on failure

def save_temp_pdf(file_data, filename="temp.pdf"):
    """
    Save uploaded file data to a temporary PDF file

    Args:
        file_data: The binary data of the file
        filename: The name to save the file as

    Returns:
        Path to the saved file
    """
    filepath = os.path.join(settings.TEMP_FILE_DIR, filename)
    try:
        with open(filepath, 'wb') as f:
            f.write(file_data)
        return filepath
    except Exception as e:
        logger.error(f"Error saving temporary PDF: {e}")
        raise
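A quick usage sketch for these two helpers, assuming a local resume.pdf as input; cleanup of the temporary file stays with the caller, as app.py does above:

    import os
    from utils import extract_text_from_pdf, save_temp_pdf

    with open("resume.pdf", "rb") as f:       # assumed input file
        path = save_temp_pdf(f.read())        # writes TEMP_FILE_DIR/temp.pdf by default

    text = extract_text_from_pdf(path)
    print(len(text), "characters extracted")

    if os.path.exists(path):
        os.remove(path)                       # mirror the cleanup done in app.py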