Shaikmohdhuz committed on
Commit
3c4414a
·
1 Parent(s): 29367b6

made changes on pdf

Browse files
Files changed (1) hide show
  1. agent.py +54 -44
agent.py CHANGED
@@ -7,94 +7,104 @@ import io
7
  import docx
8
  from typing import Dict, List, Optional
9
 
 
 
 
 
 
 
 
 
10
  class HiringAgent:
11
  def __init__(self):
12
  self.agent = AgentPro(tools=[AresInternetTool(), CodeEngine()])
13
 
14
  def extract_text_from_pdf(self, pdf_url: str) -> str:
15
  """Extract text from PDF file."""
16
- response = requests.get(pdf_url)
17
- pdf_file = io.BytesIO(response.content)
18
- pdf_reader = PyPDF2.PdfReader(pdf_file)
19
- text = ""
20
- for page in pdf_reader.pages:
21
- text += page.extract_text()
22
- return text
23
-
 
 
24
  def extract_text_from_docx(self, docx_url: str) -> str:
25
  """Extract text from DOCX file."""
26
- response = requests.get(docx_url)
27
- docx_file = io.BytesIO(response.content)
28
- doc = docx.Document(docx_file)
29
- text = ""
30
- for paragraph in doc.paragraphs:
31
- text += paragraph.text + "\n"
32
- return text
33
-
 
 
34
  def analyze_github_profile(self, github_url: str) -> Dict:
35
  """Analyze GitHub profile and extract relevant information."""
36
  response = requests.get(github_url)
37
  soup = BeautifulSoup(response.text, 'html.parser')
38
-
39
- # Extract basic information
40
- name = soup.find('span', {'class': 'p-name'}).text.strip() if soup.find('span', {'class': 'p-name'}) else ""
41
- bio = soup.find('div', {'class': 'p-note'}).text.strip() if soup.find('div', {'class': 'p-note'}) else ""
42
-
43
- # Extract repositories
44
  repos = []
45
  for repo in soup.find_all('a', {'data-hovercard-type': 'repository'})[:5]:
46
  repos.append({
47
  'name': repo.text.strip(),
48
  'url': f"https://github.com{repo['href']}"
49
  })
50
-
51
  return {
52
- 'name': name,
53
- 'bio': bio,
54
  'repositories': repos
55
  }
56
-
57
  def analyze_candidate(self, resume_url: str, github_url: str, job_description: str, company_info: str) -> Dict:
58
  """Analyze candidate profile and generate assessment."""
59
- # Extract resume text
 
60
  if resume_url.endswith('.pdf'):
61
  resume_text = self.extract_text_from_pdf(resume_url)
62
  elif resume_url.endswith('.docx'):
63
  resume_text = self.extract_text_from_docx(resume_url)
64
  else:
65
- resume_text = ""
66
-
67
- # Analyze GitHub profile
68
  github_data = self.analyze_github_profile(github_url)
69
-
70
- # Generate assessment using AgentPro
71
  prompt = f"""
72
  Analyze this candidate profile and provide a detailed assessment:
73
-
74
- Resume Content:
75
  {resume_text}
76
-
77
- GitHub Profile:
78
  Name: {github_data['name']}
79
  Bio: {github_data['bio']}
80
  Top Repositories: {[repo['name'] for repo in github_data['repositories']]}
81
-
82
- Job Description:
83
  {job_description}
84
-
85
- Company Information:
86
  {company_info}
87
-
88
  Please provide:
89
  1. Skills and experience match with job requirements
90
  2. Technical proficiency assessment
91
  3. Cultural fit analysis
92
  4. Strengths and areas for development
93
- 5. Overall recommendations
94
  """
95
-
96
  assessment = self.agent(prompt)
97
-
98
  return {
99
  'resume_analysis': resume_text,
100
  'github_analysis': github_data,
 
7
  import docx
8
  from typing import Dict, List, Optional
9
 
10
+
11
def convert_drive_url(url: str) -> str:
    """Convert a Google Drive share link into a direct-download URL.

    Share links look like ``https://drive.google.com/file/d/<FILE_ID>/view``;
    downloading the raw bytes requires the ``uc?export=download`` endpoint
    instead. Any URL that is not a recognizable Drive share link is
    returned unchanged.

    Args:
        url: A document URL, possibly a Google Drive share link.

    Returns:
        A direct-download Drive URL when the file id can be extracted,
        otherwise the original ``url``.
    """
    if "drive.google.com" in url and "view" in url:
        # Guard the split: the original unconditional ``split("/d/")[1]``
        # raised IndexError for Drive URLs that contain "view" (e.g. a
        # ``?view=1`` query parameter) but no ``/d/<id>/`` path segment.
        parts = url.split("/d/")
        if len(parts) > 1:
            file_id = parts[1].split("/")[0]
            return f"https://drive.google.com/uc?export=download&id={file_id}"
    return url
16
+
17
+
18
class HiringAgent:
    """Agent that assesses job candidates from a resume and GitHub profile."""

    def __init__(self):
        # AgentPro orchestrates the LLM with internet-search and
        # code-execution tools; used later to generate the assessment.
        self.agent = AgentPro(tools=[AresInternetTool(), CodeEngine()])
21
 
22
  def extract_text_from_pdf(self, pdf_url: str) -> str:
23
  """Extract text from PDF file."""
24
+ try:
25
+ pdf_url = convert_drive_url(pdf_url)
26
+ response = requests.get(pdf_url)
27
+ pdf_file = io.BytesIO(response.content)
28
+ reader = PyPDF2.PdfReader(pdf_file)
29
+ text = "\n".join(page.extract_text() or "" for page in reader.pages)
30
+ return text or "⚠️ No text could be extracted from the PDF."
31
+ except Exception as e:
32
+ return f"⚠️ Error reading PDF: {str(e)}"
33
+
34
  def extract_text_from_docx(self, docx_url: str) -> str:
35
  """Extract text from DOCX file."""
36
+ try:
37
+ docx_url = convert_drive_url(docx_url)
38
+ response = requests.get(docx_url)
39
+ docx_file = io.BytesIO(response.content)
40
+ doc = docx.Document(docx_file)
41
+ text = "\n".join(p.text for p in doc.paragraphs)
42
+ return text or "⚠️ No text found in DOCX."
43
+ except Exception as e:
44
+ return f"⚠️ Error reading DOCX: {str(e)}"
45
+
46
  def analyze_github_profile(self, github_url: str) -> Dict:
47
  """Analyze GitHub profile and extract relevant information."""
48
  response = requests.get(github_url)
49
  soup = BeautifulSoup(response.text, 'html.parser')
50
+
51
+ name = soup.find('span', {'class': 'p-name'})
52
+ bio = soup.find('div', {'class': 'p-note'})
53
+
 
 
54
  repos = []
55
  for repo in soup.find_all('a', {'data-hovercard-type': 'repository'})[:5]:
56
  repos.append({
57
  'name': repo.text.strip(),
58
  'url': f"https://github.com{repo['href']}"
59
  })
60
+
61
  return {
62
+ 'name': name.text.strip() if name else "",
63
+ 'bio': bio.text.strip() if bio else "",
64
  'repositories': repos
65
  }
66
+
67
  def analyze_candidate(self, resume_url: str, github_url: str, job_description: str, company_info: str) -> Dict:
68
  """Analyze candidate profile and generate assessment."""
69
+
70
+ # Resume Extraction
71
  if resume_url.endswith('.pdf'):
72
  resume_text = self.extract_text_from_pdf(resume_url)
73
  elif resume_url.endswith('.docx'):
74
  resume_text = self.extract_text_from_docx(resume_url)
75
  else:
76
+ resume_text = "⚠️ Unsupported resume format. Please upload a .pdf or .docx file."
77
+
78
+ # GitHub Info
79
  github_data = self.analyze_github_profile(github_url)
80
+
 
81
  prompt = f"""
82
  Analyze this candidate profile and provide a detailed assessment:
83
+
84
+ 📄 Resume Content:
85
  {resume_text}
86
+
87
+ 👨‍💻 GitHub Profile:
88
  Name: {github_data['name']}
89
  Bio: {github_data['bio']}
90
  Top Repositories: {[repo['name'] for repo in github_data['repositories']]}
91
+
92
+ 📝 Job Description:
93
  {job_description}
94
+
95
+ 🏢 Company Information:
96
  {company_info}
97
+
98
  Please provide:
99
  1. Skills and experience match with job requirements
100
  2. Technical proficiency assessment
101
  3. Cultural fit analysis
102
  4. Strengths and areas for development
103
+ 5. Final hiring recommendation
104
  """
105
+
106
  assessment = self.agent(prompt)
107
+
108
  return {
109
  'resume_analysis': resume_text,
110
  'github_analysis': github_data,