Shaikmohdhuz committed on
Commit
3c4414a
·
1 Parent(s): 29367b6

made changes on pdf

Browse files
Files changed (1) hide show
  1. agent.py +54 -44
agent.py CHANGED
@@ -7,94 +7,104 @@ import io
7
  import docx
8
  from typing import Dict, List, Optional
9
 
 
 
 
 
 
 
 
 
10
  class HiringAgent:
11
  def __init__(self):
12
  self.agent = AgentPro(tools=[AresInternetTool(), CodeEngine()])
13
 
14
  def extract_text_from_pdf(self, pdf_url: str) -> str:
15
  """Extract text from PDF file."""
16
- response = requests.get(pdf_url)
17
- pdf_file = io.BytesIO(response.content)
18
- pdf_reader = PyPDF2.PdfReader(pdf_file)
19
- text = ""
20
- for page in pdf_reader.pages:
21
- text += page.extract_text()
22
- return text
23
-
 
 
24
  def extract_text_from_docx(self, docx_url: str) -> str:
25
  """Extract text from DOCX file."""
26
- response = requests.get(docx_url)
27
- docx_file = io.BytesIO(response.content)
28
- doc = docx.Document(docx_file)
29
- text = ""
30
- for paragraph in doc.paragraphs:
31
- text += paragraph.text + "\n"
32
- return text
33
-
 
 
34
  def analyze_github_profile(self, github_url: str) -> Dict:
35
  """Analyze GitHub profile and extract relevant information."""
36
  response = requests.get(github_url)
37
  soup = BeautifulSoup(response.text, 'html.parser')
38
-
39
- # Extract basic information
40
- name = soup.find('span', {'class': 'p-name'}).text.strip() if soup.find('span', {'class': 'p-name'}) else ""
41
- bio = soup.find('div', {'class': 'p-note'}).text.strip() if soup.find('div', {'class': 'p-note'}) else ""
42
-
43
- # Extract repositories
44
  repos = []
45
  for repo in soup.find_all('a', {'data-hovercard-type': 'repository'})[:5]:
46
  repos.append({
47
  'name': repo.text.strip(),
48
  'url': f"https://github.com{repo['href']}"
49
  })
50
-
51
  return {
52
- 'name': name,
53
- 'bio': bio,
54
  'repositories': repos
55
  }
56
-
57
  def analyze_candidate(self, resume_url: str, github_url: str, job_description: str, company_info: str) -> Dict:
58
  """Analyze candidate profile and generate assessment."""
59
- # Extract resume text
 
60
  if resume_url.endswith('.pdf'):
61
  resume_text = self.extract_text_from_pdf(resume_url)
62
  elif resume_url.endswith('.docx'):
63
  resume_text = self.extract_text_from_docx(resume_url)
64
  else:
65
- resume_text = ""
66
-
67
- # Analyze GitHub profile
68
  github_data = self.analyze_github_profile(github_url)
69
-
70
- # Generate assessment using AgentPro
71
  prompt = f"""
72
  Analyze this candidate profile and provide a detailed assessment:
73
-
74
- Resume Content:
75
  {resume_text}
76
-
77
- GitHub Profile:
78
  Name: {github_data['name']}
79
  Bio: {github_data['bio']}
80
  Top Repositories: {[repo['name'] for repo in github_data['repositories']]}
81
-
82
- Job Description:
83
  {job_description}
84
-
85
- Company Information:
86
  {company_info}
87
-
88
  Please provide:
89
  1. Skills and experience match with job requirements
90
  2. Technical proficiency assessment
91
  3. Cultural fit analysis
92
  4. Strengths and areas for development
93
- 5. Overall recommendations
94
  """
95
-
96
  assessment = self.agent(prompt)
97
-
98
  return {
99
  'resume_analysis': resume_text,
100
  'github_analysis': github_data,
 
7
  import docx
8
  from typing import Dict, List, Optional
9
 
10
+
11
def convert_drive_url(url: str) -> str:
    """Convert a Google Drive share link into a direct-download URL.

    Share links look like ``https://drive.google.com/file/d/<FILE_ID>/view``;
    downloading the raw bytes requires the ``uc?export=download`` endpoint
    instead. Any URL that is not a recognizable Drive share link is
    returned unchanged.

    Args:
        url: A document URL, possibly a Google Drive share link.

    Returns:
        A direct-download Drive URL when the file id can be extracted,
        otherwise the original ``url``.
    """
    if "drive.google.com" in url and "view" in url:
        # Guard the split: the original unconditional ``split("/d/")[1]``
        # raised IndexError for Drive URLs that contain "view" (e.g. a
        # ``?view=1`` query parameter) but no ``/d/<id>/`` path segment.
        parts = url.split("/d/")
        if len(parts) > 1:
            file_id = parts[1].split("/")[0]
            return f"https://drive.google.com/uc?export=download&id={file_id}"
    return url
16
+
17
+
18
class HiringAgent:
    """Agent that assesses job candidates from a resume and GitHub profile."""

    def __init__(self):
        # AgentPro orchestrates the LLM with internet-search and
        # code-execution tools; used later to generate the assessment.
        self.agent = AgentPro(tools=[AresInternetTool(), CodeEngine()])
21
 
22
  def extract_text_from_pdf(self, pdf_url: str) -> str:
23
  """Extract text from PDF file."""
24
+ try:
25
+ pdf_url = convert_drive_url(pdf_url)
26
+ response = requests.get(pdf_url)
27
+ pdf_file = io.BytesIO(response.content)
28
+ reader = PyPDF2.PdfReader(pdf_file)
29
+ text = "\n".join(page.extract_text() or "" for page in reader.pages)
30
+ return text or "⚠️ No text could be extracted from the PDF."
31
+ except Exception as e:
32
+ return f"⚠️ Error reading PDF: {str(e)}"
33
+
34
  def extract_text_from_docx(self, docx_url: str) -> str:
35
  """Extract text from DOCX file."""
36
+ try:
37
+ docx_url = convert_drive_url(docx_url)
38
+ response = requests.get(docx_url)
39
+ docx_file = io.BytesIO(response.content)
40
+ doc = docx.Document(docx_file)
41
+ text = "\n".join(p.text for p in doc.paragraphs)
42
+ return text or "⚠️ No text found in DOCX."
43
+ except Exception as e:
44
+ return f"⚠️ Error reading DOCX: {str(e)}"
45
+
46
  def analyze_github_profile(self, github_url: str) -> Dict:
47
  """Analyze GitHub profile and extract relevant information."""
48
  response = requests.get(github_url)
49
  soup = BeautifulSoup(response.text, 'html.parser')
50
+
51
+ name = soup.find('span', {'class': 'p-name'})
52
+ bio = soup.find('div', {'class': 'p-note'})
53
+
 
 
54
  repos = []
55
  for repo in soup.find_all('a', {'data-hovercard-type': 'repository'})[:5]:
56
  repos.append({
57
  'name': repo.text.strip(),
58
  'url': f"https://github.com{repo['href']}"
59
  })
60
+
61
  return {
62
+ 'name': name.text.strip() if name else "",
63
+ 'bio': bio.text.strip() if bio else "",
64
  'repositories': repos
65
  }
66
+
67
  def analyze_candidate(self, resume_url: str, github_url: str, job_description: str, company_info: str) -> Dict:
68
  """Analyze candidate profile and generate assessment."""
69
+
70
+ # Resume Extraction
71
  if resume_url.endswith('.pdf'):
72
  resume_text = self.extract_text_from_pdf(resume_url)
73
  elif resume_url.endswith('.docx'):
74
  resume_text = self.extract_text_from_docx(resume_url)
75
  else:
76
+ resume_text = "⚠️ Unsupported resume format. Please upload a .pdf or .docx file."
77
+
78
+ # GitHub Info
79
  github_data = self.analyze_github_profile(github_url)
80
+
 
81
  prompt = f"""
82
  Analyze this candidate profile and provide a detailed assessment:
83
+
84
+ 📄 Resume Content:
85
  {resume_text}
86
+
87
+ 👨‍💻 GitHub Profile:
88
  Name: {github_data['name']}
89
  Bio: {github_data['bio']}
90
  Top Repositories: {[repo['name'] for repo in github_data['repositories']]}
91
+
92
+ 📝 Job Description:
93
  {job_description}
94
+
95
+ 🏢 Company Information:
96
  {company_info}
97
+
98
  Please provide:
99
  1. Skills and experience match with job requirements
100
  2. Technical proficiency assessment
101
  3. Cultural fit analysis
102
  4. Strengths and areas for development
103
+ 5. Final hiring recommendation
104
  """
105
+
106
  assessment = self.agent(prompt)
107
+
108
  return {
109
  'resume_analysis': resume_text,
110
  'github_analysis': github_data,