Spaces:
Sleeping
Sleeping
Commit
·
3c4414a
1
Parent(s):
29367b6
made changes on pdf
Browse files
agent.py
CHANGED
@@ -7,94 +7,104 @@ import io
|
|
7 |
import docx
|
8 |
from typing import Dict, List, Optional
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
class HiringAgent:
|
11 |
def __init__(self):
|
12 |
self.agent = AgentPro(tools=[AresInternetTool(), CodeEngine()])
|
13 |
|
14 |
def extract_text_from_pdf(self, pdf_url: str) -> str:
|
15 |
"""Extract text from PDF file."""
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
text
|
22 |
-
|
23 |
-
|
|
|
|
|
24 |
def extract_text_from_docx(self, docx_url: str) -> str:
|
25 |
"""Extract text from DOCX file."""
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
text
|
32 |
-
|
33 |
-
|
|
|
|
|
34 |
def analyze_github_profile(self, github_url: str) -> Dict:
|
35 |
"""Analyze GitHub profile and extract relevant information."""
|
36 |
response = requests.get(github_url)
|
37 |
soup = BeautifulSoup(response.text, 'html.parser')
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
# Extract repositories
|
44 |
repos = []
|
45 |
for repo in soup.find_all('a', {'data-hovercard-type': 'repository'})[:5]:
|
46 |
repos.append({
|
47 |
'name': repo.text.strip(),
|
48 |
'url': f"https://github.com{repo['href']}"
|
49 |
})
|
50 |
-
|
51 |
return {
|
52 |
-
'name': name,
|
53 |
-
'bio': bio,
|
54 |
'repositories': repos
|
55 |
}
|
56 |
-
|
57 |
def analyze_candidate(self, resume_url: str, github_url: str, job_description: str, company_info: str) -> Dict:
|
58 |
"""Analyze candidate profile and generate assessment."""
|
59 |
-
|
|
|
60 |
if resume_url.endswith('.pdf'):
|
61 |
resume_text = self.extract_text_from_pdf(resume_url)
|
62 |
elif resume_url.endswith('.docx'):
|
63 |
resume_text = self.extract_text_from_docx(resume_url)
|
64 |
else:
|
65 |
-
resume_text = ""
|
66 |
-
|
67 |
-
#
|
68 |
github_data = self.analyze_github_profile(github_url)
|
69 |
-
|
70 |
-
# Generate assessment using AgentPro
|
71 |
prompt = f"""
|
72 |
Analyze this candidate profile and provide a detailed assessment:
|
73 |
-
|
74 |
-
Resume Content:
|
75 |
{resume_text}
|
76 |
-
|
77 |
-
GitHub Profile:
|
78 |
Name: {github_data['name']}
|
79 |
Bio: {github_data['bio']}
|
80 |
Top Repositories: {[repo['name'] for repo in github_data['repositories']]}
|
81 |
-
|
82 |
-
Job Description:
|
83 |
{job_description}
|
84 |
-
|
85 |
-
Company Information:
|
86 |
{company_info}
|
87 |
-
|
88 |
Please provide:
|
89 |
1. Skills and experience match with job requirements
|
90 |
2. Technical proficiency assessment
|
91 |
3. Cultural fit analysis
|
92 |
4. Strengths and areas for development
|
93 |
-
5.
|
94 |
"""
|
95 |
-
|
96 |
assessment = self.agent(prompt)
|
97 |
-
|
98 |
return {
|
99 |
'resume_analysis': resume_text,
|
100 |
'github_analysis': github_data,
|
|
|
7 |
import docx
|
8 |
from typing import Dict, List, Optional
|
9 |
|
10 |
+
|
11 |
+
def convert_drive_url(url: str) -> str:
    """Turn a Google Drive file link into a direct-download link.

    Any URL that mentions ``drive.google.com`` and carries a ``/d/<file_id>``
    path segment (``.../file/d/<id>/view``, ``/edit``, ``/preview`` or no
    suffix at all) is rewritten to the ``uc?export=download&id=<id>`` form.
    Every other URL, including Drive URLs without a ``/d/`` segment, is
    returned unchanged instead of raising.

    Args:
        url: The URL supplied by the caller.

    Returns:
        The direct-download URL when a file id could be found, otherwise
        ``url`` itself.
    """
    if "drive.google.com" not in url:
        return url

    # partition() never raises: ``found`` is empty when there is no "/d/".
    _, found, tail = url.partition("/d/")
    if not found:
        return url

    # The id ends at the next path separator, query string or fragment.
    file_id = tail.split("/", 1)[0].split("?", 1)[0].split("#", 1)[0]
    if not file_id:
        return url

    return f"https://drive.google.com/uc?export=download&id={file_id}"
|
16 |
+
|
17 |
+
|
18 |
class HiringAgent:
|
19 |
def __init__(self):
    """Build the AgentPro instance that this class calls with its prompt."""
    toolset = [AresInternetTool(), CodeEngine()]
    self.agent = AgentPro(tools=toolset)
|
21 |
|
22 |
def extract_text_from_pdf(self, pdf_url: str) -> str:
    """Download a PDF and return the text of all its pages.

    Google Drive share links are first rewritten by ``convert_drive_url``.
    This method does not raise: any failure (network error, timeout, HTTP
    error status, unreadable PDF) is reported as a warning string so the
    caller can still embed the result in its prompt.

    Args:
        pdf_url: URL of the PDF file.

    Returns:
        The page texts joined by newlines, or a string starting with "⚠️"
        describing why no text is available.
    """
    try:
        pdf_url = convert_drive_url(pdf_url)
        # A timeout keeps a stalled server from hanging the analysis forever.
        response = requests.get(pdf_url, timeout=30)
        # Report 4xx/5xx directly instead of parsing an HTML error page as PDF.
        response.raise_for_status()
        reader = PyPDF2.PdfReader(io.BytesIO(response.content))
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        # Joining several empty pages yields only newlines, which is truthy;
        # test the stripped text so the fallback also fires for those PDFs.
        if not text.strip():
            return "⚠️ No text could be extracted from the PDF."
        return text
    except Exception as e:
        return f"⚠️ Error reading PDF: {str(e)}"
|
33 |
+
|
34 |
def extract_text_from_docx(self, docx_url: str) -> str:
    """Download a DOCX file and return the text of its paragraphs.

    Google Drive share links are first rewritten by ``convert_drive_url``.
    This method does not raise: any failure (network error, timeout, HTTP
    error status, unreadable document) is reported as a warning string.

    Args:
        docx_url: URL of the DOCX file.

    Returns:
        The paragraph texts joined by newlines, or a string starting with
        "⚠️" describing why no text is available.
    """
    try:
        docx_url = convert_drive_url(docx_url)
        # A timeout keeps a stalled server from hanging the analysis forever.
        response = requests.get(docx_url, timeout=30)
        # Report 4xx/5xx directly instead of parsing an error page as DOCX.
        response.raise_for_status()
        doc = docx.Document(io.BytesIO(response.content))
        text = "\n".join(p.text for p in doc.paragraphs)
        # Several empty paragraphs join to bare newlines, which is truthy;
        # test the stripped text so the fallback fires for them as well.
        if not text.strip():
            return "⚠️ No text found in DOCX."
        return text
    except Exception as e:
        return f"⚠️ Error reading DOCX: {str(e)}"
|
45 |
+
|
46 |
def analyze_github_profile(self, github_url: str) -> Dict:
    """Scrape a GitHub profile page for name, bio and linked repositories.

    The page is fetched over HTTP and parsed with BeautifulSoup. Name and
    bio come from the first ``span.p-name`` and ``div.p-note`` elements;
    repositories are the first five anchors whose ``data-hovercard-type``
    attribute is ``repository``.

    Args:
        github_url: URL of the profile page.

    Returns:
        A dict with keys ``'name'`` and ``'bio'`` (empty strings when the
        element is missing) and ``'repositories'``, a list of
        ``{'name': ..., 'url': ...}`` dicts.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds the timeout.
    """
    # A timeout keeps an unresponsive server from blocking the analysis.
    response = requests.get(github_url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('span', {'class': 'p-name'})
    bio = soup.find('div', {'class': 'p-note'})

    repo_links = soup.find_all('a', {'data-hovercard-type': 'repository'})[:5]
    repos = [
        {
            'name': repo.text.strip(),
            # hrefs are used as site-relative paths, so prefix the host.
            'url': f"https://github.com{repo['href']}",
        }
        for repo in repo_links
    ]

    return {
        'name': name.text.strip() if name else "",
        'bio': bio.text.strip() if bio else "",
        'repositories': repos
    }
|
66 |
+
|
67 |
def analyze_candidate(self, resume_url: str, github_url: str, job_description: str, company_info: str) -> Dict:
|
68 |
"""Analyze candidate profile and generate assessment."""
|
69 |
+
|
70 |
+
# Resume Extraction
|
71 |
if resume_url.endswith('.pdf'):
|
72 |
resume_text = self.extract_text_from_pdf(resume_url)
|
73 |
elif resume_url.endswith('.docx'):
|
74 |
resume_text = self.extract_text_from_docx(resume_url)
|
75 |
else:
|
76 |
+
resume_text = "⚠️ Unsupported resume format. Please upload a .pdf or .docx file."
|
77 |
+
|
78 |
+
# GitHub Info
|
79 |
github_data = self.analyze_github_profile(github_url)
|
80 |
+
|
|
|
81 |
prompt = f"""
|
82 |
Analyze this candidate profile and provide a detailed assessment:
|
83 |
+
|
84 |
+
📄 Resume Content:
|
85 |
{resume_text}
|
86 |
+
|
87 |
+
👨💻 GitHub Profile:
|
88 |
Name: {github_data['name']}
|
89 |
Bio: {github_data['bio']}
|
90 |
Top Repositories: {[repo['name'] for repo in github_data['repositories']]}
|
91 |
+
|
92 |
+
📝 Job Description:
|
93 |
{job_description}
|
94 |
+
|
95 |
+
🏢 Company Information:
|
96 |
{company_info}
|
97 |
+
|
98 |
Please provide:
|
99 |
1. Skills and experience match with job requirements
|
100 |
2. Technical proficiency assessment
|
101 |
3. Cultural fit analysis
|
102 |
4. Strengths and areas for development
|
103 |
+
5. Final hiring recommendation
|
104 |
"""
|
105 |
+
|
106 |
assessment = self.agent(prompt)
|
107 |
+
|
108 |
return {
|
109 |
'resume_analysis': resume_text,
|
110 |
'github_analysis': github_data,
|