from crewai import Agent, Task, Crew
from langchain_groq import ChatGroq
from langchain_community.document_loaders import RecursiveUrlLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from googlesearch import search
from PyPDF2 import PdfReader
from dotenv import load_dotenv
import os
import logging
from bs4 import BeautifulSoup
import re
load_dotenv()
logging.basicConfig(filename="app.log", level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
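
# Groq-hosted Llama 3 70B; GROQ_API_KEY is read from the .env file loaded above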
llm = ChatGroq(
    api_key=os.getenv("GROQ_API_KEY"),
    model="llama3-70b-8192",
    temperature=0.5,
    max_tokens=1000
)
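
# Single agent that scores and ranks resumes; delegation is disabled so it answers directly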
resume_ranker = Agent(
    role="Resume Ranker",
    goal="Rank resumes based on job fit with fairness",
    backstory="An expert in evaluating resumes fairly",
    llm=llm,
    verbose=True,
    allow_delegation=False
)
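
# Extractor handed to RecursiveUrlLoader: strips HTML tags and normalizes whitespace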
def html_to_text(html_content: str) -> str:
    soup = BeautifulSoup(html_content, 'html.parser')
    # Extract text with proper spacing
    text = soup.get_text(separator=" ").strip()
    # Collapse runs of whitespace into single spaces
    text = re.sub(r'\s+', ' ', text)
    return text
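
# Reads a PDF either from a path on disk or from an in-memory/uploaded file object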
def extract_text_from_pdf(file_path=None, file_content=None):
    if file_path:
        reader = PdfReader(file_path)
    elif file_content:
        reader = PdfReader(file_content)
    else:
        raise ValueError("Provide either file_path or file_content")
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""
    return text
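
# Pulls reference pages for the job description from .edu/.org/.gov domains,
# excluding signup/login pages; failed fetches are logged and skipped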
def fetch_related_content(job_description):
    query = f"{job_description} site:*.edu | site:*.org | site:*.gov -inurl:(signup | login)"
    urls = list(search(query, num_results=5))
    documents = []
    for url in urls:
        try:
            loader = RecursiveUrlLoader(url=url, extractor=html_to_text, max_depth=1,
                                        headers={"User-Agent": "Mozilla/5.0"})
            docs = loader.load()
            documents.extend(docs)
        except Exception as e:
            logging.error(f"Error loading {url}: {e}")
    return documents
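
# Chunks the fetched pages and indexes them in an in-memory FAISS store
# using MiniLM sentence embeddings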
def store_in_vdb(documents):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(chunks, embeddings)
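
# Collects resume text from a directory of PDFs or from uploaded file objects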
def process_resumes(job_description, dir_path=None, uploaded_files=None):
    resumes = []
    if dir_path and os.path.isdir(dir_path):
        for filename in os.listdir(dir_path):
            if filename.endswith(".pdf"):
                file_path = os.path.join(dir_path, filename)
                resume_text = extract_text_from_pdf(file_path=file_path)
                resumes.append(f"Resume: {filename}\nContent: {resume_text}")
    elif uploaded_files:
        for uploaded_file in uploaded_files:
            resume_text = extract_text_from_pdf(file_content=uploaded_file)
            resumes.append(f"Resume: {uploaded_file.name}\nContent: {resume_text}")
    return resumes
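
# Builds the ranking task: retrieves the top-3 context chunks for the job
# description and folds resumes, context, and fairness instructions into the prompt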
def create_resume_rank_task(job_description, dir_path=None, uploaded_files=None):
    resumes = process_resumes(job_description, dir_path, uploaded_files)
    if not resumes:
        return None
    documents = fetch_related_content(job_description)
    vdb = store_in_vdb(documents) if documents else None
    context = vdb.similarity_search(job_description, k=3) if vdb else []
    context_text = "\n".join([doc.page_content for doc in context]) or "No context."
    prompt = f"Rank these resumes: {', '.join(resumes)} for '{job_description}' using context: '{context_text}'. Ensure fairness by avoiding bias based on gender, age, or ethnicity. Flag any potential bias in reasoning."
    return Task(
        description=prompt,
        agent=resume_ranker,
        expected_output="A ranked list with scores (0-100), reasoning, and bias flags."
    )
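
# --- Minimal usage sketch (not shown in the original listing) ---
# Wires the task into a Crew and runs it end to end. The job description and
# the "resumes/" directory below are illustrative assumptions, not from the source.
if __name__ == "__main__":
    sample_jd = "Senior Python developer with NLP experience"  # hypothetical example input
    task = create_resume_rank_task(sample_jd, dir_path="resumes")  # assumed local folder of PDFs
    if task:
        crew = Crew(agents=[resume_ranker], tasks=[task], verbose=True)
        result = crew.kickoff()
        print(result)
    else:
        logging.warning("No resumes found; nothing to rank.")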