# Detail Job Creator and Resume Scanner — Streamlit app.
import os
import re

import numpy as np
import streamlit as st
from docx import Document
from groq import Groq
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
# Initialize the Groq client used for job-description analysis.
# NOTE(review): reads GROQ_API_KEY from the environment; if unset the key is
# None and the first API call will fail — confirm the deployment sets it.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Initialize the HuggingFace summarization pipeline.
# Pin the checkpoint explicitly: a bare pipeline("summarization") emits a
# deprecation warning and silently depends on the library's default model
# (sshleifer/distilbart-cnn-12-6), which could change between releases.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Function to get Groq analysis of the job description | |
def groq_chat_completion(prompt):
    """Send *prompt* as a single user message to the Groq chat API.

    Returns the text content of the first completion choice.
    """
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content
# Function to extract text from uploaded files | |
def extract_text(file):
    """Extract plain text from an uploaded file.

    Dispatches on the upload's MIME ``type`` attribute and supports plain
    text, PDF, and .docx files. Returns an empty string for any other type.
    """
    if file.type == "text/plain":
        return file.read().decode("utf-8")
    if file.type == "application/pdf":
        reader = PdfReader(file)
        # join() instead of repeated += avoids quadratic concatenation;
        # extract_text() can return None for image-only pages, hence `or ""`.
        return "".join(page.extract_text() or "" for page in reader.pages)
    if file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(file)
        # One trailing newline per paragraph, matching the original layout.
        return "".join(para.text + "\n" for para in doc.paragraphs)
    return ""
# Function to extract keywords and calculate similarity | |
def extract_keywords(documents):
    """Fit a TF-IDF model (English stop words removed) over *documents*.

    Returns the fitted vectorizer together with the document-term matrix.
    """
    tfidf = TfidfVectorizer(stop_words="english")
    matrix = tfidf.fit_transform(documents)
    return tfidf, matrix
def calculate_similarity(tfidf_matrix):
    """Return the pairwise cosine-similarity matrix for the TF-IDF rows."""
    return cosine_similarity(tfidf_matrix)
# Function to generate summary for each resume | |
def generate_summary(text):
    """Return an abstractive summary of *text*.

    Texts of 200 words or fewer are returned unchanged; longer texts are
    condensed to 50-150 tokens by the HuggingFace summarization pipeline.
    """
    if len(text.split()) <= 200:
        # Too short to be worth summarizing — hand it back as-is.
        return text
    result = summarizer(text, max_length=150, min_length=50, do_sample=False)
    return result[0]["summary_text"]
# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("Detail Job Creator and Resume Scanner")
st.write("Analyze resumes and match them with job descriptions.")

# Job description input; Groq analysis is shown as soon as text is entered.
st.subheader("Job Description")
job_description = st.text_area(
    "Paste the job description here:",
    height=150,
)
if job_description:
    st.subheader("Groq Analysis")
    groq_response = groq_chat_completion(job_description)
    st.write("Groq's analysis of the job description:")
    st.write(groq_response)

# Resume upload and matching.
st.subheader("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload resume files (Text, Word, or PDF):",
    accept_multiple_files=True,
    type=["txt", "docx", "pdf"]
)
if st.button("Analyze Resumes"):
    if not uploaded_files:
        st.error("Please upload at least one resume.")
    else:
        # BUG FIX: keep each file paired with its extracted text so that
        # dropping empty extractions cannot misalign files, texts, and
        # scores. Previously the text list was filtered while the file list
        # was not, so similarities[0][i + 1] and resumes[i] could point at
        # the wrong resume (or raise IndexError) whenever a file yielded
        # no text.
        extracted = [(file, extract_text(file)) for file in uploaded_files]
        valid = [(file, text) for file, text in extracted if text.strip()]
        if not valid:
            st.error("No valid text extracted from resumes. Please check your files.")
        else:
            # Row 0 of the similarity matrix compares the job description
            # against itself and every resume; entry i + 1 is resume i.
            documents = [job_description] + [text for _, text in valid]
            vectorizer, tfidf_matrix = extract_keywords(documents)
            similarities = calculate_similarity(tfidf_matrix)

            st.subheader("Resume Match Scores and Summaries")
            for i, (file, text) in enumerate(valid):
                st.write(f"**Resume {i+1}: {file.name}**")
                st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
                summary = generate_summary(text)
                st.write("**Summary:**")
                st.write(summary)
                st.write("---")