# resume_scanner / app.py
import os

import streamlit as st
from docx import Document
from groq import Groq
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
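
# Assumed runtime dependencies (not pinned anywhere in this file): streamlit,
# groq, scikit-learn, python-docx, PyPDF2, and transformers with a backend
# such as torch.
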
# Initialize Groq client (expects GROQ_API_KEY in the environment; requests
# will fail at call time if the key is missing)
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
# Initialize HuggingFace summarization pipeline; with no model argument,
# transformers downloads its default summarization checkpoint on first use
summarizer = pipeline("summarization")
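
# Hedged sketch: Streamlit re-executes this script on every interaction, so
# the pipeline above is reloaded on each rerun. Caching it once per process
# could look like this (st.cache_resource is a real Streamlit API; the
# wrapper name is illustrative):
#
#     @st.cache_resource
#     def load_summarizer():
#         return pipeline("summarization")
#
#     summarizer = load_summarizer()
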
# Function to get Groq analysis of the job description
def groq_chat_completion(prompt):
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
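
# Hedged sketch: an alternative is to steer the model with a system message
# rather than relying on the user prompt alone (the message content here is
# illustrative, not from the original app):
#
#     chat_completion = client.chat.completions.create(
#         messages=[
#             {"role": "system", "content": "You analyze job descriptions."},
#             {"role": "user", "content": prompt},
#         ],
#         model="llama3-8b-8192",
#     )
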
# Function to extract text from uploaded files, dispatching on MIME type
def extract_text(file):
    if file.type == "text/plain":
        return file.read().decode("utf-8")
    elif file.type == "application/pdf":
        pdf_reader = PdfReader(file)
        # extract_text() can return None for image-only pages; fall back to ""
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(file)
        return "\n".join(para.text for para in doc.paragraphs)
    else:
        return ""
# Function to extract TF-IDF keywords from a list of documents
def extract_keywords(documents):
    vectorizer = TfidfVectorizer(stop_words="english")
    tfidf_matrix = vectorizer.fit_transform(documents)
    return vectorizer, tfidf_matrix

# Function to compute pairwise cosine similarity between documents
def calculate_similarity(tfidf_matrix):
    similarity_matrix = cosine_similarity(tfidf_matrix)
    return similarity_matrix
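
# Shape example (hedged): with documents = [job_description, resume_1,
# resume_2], calculate_similarity returns a 3x3 matrix, and row 0
# (similarity_matrix[0][1], similarity_matrix[0][2]) holds the
# job-description-vs-resume scores displayed in the UI below.
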
# Function to generate a summary for each resume
def generate_summary(text):
    if len(text.split()) > 200:  # Summarize only if the text is long enough
        # truncation=True keeps inputs longer than the model's context
        # window from raising an error
        summary = summarizer(
            text, max_length=150, min_length=50, do_sample=False, truncation=True
        )
        return summary[0]["summary_text"]
    return text  # Return the original text if it is short
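
# Hedged alternative: rather than truncating, long resumes could be
# summarized in chunks (sketch only; the 500-word window is an assumption):
#
#     words = text.split()
#     chunks = [" ".join(words[i:i + 500]) for i in range(0, len(words), 500)]
#     parts = [summarizer(c, max_length=80, min_length=20, do_sample=False,
#                         truncation=True)[0]["summary_text"] for c in chunks]
#     return " ".join(parts)
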
# Streamlit UI
st.title("Detailed Job Creator and Resume Scanner")
st.write("Analyze resumes and match them with job descriptions.")
# Collect the job description and display the Groq analysis first
st.subheader("Job Description")
job_description = st.text_area(
    "Paste the job description here:",
    height=150,
)
if job_description:
    st.subheader("Groq Analysis")
    # Wrap the raw job description in an explicit instruction; the exact
    # wording here is one reasonable choice, not mandated by the original
    prompt = (
        "Analyze the following job description and summarize the key skills, "
        "qualifications, and responsibilities:\n\n" + job_description
    )
    groq_response = groq_chat_completion(prompt)
    st.write("Groq's analysis of the job description:")
    st.write(groq_response)
# Resume upload; the analysis step below also requires a job description
st.subheader("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload resume files (Text, Word, or PDF):",
    accept_multiple_files=True,
    type=["txt", "docx", "pdf"],
)
if st.button("Analyze Resumes"):
    if not job_description.strip():
        st.error("Please provide a job description first.")
    elif not uploaded_files:
        st.error("Please upload at least one resume.")
    else:
        # Extract text and keep each file paired with its text, so filtering
        # out empty files cannot misalign filenames, scores, and summaries
        extracted = [(file, extract_text(file)) for file in uploaded_files]
        extracted = [(file, text) for file, text in extracted if text.strip()]
        if not extracted:
            st.error("No valid text extracted from resumes. Please check your files.")
        else:
            # Combine job description and resumes for analysis; the job
            # description is document 0, so row 0 of the similarity matrix
            # scores it against every resume
            documents = [job_description] + [text for _, text in extracted]
            vectorizer, tfidf_matrix = extract_keywords(documents)
            similarities = calculate_similarity(tfidf_matrix)
            # Display match scores and summaries
            st.subheader("Resume Match Scores and Summaries")
            for i, (file, text) in enumerate(extracted):
                st.write(f"**Resume {i + 1}: {file.name}**")
                st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
                # Generate and display a summary of the resume
                summary = generate_summary(text)
                st.write("**Summary:**")
                st.write(summary)
                st.write("---")