import os

import streamlit as st
from groq import Groq
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from docx import Document
from PyPDF2 import PdfReader
from transformers import pipeline

# Initialize Groq client (expects the GROQ_API_KEY environment variable)
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Load the HuggingFace summarization pipeline once and cache it across
# Streamlit reruns, since model loading is slow
@st.cache_resource
def load_summarizer():
    return pipeline("summarization")

summarizer = load_summarizer()

# Ask Groq to analyze the job description
def groq_chat_completion(prompt):
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content

# Extract plain text from an uploaded file (TXT, PDF, or DOCX)
def extract_text(file):
    if file.type == "text/plain":
        return file.read().decode("utf-8")
    elif file.type == "application/pdf":
        pdf_reader = PdfReader(file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() or ""
        return text
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(file)
        text = ""
        for para in doc.paragraphs:
            text += para.text + "\n"
        return text
    else:
        return ""

# Build a TF-IDF matrix over the documents
def extract_keywords(documents):
    vectorizer = TfidfVectorizer(stop_words="english")
    tfidf_matrix = vectorizer.fit_transform(documents)
    return vectorizer, tfidf_matrix

# Pairwise cosine similarity between all document vectors
def calculate_similarity(tfidf_matrix):
    return cosine_similarity(tfidf_matrix)

# Summarize a resume; short texts are returned unchanged
def generate_summary(text):
    if len(text.split()) > 200:  # Summarize only if the text is long enough
        # truncation=True keeps long resumes within the model's input limit
        summary = summarizer(text, max_length=150, min_length=50,
                             do_sample=False, truncation=True)
        return summary[0]["summary_text"]
    return text  # Return original text if it's short
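# Note on scoring: TF-IDF weights are non-negative, so the cosine
# similarity between the job description (row 0 of the matrix) and each
# resume falls in [0, 1]; the UI below reports it scaled to a percentage.
# A minimal sketch on toy data (illustrative only, not run by the app):
#
#   _, m = extract_keywords(["python developer", "senior python engineer"])
#   calculate_similarity(m)[0][1]  # similarity of the pair, in [0, 1]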
# Streamlit UI
st.title("Detail Job Creator and Resume Scanner")
st.write("Analyze resumes and match them with job descriptions.")

# Collect the job description and show Groq's analysis first
st.subheader("Job Description")
job_description = st.text_area(
    "Paste the job description here:",
    height=150,
)

if job_description:
    st.subheader("Groq Analysis")
    # Wrap the raw text in an instruction so the model analyzes it
    # rather than simply responding to it
    groq_response = groq_chat_completion(
        "Analyze the following job description and summarize its key "
        "skills, qualifications, and responsibilities:\n\n" + job_description
    )
    st.write("Groq's analysis of the job description:")
    st.write(groq_response)

# Resume upload and analysis
st.subheader("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload resume files (Text, Word, or PDF):",
    accept_multiple_files=True,
    type=["txt", "docx", "pdf"]
)

if st.button("Analyze Resumes"):
    if not job_description.strip():
        st.error("Please provide a job description first.")
    elif not uploaded_files:
        st.error("Please upload at least one resume.")
    else:
        # Extract text and drop files that yielded none, keeping each file
        # paired with its text so indices stay aligned in the loop below
        extracted = [(file, extract_text(file)) for file in uploaded_files]
        extracted = [(file, text) for file, text in extracted if text.strip()]
        if not extracted:
            st.error("No valid text extracted from resumes. Please check your files.")
        else:
            # Combine job description and resumes for analysis
            resumes = [text for _, text in extracted]
            documents = [job_description] + resumes

            # Extract keywords and calculate similarity
            vectorizer, tfidf_matrix = extract_keywords(documents)
            similarities = calculate_similarity(tfidf_matrix)

            # Display match scores and summaries
            st.subheader("Resume Match Scores and Summaries")
            for i, (file, resume_text) in enumerate(extracted):
                st.write(f"**Resume {i + 1}: {file.name}**")
                # Row 0 is the job description; resume i is column i + 1
                st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")

                # Generate and display summary
                summary = generate_summary(resume_text)
                st.write("**Summary:**")
                st.write(summary)
                st.write("---")
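# To run locally (assuming this file is saved as app.py; the name is
# arbitrary):
#   export GROQ_API_KEY="your-key-here"
#   streamlit run app.py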