"""Streamlit app that extracts key facts from a resume and summarizes it with Gemini.

The user uploads a PDF or DOCX resume. The app pulls out the raw text, runs
named-entity recognition over it to find companies, education, contact details
and a rough experience estimate, and asks a Gemini model for a summary.
"""

import re
from datetime import datetime

import dateparser
import fitz
import google.generativeai as genai
import spacy
import streamlit as st
from docx import Document
from giner import GiNER

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

# Load GLiNER model
# NOTE(review): the module/class name (`giner.GiNER`), the constructor argument
# and the `.annotate()` result schema ('text' / 'type' keys) are kept exactly as
# the original code used them; they look like they may be meant to be the
# `gliner` package -- confirm against the installed dependency.
giner = GiNER("roberta-large")

# Substrings that mark an organization name as an educational institution.
EDUCATION_KEYWORDS = ("university", "college", "institute", "school")

# Placeholder returned when a contact field cannot be located.
NOT_FOUND = "Not found"

# Heuristic patterns for contact details. They are deliberately permissive:
# resumes format phone numbers in many ways.
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
_PHONE_RE = re.compile(
    r"(?<!\d)(?:\+?\d{1,3}[\s.-]?)?(?:\(\d{2,4}\)|\d{2,4})[\s.-]?\d{3,4}[\s.-]?\d{3,4}(?!\d)"
)


def extract_text_from_pdf(file):
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        file: A binary file-like object; its full contents are read.

    Returns:
        The text of all pages joined together, in page order.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(stream=file.read(), filetype="pdf") as pdf:
        return "".join(page.get_text() for page in pdf)


def extract_text_from_doc(file):
    """Return the text of all paragraphs of a DOCX file, joined by spaces."""
    doc = Document(file)
    return " ".join(paragraph.text for paragraph in doc.paragraphs)


def authenticate_gemini(api_key):
    """Configure the Gemini client and return a 'gemini-pro' model.

    Args:
        api_key: API key passed to ``genai.configure``.

    Returns:
        The generative model, or ``None`` if configuration raised; in that
        case the error is shown in the Streamlit UI.
    """
    try:
        genai.configure(api_key=api_key)
        return genai.GenerativeModel('gemini-pro')
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        return None


def generate_summary(text, model):
    """Ask ``model`` for a summary of the resume ``text`` and return its text."""
    prompt = f"Summarize the following resume:\n\n{text}\n\nProvide a brief overview of the candidate's qualifications, experience, and key skills."
    response = model.generate_content(prompt)
    return response.text


def _is_education(name):
    """Return True if ``name`` contains one of the education keywords."""
    lowered = name.lower()
    return any(keyword in lowered for keyword in EDUCATION_KEYWORDS)


def _find_contact(pattern, text, doc, giner_results, spacy_label, giner_type):
    """Locate one contact field, trying the regex first and NER labels second.

    Returns the first match found, or ``NOT_FOUND`` when every source is empty.
    """
    match = pattern.search(text)
    if match:
        return match.group(0).strip()
    # The stock en_core_web_sm pipeline does not emit these labels; the lookup
    # is kept so a custom pipeline that does emit them keeps working.
    for ent in doc.ents:
        if ent.label_ == spacy_label:
            return ent.text
    for entity in giner_results:
        if entity['type'] == giner_type:
            return entity['text']
    return NOT_FOUND


def extract_info(text):
    """Extract structured facts from resume text.

    Args:
        text: The plain text of the resume.

    Returns:
        A tuple ``(companies, experience, education, email, phone)`` where
        ``companies`` and ``education`` are sorted lists of organization
        names, ``experience`` is the largest gap in years between the current
        year and any parsed, non-future date mentioned in the text (0 if
        none), and ``email`` / ``phone`` are strings or ``"Not found"``.
    """
    doc = nlp(text)
    giner_results = giner.annotate(text)

    # Extract companies: organizations reported by either model.
    companies = {ent.text for ent in doc.ents if ent.label_ == "ORG"}
    companies.update(
        entity['text'] for entity in giner_results if entity['type'] == "ORG"
    )

    # Extract experience: years since the earliest date that can be parsed.
    current_year = datetime.now().year
    experience = 0
    for ent in doc.ents:
        if ent.label_ != "DATE":
            continue
        parsed = dateparser.parse(ent.text)
        if parsed and parsed.year <= current_year:
            experience = max(experience, current_year - parsed.year)

    # Extract education: the organizations whose name looks like a school.
    education = {name for name in companies if _is_education(name)}

    # Extract contact information. A person entity is never used as an email.
    email = _find_contact(_EMAIL_RE, text, doc, giner_results, "EMAIL", "EMAIL")
    phone = _find_contact(
        _PHONE_RE, text, doc, giner_results, "PHONE_NUMBER", "PHONE"
    )

    # Sorted so the UI shows a stable order across reruns.
    return sorted(companies), experience, sorted(education), email, phone


def main():
    """Render the Streamlit page: upload a resume, show extracted facts and a summary."""
    st.title("Enhanced Resume Analyzer")

    try:
        api_key = st.secrets["GEMINI_API_KEY"]  # Use Streamlit secrets
    except (KeyError, FileNotFoundError):
        st.error("GEMINI_API_KEY is not configured in Streamlit secrets.")
        return

    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx"])
    if uploaded_file is None:
        return

    try:
        model = authenticate_gemini(api_key)
        if model is None:
            return

        if uploaded_file.type == "application/pdf":
            resume_text = extract_text_from_pdf(uploaded_file)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            resume_text = extract_text_from_doc(uploaded_file)
        else:
            st.error("Unsupported file format.")
            return

        # Image-only (scanned) documents yield no text; stop before calling
        # the NLP models and Gemini on an empty string.
        if not resume_text.strip():
            st.warning("No text could be extracted from the uploaded file.")
            return

        companies, experience, education, email, phone = extract_info(resume_text)

        st.subheader("Extracted Information")
        st.write(f"*Years of Experience:* {experience}")
        st.write("*Companies:*", ", ".join(companies))
        st.write("*Education:*", ", ".join(education))
        st.write(f"*Email:* {email}")
        st.write(f"*Phone:* {phone}")

        summary = generate_summary(resume_text, model)
        st.subheader("Resume Summary")
        st.write(summary)
    except Exception as e:
        st.error(f"Error during processing: {str(e)}")
        st.exception(e)  # This will print the full traceback


if __name__ == "__main__":
    main()