import google.generativeai as genai
import streamlit as st
import fitz  # PyMuPDF
import spacy
from docx import Document
import dateparser
from datetime import datetime
from gliner import GLiNER

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

# Load GLiNER model (any published GLiNER checkpoint can be substituted here)
gliner_model = GLiNER.from_pretrained("urchade/gliner_base")
def extract_text_from_pdf(file):
    pdf = fitz.open(stream=file.read(), filetype="pdf")
    text = ""
    for page in pdf:
        text += page.get_text()
    return text
def extract_text_from_doc(file):
    doc = Document(file)
    return " ".join([paragraph.text for paragraph in doc.paragraphs])
def authenticate_gemini(api_key):
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-pro')
        return model
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        return None
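# The Gemini key is read from Streamlit secrets in main(). A minimal sketch of
# the expected .streamlit/secrets.toml entry (the key name matches the
# st.secrets lookup below; the value is a placeholder):
#
#   GEMINI_API_KEY = "your-api-key-here"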
def generate_summary(text, model):
    prompt = f"Summarize the following resume:\n\n{text}\n\nProvide a brief overview of the candidate's qualifications, experience, and key skills."
    response = model.generate_content(prompt)
    return response.text
def extract_info(text):
    doc = nlp(text)

    # GLiNER is zero-shot: pass the entity labels we want it to find
    gliner_labels = ["organization", "email", "phone number"]
    gliner_results = gliner_model.predict_entities(text, gliner_labels, threshold=0.5)

    # Extract companies from both spaCy and GLiNER results
    companies = set(ent.text for ent in doc.ents if ent.label_ == "ORG")
    companies.update(entity["text"] for entity in gliner_results if entity["label"] == "organization")

    # Estimate years of experience from the oldest parsable DATE entity
    experience = max(
        [datetime.now().year - date.year
         for ent in doc.ents
         if ent.label_ == "DATE"
         and (date := dateparser.parse(ent.text))
         and date.year <= datetime.now().year] or [0]
    )

    # Extract education (organizations whose name suggests an institution)
    edu_keywords = ["university", "college", "institute", "school"]
    education = set(name for name in companies if any(keyword in name.lower() for keyword in edu_keywords))

    # Extract contact information from GLiNER results
    # (en_core_web_sm has no EMAIL/PHONE_NUMBER labels, so spaCy cannot provide these)
    email = next((e["text"] for e in gliner_results if e["label"] == "email"), "Not found")
    phone = next((e["text"] for e in gliner_results if e["label"] == "phone number"), "Not found")

    return list(companies), experience, list(education), email, phone
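# Optional hardening, not part of the original flow: model-based extraction can
# miss contact details, so a plain-regex fallback is a common safety net. A
# minimal sketch; the helper name extract_contact_fallback is ours, not from the
# app above.
import re

def extract_contact_fallback(text):
    """Return (email, phone) found by simple regexes, or 'Not found'."""
    email_match = re.search(r"[\w.+-]+@[\w-]+\.[\w.-]+", text)
    phone_match = re.search(r"\+?\d[\d\s().-]{7,}\d", text)
    email = email_match.group(0) if email_match else "Not found"
    phone = phone_match.group(0) if phone_match else "Not found"
    return email, phone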
def main():
    st.title("Enhanced Resume Analyzer")
    api_key = st.secrets["GEMINI_API_KEY"]  # Use Streamlit secrets
    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx"])

    if uploaded_file is not None:
        try:
            model = authenticate_gemini(api_key)
            if model is None:
                return

            if uploaded_file.type == "application/pdf":
                resume_text = extract_text_from_pdf(uploaded_file)
            elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                resume_text = extract_text_from_doc(uploaded_file)
            else:
                st.error("Unsupported file format.")
                return

            companies, experience, education, email, phone = extract_info(resume_text)

            st.subheader("Extracted Information")
            st.write(f"*Years of Experience:* {experience}")
            st.write("*Companies:*", ", ".join(companies))
            st.write("*Education:*", ", ".join(education))
            st.write(f"*Email:* {email}")
            st.write(f"*Phone:* {phone}")

            summary = generate_summary(resume_text, model)
            st.subheader("Resume Summary")
            st.write(summary)
        except Exception as e:
            st.error(f"Error during processing: {str(e)}")
            st.exception(e)  # This will print the full traceback

if __name__ == "__main__":
    main()
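# To run locally (assumed setup, derived from the imports above; the filename
# app.py is an assumption):
#   pip install streamlit google-generativeai pymupdf spacy python-docx dateparser gliner
#   python -m spacy download en_core_web_sm
#   streamlit run app.py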