glinerdemo / app.py
bangaboy's picture
Update app.py
54f3307 verified
import google.generativeai as genai
import streamlit as st
import fitz
import spacy
from docx import Document
import dateparser
from datetime import datetime
from giner import GiNER
# Load the spaCy `en_core_web_sm` pipeline once at import time; extract_info()
# runs it over the resume text and reads the resulting named entities.
nlp = spacy.load('en_core_web_sm')
# Build the GiNER tagger with the "roberta-large" argument; extract_info()
# calls its annotate() method on the resume text.
# NOTE(review): presumably this is meant to be the GLiNER model, but the import
# here is `giner.GiNER` — confirm the package and class names.
giner = GiNER("roberta-large")
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Binary file-like object exposing ``read()`` that yields the
            PDF bytes (``main`` passes the Streamlit upload here).

    Returns:
        The text of all pages joined in page order; an empty string when
        the document has no pages.
    """
    # Open the document from memory and close it deterministically instead
    # of leaving the handle open until garbage collection.
    with fitz.open(stream=file.read(), filetype="pdf") as pdf:
        return "".join(page.get_text() for page in pdf)
def extract_text_from_doc(file):
    """Return the text of a .docx document as a single string.

    Args:
        file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        The text of every paragraph in the document, separated by single
        spaces.
    """
    paragraphs = Document(file).paragraphs
    return " ".join(para.text for para in paragraphs)
def authenticate_gemini(api_key):
    """Configure the Gemini client and build a ``gemini-pro`` model handle.

    Args:
        api_key: API key passed to ``genai.configure``.

    Returns:
        The ``genai.GenerativeModel`` instance, or ``None`` if configuring
        the client or constructing the model raised; in that case the error
        is shown in the Streamlit UI.
    """
    try:
        genai.configure(api_key=api_key)
        return genai.GenerativeModel('gemini-pro')
    except Exception as err:
        # Surface the failure to the user rather than crashing the app.
        st.error(f"Authentication failed: {err}")
    return None
def generate_summary(text, model):
    """Ask the model for a short summary of a resume.

    Args:
        text: Resume text to embed in the prompt.
        model: Object exposing ``generate_content(prompt)`` whose result
            has a ``text`` attribute.

    Returns:
        The ``text`` attribute of the model's response.
    """
    header = "Summarize the following resume:"
    footer = (
        "Provide a brief overview of the candidate's qualifications, "
        "experience, and key skills."
    )
    # Header, resume body and instructions are separated by blank lines.
    prompt = "\n\n".join([header, f"{text}", footer])
    return model.generate_content(prompt).text
def extract_info(text):
    """Extract companies, experience, education and contact details.

    Runs the module-level spaCy pipeline (``nlp``) and ``giner.annotate``
    over the text and merges their entities.

    Args:
        text: Plain resume text.

    Returns:
        A tuple ``(companies, experience, education, email, phone)``:

        * companies: sorted list of distinct ORG entity texts from both
          taggers.
        * experience: the largest difference, in years, between the current
          year and any spaCy DATE entity that ``dateparser`` can parse and
          that is not in a future year; ``0`` when there is none.
        * education: sorted list of the ORG entity texts that contain
          "university", "college", "institute" or "school"
          (case-insensitive).
        * email / phone: the first match found, or ``"Not found"``.
    """
    education_keywords = ("university", "college", "institute", "school")
    current_year = datetime.now().year

    doc = nlp(text)
    giner_results = giner.annotate(text)

    # Organisation names reported by either tagger.
    companies = {ent.text for ent in doc.ents if ent.label_ == "ORG"}
    companies.update(
        entity['text'] for entity in giner_results if entity['type'] == "ORG"
    )

    # Experience heuristic: the oldest parsable, non-future date mentioned.
    year_gaps = [0]
    for ent in doc.ents:
        if ent.label_ != "DATE":
            continue
        parsed = dateparser.parse(ent.text)
        if parsed and parsed.year <= current_year:
            year_gaps.append(current_year - parsed.year)
    experience = max(year_gaps)

    # Education is the subset of organisations that look like schools.
    education = {
        name
        for name in companies
        if any(keyword in name.lower() for keyword in education_keywords)
    }

    # Contact information: entity labels first, then spaCy's token-level
    # e-mail detection.
    email = next((ent.text for ent in doc.ents if ent.label_ == "EMAIL"), "Not found")
    if email == "Not found":
        email = next((token.text for token in doc if token.like_email), "Not found")
    phone = next((ent.text for ent in doc.ents if ent.label_ == "PHONE_NUMBER"), "Not found")

    # Fall back to the second tagger for anything still missing. The e-mail
    # fallback must match an e-mail entity: matching "PER" here put a
    # person's name into the e-mail field.
    for entity in giner_results:
        if entity['type'] == "EMAIL" and email == "Not found":
            email = entity['text']
        elif entity['type'] == "PHONE" and phone == "Not found":
            phone = entity['text']

    # Sort so the displayed order is stable between runs (set order is not).
    return sorted(companies), experience, sorted(education), email, phone
def main():
    """Run the Streamlit page: upload a resume, show extracted fields and a summary.

    Reads the Gemini API key from ``st.secrets["GEMINI_API_KEY"]``, accepts a
    PDF or DOCX upload, extracts its text, displays the fields returned by
    ``extract_info`` and then the summary from ``generate_summary``. Any
    exception raised while processing an upload is reported in the UI
    together with its traceback.
    """
    st.title("Enhanced Resume Analyzer")
    api_key = st.secrets["GEMINI_API_KEY"]  # Use Streamlit secrets
    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx"])

    # Nothing to do until the user has uploaded a file.
    if uploaded_file is None:
        return

    # Map the upload's MIME type to the matching text extractor.
    extractors = {
        "application/pdf": extract_text_from_pdf,
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extract_text_from_doc,
    }

    try:
        model = authenticate_gemini(api_key)
        if model is None:
            return

        extractor = extractors.get(uploaded_file.type)
        if extractor is None:
            st.error("Unsupported file format.")
            return
        resume_text = extractor(uploaded_file)

        companies, experience, education, email, phone = extract_info(resume_text)

        st.subheader("Extracted Information")
        st.write(f"*Years of Experience:* {experience}")
        st.write("*Companies:*", ", ".join(companies))
        st.write("*Education:*", ", ".join(education))
        st.write(f"*Email:* {email}")
        st.write(f"*Phone:* {phone}")

        summary = generate_summary(resume_text, model)
        st.subheader("Resume Summary")
        st.write(summary)
    except Exception as e:
        st.error(f"Error during processing: {str(e)}")
        st.exception(e)  # This will print the full traceback


if __name__ == "__main__":
    main()