import google.generativeai as genai
import streamlit as st
import fitz  # PyMuPDF
import spacy
from docx import Document
import dateparser
from datetime import datetime
from gliner import GLiNER

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

# Load GLiNER model (any published GLiNER checkpoint can be substituted here)
gliner_model = GLiNER.from_pretrained("urchade/gliner_base")
def extract_text_from_pdf(file):
    pdf = fitz.open(stream=file.read(), filetype="pdf")
    text = ""
    for page in pdf:
        text += page.get_text()
    return text
def extract_text_from_doc(file):
    doc = Document(file)
    return " ".join([paragraph.text for paragraph in doc.paragraphs])
def authenticate_gemini(api_key):
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-pro')
        return model
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        return None
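# The Gemini key is read from Streamlit secrets in main(). A minimal sketch of
# the expected .streamlit/secrets.toml entry (the key name matches the
# st.secrets lookup below; the value is a placeholder):
#
#   GEMINI_API_KEY = "your-api-key-here"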
def generate_summary(text, model):
    prompt = f"Summarize the following resume:\n\n{text}\n\nProvide a brief overview of the candidate's qualifications, experience, and key skills."
    response = model.generate_content(prompt)
    return response.text
def extract_info(text):
    doc = nlp(text)

    # GLiNER is zero-shot: pass the entity labels we want it to find
    gliner_labels = ["organization", "email", "phone number"]
    gliner_results = gliner_model.predict_entities(text, gliner_labels, threshold=0.5)

    # Extract companies from both spaCy and GLiNER results
    companies = set(ent.text for ent in doc.ents if ent.label_ == "ORG")
    companies.update(entity["text"] for entity in gliner_results if entity["label"] == "organization")

    # Estimate years of experience from the oldest parsable DATE entity
    experience = max(
        [datetime.now().year - date.year
         for ent in doc.ents
         if ent.label_ == "DATE"
         and (date := dateparser.parse(ent.text))
         and date.year <= datetime.now().year] or [0]
    )

    # Extract education (organizations whose name suggests an institution)
    edu_keywords = ["university", "college", "institute", "school"]
    education = set(name for name in companies if any(keyword in name.lower() for keyword in edu_keywords))

    # Extract contact information from GLiNER results
    # (en_core_web_sm has no EMAIL/PHONE_NUMBER labels, so spaCy cannot provide these)
    email = next((e["text"] for e in gliner_results if e["label"] == "email"), "Not found")
    phone = next((e["text"] for e in gliner_results if e["label"] == "phone number"), "Not found")

    return list(companies), experience, list(education), email, phone
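# Optional hardening, not part of the original flow: model-based extraction can
# miss contact details, so a plain-regex fallback is a common safety net. A
# minimal sketch; the helper name extract_contact_fallback is ours, not from the
# app above.
import re

def extract_contact_fallback(text):
    """Return (email, phone) found by simple regexes, or 'Not found'."""
    email_match = re.search(r"[\w.+-]+@[\w-]+\.[\w.-]+", text)
    phone_match = re.search(r"\+?\d[\d\s().-]{7,}\d", text)
    email = email_match.group(0) if email_match else "Not found"
    phone = phone_match.group(0) if phone_match else "Not found"
    return email, phone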
def main():
    st.title("Enhanced Resume Analyzer")
    api_key = st.secrets["GEMINI_API_KEY"]  # Use Streamlit secrets
    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx"])

    if uploaded_file is not None:
        try:
            model = authenticate_gemini(api_key)
            if model is None:
                return

            if uploaded_file.type == "application/pdf":
                resume_text = extract_text_from_pdf(uploaded_file)
            elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                resume_text = extract_text_from_doc(uploaded_file)
            else:
                st.error("Unsupported file format.")
                return

            companies, experience, education, email, phone = extract_info(resume_text)

            st.subheader("Extracted Information")
            st.write(f"*Years of Experience:* {experience}")
            st.write("*Companies:*", ", ".join(companies))
            st.write("*Education:*", ", ".join(education))
            st.write(f"*Email:* {email}")
            st.write(f"*Phone:* {phone}")

            summary = generate_summary(resume_text, model)
            st.subheader("Resume Summary")
            st.write(summary)
        except Exception as e:
            st.error(f"Error during processing: {str(e)}")
            st.exception(e)  # This will print the full traceback

if __name__ == "__main__":
    main()
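# To run locally (assumed setup, derived from the imports above; the filename
# app.py is an assumption):
#   pip install streamlit google-generativeai pymupdf spacy python-docx dateparser gliner
#   python -m spacy download en_core_web_sm
#   streamlit run app.py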