Spaces:

Prernas19
/

resume_parser

Sleeping

App Files Community

Prernas19 committed on Aug 10, 2024

Commit

6116745

verified ·

1 Parent(s): 1053c28

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -22

app.py CHANGED Viewed

@@ -36,32 +36,37 @@ def extract_text_from_docx(file):
     return "\n".join([para.text for para in doc.paragraphs])
 def extract_companies(text):
     doc = nlp(text)
     companies = []
     company_pattern = re.compile(
-         r'\b(?:Inc\.|Corp\.|LLC|Ltd\.|Co\.|Company|Group|Services|Technologies|Pvt\.|Solutions|Consulting|Associates|Enterprises|Partners|Holdings|Systems|Networks|Ventures|Partners|International|Ltd|GmbH|S\.A\.|S\.L\.|LLP|PLC|AG|LLC)\b', re.IGNORECASE)
     for ent in doc.ents:
-        if ent.label_ == "ORG" and company_pattern.search(ent.text):
-            companies.append(ent.text)
-    # Join companies with new lines
-    return "\n".join(companies)
 def extract_colleges(text):
     doc = nlp(text)
     colleges = []
     edu_keywords = ["university", "college", "institute", "school", "academy", "polytechnic", "faculty", "department", "center", "centre", "campus", "educational", "institute of technology"]
     for sent in doc.sents:
         edu_ents = [ent for ent in sent.ents if ent.label_ == "ORG" and any(keyword in ent.text.lower() for keyword in edu_keywords)]
         for edu in edu_ents:
             colleges.append(edu.text)
-    # Join colleges with new lines
-    return "\n".join(colleges)
 def extract_years_of_experience(text):
     years = re.findall(r'(\d+)\s+year[s]*', text, re.IGNORECASE)
@@ -122,27 +127,28 @@ def parse_resume(file):
         summary = extract_summary(doc)
         linkedin = extract_linkedin(text)
-        return companies, colleges, years_of_experience, phone, email, summary, linkedin
     except Exception as e:
         import traceback
-        return f"An error occurred while parsing the resume: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
-# Create Gradio interface with separate output components
 iface = gr.Interface(
     fn=parse_resume,
     inputs=gr.File(label="Upload Resume (PDF or DOCX)"),
-    outputs=[
-        gr.Textbox(label="Companies Worked For", lines=10),
-        gr.Textbox(label="Colleges Attended", lines=10),
-        gr.Textbox(label="Years of Experience"),
-        gr.Textbox(label="Phone Number"),
-        gr.Textbox(label="Email ID"),
-        gr.Textbox(label="Summary", lines=3),
-        gr.Textbox(label="LinkedIn ID")
-    ],
     title="Advanced Resume Parser",
     description="Upload a resume in PDF or DOCX format to extract key information."
 )
-iface.launch(share=True)

     return "\n".join([para.text for para in doc.paragraphs])
 def extract_companies(text):
+    # Process the text with the spaCy model
     doc = nlp(text)
     companies = []
+    # Define a regex pattern for common company name suffixes
     company_pattern = re.compile(
+        r'\b(?:Inc|Corp|LLC|Ltd|Co|Company|Group|Services|Technologies|Pvt|Solutions|Consulting)\b', re.IGNORECASE)
+    # Iterate over the identified entities in the text
     for ent in doc.ents:
+        if ent.label_ == "ORG":
+            # Apply the regex pattern to filter out company names
+            if company_pattern.search(ent.text):
+                companies.append(ent.text)
+    return companies
 def extract_colleges(text):
     doc = nlp(text)
     colleges = []
+    # Extended list of education-related keywords
     edu_keywords = ["university", "college", "institute", "school", "academy", "polytechnic", "faculty", "department", "center", "centre", "campus", "educational", "institute of technology"]
     for sent in doc.sents:
+        # Extract entities labeled as ORG and check if they contain education-related keywords
         edu_ents = [ent for ent in sent.ents if ent.label_ == "ORG" and any(keyword in ent.text.lower() for keyword in edu_keywords)]
         for edu in edu_ents:
             colleges.append(edu.text)
+    return colleges
 def extract_years_of_experience(text):
     years = re.findall(r'(\d+)\s+year[s]*', text, re.IGNORECASE)
         summary = extract_summary(doc)
         linkedin = extract_linkedin(text)
+        result = {
+            "Companies Worked For": companies,
+            "Colleges Attended": colleges,
+            "Years of Experience": years_of_experience,
+            "Phone Number": phone,
+            "Email ID": email,
+            "Summary": summary,
+            "LinkedIn ID": linkedin
+        }
+        return result
     except Exception as e:
         import traceback
+        return {"Error": f"An error occurred while parsing the resume: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"}
+# Create Gradio interface
 iface = gr.Interface(
     fn=parse_resume,
     inputs=gr.File(label="Upload Resume (PDF or DOCX)"),
+    outputs=gr.JSON(label="Extracted Information"),
     title="Advanced Resume Parser",
     description="Upload a resume in PDF or DOCX format to extract key information."
 )
+iface.launch(share=True)