Kabilash10 committed on
Commit
638377b
·
verified ·
1 Parent(s): ab0fab3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +481 -0
app.py CHANGED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ import re
4
+ import nltk
5
+ from nltk.tokenize import word_tokenize
6
+ from nltk.corpus import wordnet
7
+ import requests
8
+ from typing import Optional
9
+ import os
10
+ import pandas as pd
11
+ from sqlalchemy import create_engine, Column, Integer, String, Float
12
+ from sqlalchemy.ext.declarative import declarative_base
13
+ from sqlalchemy.orm import sessionmaker
14
+ import json
15
+ import openai # Import OpenAI
16
+
17
+ # Initialize NLTK resources
18
def download_nltk_resources():
    """Fetch each NLTK resource this app lists, unless it is already installed."""
    # (name passed to nltk.download, path probed with nltk.data.find)
    required = (
        ('punkt', 'tokenizers/punkt'),
        ('averaged_perceptron_tagger', 'taggers/averaged_perceptron_tagger'),
        ('wordnet', 'corpora/wordnet'),
        ('stopwords', 'corpora/stopwords'),
    )
    for package_name, lookup_path in required:
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            # nltk.data.find raises LookupError when the resource is absent
            nltk.download(package_name)
30
+
31
download_nltk_resources()

# Ensure spaCy model is downloaded
import spacy

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raised OSError: download the model once, then load it again.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
40
+
41
+ # Database setup
42
Base = declarative_base()


class ResumeScore(Base):
    """ORM mapping for one analysed resume, stored in table ``resume_scores``."""

    __tablename__ = 'resume_scores'

    id = Column(Integer, primary_key=True)
    resume_name = Column(String)
    score = Column(Float)
    # process_resume writes skills/certifications as ', '-joined strings
    skills = Column(String)
    certifications = Column(String)
    experience_years = Column(Float)
    education_level = Column(String)
    summary = Column(String)


# Create engine and session: a SQLite file named resumes.db, with the table
# created on first run and one module-level session shared by the functions below.
engine = create_engine('sqlite:///resumes.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
60
+
61
+ # Custom CSS to enhance UI
62
def set_custom_css():
    """Inject the app's custom CSS (progress-bar colours, skill tags) into the page."""
    stylesheet = """
        <style>
        .stProgress .st-bo {
            background-color: #f0f2f6;
        }
        .stProgress .st-bp {
            background: linear-gradient(to right, #4CAF50, #8BC34A);
        }
        .skill-tag {
            display: inline-block;
            padding: 5px 10px;
        }
        </style>
    """
    # unsafe_allow_html is required for the <style> tag to be emitted as HTML
    st.markdown(stylesheet, unsafe_allow_html=True)
77
+
78
def get_docparser_data(file, api_key, parser_id) -> Optional[dict]:
    """Upload a resume to Docparser and return the parsed fields.

    Args:
        file: Open binary file (or file-like upload) to send as the document.
        api_key: Docparser API key, sent as the HTTP Basic Auth user name.
        parser_id: ID of the Docparser parser that should process the file.

    Returns:
        The parsed result as a dict (the first entry when the API answers
        with a list), or None when the upload, the fetch, or the response
        shape is unusable. Errors are reported through ``st.error``.
    """
    upload_url = f"https://api.docparser.com/v1/document/upload/{parser_id}"
    auth = (api_key, '')  # Use HTTP Basic Auth with the API key
    # requests never times out by default; without this a stalled connection
    # would block the Streamlit script indefinitely. (connect, read) seconds.
    timeout = (10, 60)
    try:
        # Upload the document
        response = requests.post(upload_url, auth=auth, files={'file': file}, timeout=timeout)
        response.raise_for_status()
        document_id = response.json().get('id')

        # Ensure document ID is valid
        if not document_id:
            st.error("Failed to retrieve document ID from Docparser.")
            return None

        # Fetch parsed data
        # NOTE(review): results are requested immediately after upload; if
        # Docparser parses asynchronously this may need a short retry loop.
        result_url = f"https://api.docparser.com/v1/results/{parser_id}/{document_id}"
        result_response = requests.get(result_url, auth=auth, timeout=timeout)
        result_response.raise_for_status()
        data = result_response.json()

        # The API may answer with a list; use its first result if there is one.
        if isinstance(data, list):
            data = data[0] if data else None

        # Callers read fields with .get(), so anything but a dict is unusable.
        return data if isinstance(data, dict) else None
    except requests.exceptions.HTTPError as http_err:
        st.error(f"HTTP error occurred: {http_err}")
    except Exception as e:
        st.error(f"Error fetching data from Docparser: {e}")
    return None
109
+
110
def get_openai_data(file_path: str, openai_key: str) -> Optional[dict]:
    """Extract structured resume fields from a PDF via an OpenAI completion.

    Args:
        file_path: Path to a PDF on disk, or an already-open binary file-like
            object. process_resume in this file passes open files / uploads,
            which the previous ``open(file_path)`` call could not handle.
        openai_key: OpenAI API key used for the request.

    Returns:
        A dict of the fields returned by the model. When the reply is not
        valid JSON the raw text is returned under the ``summary`` key.
        None when the PDF has no extractable text, the reply is empty, or
        any error occurs (reported through ``st.error``).
    """
    openai.api_key = openai_key
    try:
        if isinstance(file_path, (str, bytes, os.PathLike)):
            with open(file_path, 'rb') as file:
                resume_text = _extract_pdf_text(file)
        else:
            resume_text = _extract_pdf_text(file_path)

        if not resume_text.strip():
            st.error("No extractable text found in the resume PDF.")
            return None

        # Rough budget so prompt + 1500 completion tokens fit the model's
        # context; assumes ~4 characters per token - TODO confirm.
        max_chars = 8000
        # The keys requested here are the ones process_resume reads.
        prompt = (
            "Extract the following fields from the resume below and reply "
            "with a single JSON object and nothing else. Keys: name, email, "
            "phone, degree, institution, year, experience_years (number), "
            "certifications (list of strings), skills (list of strings), "
            "projects (list of strings), summary.\n\nResume:\n"
            f"{resume_text[:max_chars]}"
        )
        # NOTE(review): Completion/text-davinci-003 belong to the legacy
        # pre-1.0 SDK; confirm the model is still served for this account.
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=1500
        )
        return _parse_llm_json(response.choices[0].text)
    except Exception as e:
        st.error(f"Error fetching data from OpenAI: {e}")
        return None


def _extract_pdf_text(pdf_stream) -> str:
    """Return the text of every page of an open binary PDF stream, newline-joined."""
    reader = PyPDF2.PdfReader(pdf_stream)
    # extract_text() can yield None/empty for image-only pages
    return "\n".join((page.extract_text() or "") for page in reader.pages)


def _parse_llm_json(text) -> Optional[dict]:
    """Parse the model reply into a dict, falling back to {'summary': text}."""
    text = (text or "").strip()
    if not text:
        return None
    # Models often wrap the JSON in prose; keep only the outermost braces.
    start, end = text.find('{'), text.rfind('}')
    if start != -1 and end > start:
        try:
            parsed = json.loads(text[start:end + 1])
        except ValueError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
    return {'summary': text}
124
+
125
def calculate_weighted_score(skills, certifications, experience_years, education_level, projects, skill_weight, certification_weight, experience_weight, education_weight, project_weight):
    """Combine per-category sub-scores (each 0-100) into one weighted score.

    Sub-scores: 15 points per skill, 20 per certification, 15 per year of
    experience, 10 per project (each capped at 100), and 100 for any stated
    education level.

    Args:
        skills, certifications, projects: Sized collections; None counts as empty.
        experience_years: Number (or numeric string) of years; None, negative
            or unparsable values count as 0.
        education_level: Degree text; empty/None or the file's
            'Not specified' placeholder earns no education credit.
        *_weight: Multiplier applied to the matching sub-score.

    Returns:
        The weighted sum, capped at 100 and rounded to 2 decimals.
    """
    try:
        years = float(experience_years)
    except (TypeError, ValueError):
        years = 0.0
    # NaN compares unequal to itself; treat it (and negatives) as no experience
    if years != years or years < 0:
        years = 0.0

    # 'Not specified' is the default degree text used elsewhere in this file;
    # it is truthy, so without this check every resume got full credit.
    has_education = bool(education_level) and str(education_level).strip().lower() != 'not specified'

    skill_score = min(len(skills or ()) * 15, 100)
    certification_score = min(len(certifications or ()) * 20, 100)
    experience_score = min(years * 15, 100)
    education_score = 100 if has_education else 0
    project_score = min(len(projects or ()) * 10, 100)  # Assuming each project contributes 10 points

    total_score = (
        skill_score * skill_weight +
        certification_score * certification_weight +
        experience_score * experience_weight +
        education_score * education_weight +
        project_score * project_weight
    )

    return round(min(total_score, 100), 2)
141
+
142
def process_resume(file, job_description, filename, parser_choice, openai_key=None, api_key=None, parser_id=None, skill_weight=0.9, certification_weight=0.05, experience_weight=0.03, education_weight=0.02, project_weight=0.1):
    """Parse one resume, score it, store the score, and return the details.

    Args:
        file: Open binary file / upload holding the resume PDF.
        job_description: Job description text (accepted but not used here).
        filename: Display name for the resume; falls back to the parsed name.
        parser_choice: "Docparser" or "OpenAI".
        openai_key: API key for the OpenAI parser.
        api_key, parser_id: Credentials for the Docparser parser.
        *_weight: Weights forwarded to calculate_weighted_score.

    Returns:
        A dict with name, score, personal_details, education, experience,
        certifications, skills, projects and summary; or None on failure
        (the failure is reported through Streamlit and the DB session is
        rolled back).
    """
    try:
        if parser_choice == "Docparser":
            data = get_docparser_data(file, api_key, parser_id)
        elif parser_choice == "OpenAI":
            data = get_openai_data(file, openai_key)
        else:
            st.error("Invalid parser choice")
            return None

        # Fields are read with .get(), so only a non-empty dict is usable.
        if not data or not isinstance(data, dict):
            st.warning(f"Failed to extract data from the resume {filename}")
            return None

        # Extract fields from the response
        personal_details = {
            'name': data.get('name', 'Unknown'),
            'email': data.get('email', 'Unknown'),
            'phone': data.get('phone', 'Unknown')
        }
        education = {
            'degree': data.get('degree', 'Not specified'),
            'institution': data.get('institution', 'Not specified'),
            'year': data.get('year', 'Not specified')
        }
        experience_years = _as_float(data.get('experience_years'))

        # Ensure certifications, skills, and projects are lists of strings
        certifications = _as_str_list(data.get('certifications'))
        skills = _as_str_list(data.get('skills'))
        projects = _as_str_list(data.get('projects'))  # Assuming 'projects' is a key in the data
        summary = data.get('summary', 'No summary available')

        # Calculate weighted score. The raw degree (None when absent) is
        # passed so a missing degree is not scored via the display placeholder.
        weighted_score = calculate_weighted_score(
            skills, certifications, experience_years, data.get('degree'), projects,
            skill_weight, certification_weight, experience_weight, education_weight, project_weight
        )

        resume_name = filename or personal_details.get('name', 'Unknown')

        session.add(ResumeScore(
            resume_name=resume_name,
            score=weighted_score,
            skills=', '.join(skills),
            certifications=', '.join(certifications),
            experience_years=experience_years,
            education_level=education.get('degree', 'Not specified'),
            summary=summary
        ))
        session.commit()

        return {
            'name': resume_name,
            'score': weighted_score,
            'personal_details': personal_details,
            'education': education,
            'experience': {'total_years': experience_years},
            'certifications': certifications,
            'skills': skills,
            'projects': projects,  # Include projects in the result
            'summary': summary
        }
    except Exception as e:
        st.error(f"Error processing the resume {filename}: {e}")
        session.rollback()
        return None


def _as_float(value, default=0.0):
    """Coerce a parsed numeric field to float, using ``default`` when it is missing or unparsable."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _as_str_list(value):
    """Normalise a parsed list-like field into a list of non-empty strings."""
    if value is None:
        return []
    if isinstance(value, str):
        # A plain string would otherwise be iterated character by character.
        pieces = value.replace('\n', ',').split(',')
    elif isinstance(value, (list, tuple, set)):
        pieces = value
    else:
        pieces = [value]

    cleaned = []
    for piece in pieces:
        if isinstance(piece, dict):
            # presumably a parser table row such as {'key_0': 'Python'} - verify
            piece = ' '.join(str(v) for v in piece.values() if v)
        text = str(piece).strip()
        if text:
            cleaned.append(text)
    return cleaned
215
+
216
def process_resumes(folder_path, job_description, parser_choice, openai_key=None, api_key=None, parser_id=None, skill_weight=0.9, certification_weight=0.05, experience_weight=0.03, education_weight=0.02, project_weight=0.1):
    """Run process_resume over every PDF in a folder and collect the results.

    Args:
        folder_path: Directory to scan (non-recursive) for ``*.pdf`` files.
        job_description, parser_choice, openai_key, api_key, parser_id,
        *_weight: Forwarded unchanged to process_resume.

    Returns:
        List of result dicts for the resumes that processed successfully;
        an empty list when the folder is invalid, has no PDFs, or an
        unexpected error occurs.
    """
    if not os.path.isdir(folder_path):
        st.error("Invalid folder path")
        return []

    scores = []

    try:
        # sorted() makes the processing/display order deterministic
        pdf_files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith('.pdf'))

        if not pdf_files:
            st.warning("No PDF files found in the folder")
            return []

        total_files = len(pdf_files)
        progress_bar = st.progress(0)

        for index, filename in enumerate(pdf_files):
            file_path = os.path.join(folder_path, filename)
            try:
                with open(file_path, 'rb') as file:
                    result = process_resume(file, job_description, filename, parser_choice, openai_key, api_key, parser_id, skill_weight, certification_weight, experience_weight, education_weight, project_weight)
            except OSError as e:
                # One unreadable file must not abort the rest of the batch.
                st.warning(f"Could not read {filename}: {e}")
                result = None

            if result:
                scores.append(result)

            progress_bar.progress((index + 1) / total_files)

        st.success(f"Successfully processed {len(scores)} resumes")
        return scores

    except Exception as e:
        st.error(f"Error processing resumes: {e}")
        session.rollback()
        return []
252
+
253
def display_results(result):
    """Show one analysed resume inside a Streamlit expander.

    ``result`` is the dict returned by process_resume: the score, skills,
    certifications, experience and degree are listed, and a button reveals
    the resume summary.
    """
    def bullet_list(items, empty_text):
        # One markdown bullet per item, or the fallback line when empty.
        if not items:
            st.markdown(empty_text)
            return
        for item in items:
            st.markdown(f"- {item}")

    display_name = result.get('name', 'Unknown')
    with st.expander(f"📄 {display_name} - Match: {result['score']}%"):
        st.write(f"### Overall Match Score: {result['score']}%")

        st.write("### Skills Found:")
        bullet_list(result['skills'], "No skills found.")

        st.write("### Certifications:")
        bullet_list(result['certifications'], "No certifications found.")

        total_years = result['experience'].get('total_years', 0)
        st.write(f"### Total Years of Experience: {total_years}")

        st.write("### Education:")
        degree = result['education'].get('degree', 'Not specified')
        st.markdown(f"- Degree: {degree}")

        button_label = f"View Detailed Analysis ({display_name})"
        button_key = f"view_{result.get('name', 'default')}"
        if st.button(button_label, key=button_key):
            st.write("#### Resume Summary:")
            st.text(result['summary'])
278
+
279
def view_scores():
    """List every stored resume score, then the "best fit" subset (score >= 50).

    Each entry offers a "View Detailed Analysis" button and a "Delete"
    button; deletion targets the row's primary key, so resumes that share a
    name are never confused.
    """
    st.header("Stored Resume Scores")
    resumes = session.query(ResumeScore).order_by(ResumeScore.score.desc()).all()

    if not resumes:
        st.write("No resume scores available.")
        return

    rows = [
        {
            'id': resume.id,
            'S.No': idx,
            'Name': resume.resume_name,
            'Score': resume.score,
            'Skills': _stored_list_to_text(resume.skills),
            'Certifications': _stored_list_to_text(resume.certifications),
            'Experience (Years)': resume.experience_years,
            'Education': resume.education_level,
            'Projects': resume.summary  # Assuming projects are part of the summary or add a separate field if needed
        }
        for idx, resume in enumerate(resumes, start=1)
    ]

    # Define a threshold for best-fit resumes
    threshold = 50
    best_fits = [row for row in rows if row['Score'] is not None and row['Score'] >= threshold]

    # Display all resumes
    st.subheader("All Resumes")
    for row in rows:
        _render_score_row(row, key_prefix="")

    # Display best-fit resumes
    if best_fits:
        st.subheader("Best Fit Resumes")
        for row in best_fits:
            _render_score_row(row, key_prefix="best_")


def _stored_list_to_text(raw):
    """Turn a stored skills/certifications value into display text.

    Values are normally plain ', '-joined strings; a JSON list (of strings
    or of dicts carrying a 'key_0' entry) is flattened to the same form.
    """
    if not raw:
        return ''
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        # If parsing fails, treat the value as a plain string
        return raw
    if not isinstance(parsed, list):
        return raw
    parts = []
    for item in parsed:
        if isinstance(item, dict):
            item = item.get('key_0', item)
        parts.append(str(item))
    return ', '.join(parts)


def _render_score_row(row, key_prefix):
    """Render one stored resume with its view/delete buttons.

    ``key_prefix`` keeps widget keys unique when the same row is shown in
    both the "all" and the "best fit" sections.
    """
    st.write(f"**{row['S.No']}. {row['Name']}**")
    st.write(f"Score: {row['Score']}%")
    st.write(f"Skills: {row['Skills']}")
    st.write(f"Certifications: {row['Certifications']}")
    st.write(f"Experience: {row['Experience (Years)']} years")
    st.write(f"Education: {row['Education']}")
    st.write(f"Projects: {row['Projects']}")

    col1, col2 = st.columns([1, 1])
    with col1:
        if st.button(f"View Detailed Analysis ({row['Name']})", key=f"view_{key_prefix}{row['id']}"):
            st.write(f"## Analysis Report for {row['Name']}")
            st.write(f"### Score: {row['Score']}%")
            st.write(f"### Skills: {row['Skills']}")
            st.write(f"### Certifications: {row['Certifications']}")
            st.write(f"### Experience: {row['Experience (Years)']} years")
            st.write(f"### Education: {row['Education']}")
            st.write("### Projects:")
            st.text(row['Projects'])
    with col2:
        if st.button(f"Delete {row['Name']}", key=f"delete_{key_prefix}{row['id']}"):
            # Look the row up by primary key: names are not unique, so
            # filtering on resume_name could delete a different resume.
            resume_to_delete = session.query(ResumeScore).filter_by(id=row['id']).first()
            deleted = False
            if resume_to_delete:
                try:
                    session.delete(resume_to_delete)
                    session.commit()
                    deleted = True
                except Exception as e:
                    session.rollback()
                    st.error(f"Could not delete {row['Name']}: {e}")
            if deleted:
                st.success(f"Deleted {row['Name']} from the database.")
                # Rerun so the deleted row disappears from the list; the
                # function name depends on the installed Streamlit version.
                rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
                if rerun is not None:
                    rerun()
383
+
384
def main():
    """Streamlit entry point: the "Home" analysis page or the stored-scores page."""
    st.title("Resume Analyzer")
    set_custom_css()

    menu = ["Home", "View Scores"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        analysis_type = st.selectbox("Select Analysis Type:", ["Single Resume", "Folder Upload"])
        method_choice = st.selectbox("Select Method:", ["Use LLM", "Use Field Extraction"])

        # Bind every credential up front: the analyze calls below pass all
        # three, and previously api_key/parser_id were unbound in LLM mode.
        openai_key = None
        api_key = None
        parser_id = None
        if method_choice == "Use LLM":
            openai_key = st.text_input("Enter OpenAI API Key:", type="password")
            parser_choice = "OpenAI"
        else:
            parser_choice = "Docparser"  # Only Docparser is available for field extraction
            api_key = st.text_input("Enter Docparser API Key:", type="password")
            parser_id = st.text_input("Enter Docparser Parser ID:")

        job_description = st.text_area("Enter job description:", height=150, placeholder="Paste job description here...", key="job_desc")

        # Configure weights
        st.sidebar.header("Configure Weights")
        skill_weight = st.sidebar.slider("Skill Weight", 0.0, 1.0, 0.9)
        certification_weight = st.sidebar.slider("Certification Weight", 0.0, 1.0, 0.05)
        experience_weight = st.sidebar.slider("Experience Weight", 0.0, 1.0, 0.03)
        education_weight = st.sidebar.slider("Education Weight", 0.0, 1.0, 0.02)
        project_weight = st.sidebar.slider("Project Weight", 0.0, 1.0, 0.1)  # New slider for project weight

        if analysis_type == "Single Resume":
            uploaded_file = st.file_uploader("Upload a resume PDF file", type="pdf")

            if st.button("Analyze Resume"):
                if not uploaded_file:
                    st.error("Please upload a resume PDF file")
                    return
                if not job_description:
                    st.error("Please enter a job description")
                    return
                credentials_error = _credentials_error(method_choice, openai_key, api_key, parser_id)
                if credentials_error:
                    st.error(credentials_error)
                    return
                with st.spinner("Processing resume..."):
                    result = process_resume(uploaded_file, job_description, uploaded_file.name, parser_choice, openai_key, api_key, parser_id, skill_weight, certification_weight, experience_weight, education_weight, project_weight)
                    if result:
                        st.success("Analysis complete!")
                        display_results(result)
                    else:
                        st.warning("Failed to process the resume.")

        elif analysis_type == "Folder Upload":
            folder_path = st.text_input("Resume folder path:", placeholder="e.g. C:/Users/username/resumes")

            if st.button("Analyze Resumes"):
                if not folder_path:
                    st.error("Please enter the folder path containing resumes")
                    return
                if not job_description:
                    st.error("Please enter a job description")
                    return
                credentials_error = _credentials_error(method_choice, openai_key, api_key, parser_id)
                if credentials_error:
                    st.error(credentials_error)
                    return
                with st.spinner("Processing resumes..."):
                    scores = process_resumes(folder_path, job_description, parser_choice, openai_key, api_key, parser_id, skill_weight, certification_weight, experience_weight, education_weight, project_weight)
                    if scores:
                        st.success("Analysis complete!")
                        for result in scores:
                            display_results(result)
                    else:
                        st.warning("No valid resumes found to process")

        with st.expander("ℹ️ How to use"):
            st.markdown("""
            1. Select the analysis type: Single Resume or Folder Upload.
            2. Choose the method: Use LLM or Use Field Extraction.
            3. If using LLM, enter the OpenAI API key.
            4. If using Field Extraction, enter the Docparser API key and Parser ID.
            5. Upload a resume PDF file or enter the path to a folder containing resumes.
            6. Paste the job description.
            7. Configure the weights for skills, certifications, experience, education, and projects.
            8. Click 'Analyze' to start processing.
            9. View the match score and extracted information.
            10. Click 'View Detailed Analysis' to see the summary and more details.
            """)

    elif choice == "View Scores":
        view_scores()


def _credentials_error(method_choice, openai_key, api_key, parser_id):
    """Return the message for missing credentials of the chosen method, else None."""
    if method_choice == "Use LLM" and not openai_key:
        return "Please enter the OpenAI API key"
    if method_choice == "Use Field Extraction" and (not api_key or not parser_id):
        return "Please enter the Docparser API key and Parser ID"
    return None


if __name__ == "__main__":
    main()