HARISH20205 committed · Commit ebed33b · 0 Parent(s)
.dockerignore ADDED
@@ -0,0 +1,49 @@
+ # Virtual environments
+ myenv/
+ venv/
+ env/
+
+ # Data directories
+ Data/
+ data/
+
+ # Test directories
+ test/
+ tests/
+
+ # IDE settings
+ .vscode/
+ .idea/
+
+ # Python cache files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ .coverage
+ htmlcov/
+
+ # Logs
+ *.log
+ logs/
+
+ # Local configuration
+ .env.local
+ .env.development
+ .env.test
+
+ # Temporary files
+ tmp/
+ temp/
+
+ # Git related
+ .git/
+ .gitignore
+
+ # Docker related
+ .docker/
+
+ # System files
+ .DS_Store
+ Thumbs.db
.gitignore ADDED
@@ -0,0 +1,63 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Django
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+ media
+
+ # Environment variables
+ .env
+ **/.env
+
+ .env.local
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # IDE specific files
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # OS specific files
+ .DS_Store
+ Thumbs.db
+
+ myenv
+ *test/
.hfignore ADDED
@@ -0,0 +1,20 @@
+ # Ignore Python virtual environment
+
+ myenv/
+
+ # Ignore build directories
+
+ __pycache__/
+ build/
+ dist/
+
+ # Ignore sensitive files
+
+ **/.env
+
+ **/*.parquet
+
+ secrets.json
+
+ migrations/
+ Data/
Data/Resume.pdf ADDED
Binary file (107 kB).
 
Data/Resumetest.pdf ADDED
The diff for this file is too large to render.
 
Data/company.json ADDED
The diff for this file is too large to render.
 
Data/output.json ADDED
@@ -0,0 +1,119 @@
+ {"total_score": 70.5, "detailed_scores": {"skills_match": 80.0, "experience_relevance": 65.0, "education_relevance": 80, "overall_formatting": 100}, "feedback": {"strengths": ["Strong skills match", "Strong education relevance", "Strong overall formatting"], "improvements": []}, "detailed_feedback": {"skills_match": {"matching_elements": ["Python"], "missing_elements": ["Django", "REST APIs"], "explanation": "The candidate demonstrates proficiency in Python, which is a key requirement. However, the candidate lacks explicit mention of Django and REST APIs, which are crucial for the job description. While FastAPI and Flask are related to REST APIs, they are not a direct substitute for experience with RESTful principles. The other listed skills are not directly relevant to the job description."}, "experience_relevance": {"matching_elements": ["Experience with Python in the context of AI model development.", "Experience in developing systems that require efficient processing."], "missing_elements": ["Experience with Django.", "Experience with REST APIs.", "Experience in general software engineering outside of AI/ML."], "explanation": "The candidate has relevant experience with Python, which is a core requirement for the software engineer role. The experience developing a speech-to-text summarization system indicates an understanding of efficient processing, which can be valuable. However, the experience lacks direct involvement with Django and REST APIs, which are key technologies mentioned in the job description. The role is for a software engineer, and the work experience is primarily focused on AI/ML, so the relevance is moderate."}, "education_relevance": {"matching_elements": ["Strong GPA: 8.46"], "missing_elements": [], "explanation": "Education assessment completed"}, "overall_formatting": {"matching_elements": ["name", "email", "phone"], "missing_elements": [], "explanation": "Format assessment completed"}}}
+ {"ats_score": 72.5, "detailed_scores": {"skills_match": 80.0, "experience_relevance": 65.0, "education_relevance": 90, "overall_formatting": 100, "extra_sections": 0.0}, "feedback": {"strengths": ["Strong skills match", "Strong education relevance", "Strong overall formatting"], "improvements": ["Improve extra sections"]}, "detailed_feedback": {"skills_match": {"matching_elements": ["Python"], "missing_elements": ["Django", "REST APIs"], "explanation": "The candidate demonstrates proficiency in Python, which is a core requirement. However, the job description specifically mentions Django and REST APIs, which are not explicitly listed in the candidate's skills. While the candidate has a strong foundation in programming and related technologies, the lack of Django and REST API experience lowers the overall score. Knowledge of frameworks such as FastAPI or Flask may be relevant."}, "experience_relevance": {"matching_elements": ["Python (implied through the use of NLP models and neural network development)", "Experience in developing and improving system performance"], "missing_elements": ["Django", "REST APIs", "Software Engineering experience"], "explanation": "The candidate's experience shows some relevance to the software engineer role. The use of Python for NLP model development and neural networks is a positive indicator. The project focusing on improving system performance is also relevant. However, there is no direct mention of Django or REST APIs, which are key requirements. The internship is relatively short (5 months), and the experience is primarily in research and development rather than software engineering, which lowers the score."}, "education_relevance": {"matching_elements": ["Strong GPA: 8.46", "MTECH (Integrated) in Computer Science and Engineering from Vellore Institute of Technology (VIT) Vellore, India"], "missing_elements": [], "explanation": "Education assessment completed"}, "overall_formatting": {"matching_elements": ["name", "email", "phone"], "missing_elements": [], "explanation": "Format assessment completed"}, "extra_sections": {"matching_elements": [], "missing_elements": ["Awards And Achievements", "Volunteer Experience", "Hobbies And Interests", "Publications", "Conferences And Presentations", "Patents", "Professional Affiliations", "Portfolio Links", "Summary Or Objective"], "explanation": "Additional sections assessment completed"}}}
+
+ {
+   "user_id": 12345,
+   "user_name": "John Doe",
+   "similarity": 0.23571285605430603,
+   "ats_score": {
+     "ats_score": 70.5,
+     "detailed_scores": {
+       "skills_match": 80.0,
+       "experience_relevance": 65.0,
+       "education_relevance": 80,
+       "overall_formatting": 100
+     },
+     "feedback": {
+       "strengths": [
+         "Strong skills match",
+         "Strong education relevance",
+         "Strong overall formatting"
+       ],
+       "improvements": []
+     },
+     "detailed_feedback": {
+       "skills_match": {
+         "matching_elements": [
+           "Python"
+         ],
+         "missing_elements": [
+           "Django",
+           "REST APIs"
+         ],
+         "explanation": "The candidate possesses Python skills, which is a core requirement. Django and REST APIs are missing but are crucial for the job. The other skills, while valuable in general software engineering, are not directly relevant to the specific requirements outlined in the job description, hence the score reflects the partial match."
+       },
+       "experience_relevance": {
+         "matching_elements": [
+           "Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization",
+           "Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART",
+           "Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core machine learning concepts like gradient descent and one-hot encoding"
+         ],
+         "missing_elements": [
+           "Experience with Python",
+           "Experience with Django",
+           "Experience with REST APIs",
+           "Software Engineering specific projects"
+         ],
+         "explanation": "The work experience demonstrates a strong foundation in AI, NLP, and machine learning, which are relevant to software engineering. The intern developed a speech-to-text summarization system and built a neural network, showcasing practical skills. However, the description lacks explicit mention of Python, Django, or REST APIs, which are key requirements for the software engineer role. The experience is relevant but not a direct match for the specific technologies mentioned in the job description. The duration of the internship (Jun 2024 - Oct 2024) is a reasonable length, indicating a solid commitment to the role."
+       },
+       "education_relevance": {
+         "matching_elements": [
+           "Strong GPA: 8.46"
+         ],
+         "missing_elements": [],
+         "explanation": "Education assessment completed"
+       },
+       "overall_formatting": {
+         "matching_elements": [
+           "name",
+           "email",
+           "phone"
+         ],
+         "missing_elements": [],
+         "explanation": "Format assessment completed"
+       }
+     }
+   },
+   "structured_data": {
+     "name": "Harish KB",
+     "email": "[email protected]",
+     "phone": "8248052926",
+     "skills": [
+       "Python",
+       "Java",
+       "C/C++",
+       "Supervised learning",
+       "unsupervised learning",
+       "NLP",
+       "LLMs",
+       "GitHub",
+       "Docker",
+       "Linux",
+       "AWS",
+       "Hugging Face",
+       "OpenCV",
+       "YOLO",
+       "FastAPI",
+       "Flask",
+       "MongoDB",
+       "Firebase"
+     ],
+     "experience": [
+       {
+         "title": "AI Research and Development Intern (Remote)",
+         "company": "eBramha Techworks Private Limited",
+         "dates": "Jun 2024 - Oct 2024",
+         "description": "- Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization, enhancing processing speed and efficiency while significantly reducing overall processing time and improving system performance.\n- Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART, contributing to the development of effective solutions for tasks like summarization and language understanding.\n- Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core machine learning concepts like gradient descent and one-hot encoding."
+       }
+     ],
+     "education": [
+       {
+         "institution": "Vellore Institute of Technology (VIT), Vellore, India",
+         "degree": "MTECH (Integrated) in Computer Science and Engineering",
+         "dates": "Aug 2022 - July 2027",
+         "gpa": "8.46"
+       }
+     ],
+     "certifications": [
+       "Coursera: Supervised Machine Learning: Regression and Classification",
+       "Coursera: Advanced Learning Algorithms",
+       "Coursera: Generative AI with Large Language Models"
+     ],
+     "areas_of_interest": [
+       "Machine Learning and AI",
+       "Full Stack Development",
+       "Cloud Computing and DevOps Practices"
+     ]
+   },
+   "markdown_format": "# Harish KB\n\n8248052926 | [email protected]\n\n## Education\n\nVellore Institute of Technology (VIT), Vellore, India\nMTECH (Integrated) in Computer Science and Engineering (CGPA: 8.46)\nAug 2022 - July 2027\n\n## Experience\n\n**AI Research and Development Intern (Remote)**\neBramha Techworks Private Limited\nJun 2024 - Oct 2024\n\n* Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization, enhancing processing speed and efficiency while significantly reducing overall processing time and improving system performance.\n* Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART, contributing to the development of effective solutions for tasks like summarization and language understanding.\n* Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core machine learning concepts like gradient descent and one-hot encoding.\n\n## Projects\n\n**VerbiSense: Interactive Document Retrieval System**\nLink\n\n* Built the VerbiSense backend with FastAPI, optimizing document uploads, query processing, and API performance for real-time interactions with the React frontend.\n* Integrated Retrieval-Augmented Generation (RAG) for improved document retrieval and response generation.\n* Applied PyTorch models for advanced NLP tasks like semantic understanding and context-based querying.\n\n**Speech-to-Text Summarization**\n\n* Developed a Python script that improved audio transcription accuracy by 30% and reduced post-processing time by 35%.\n* Designed and implemented the frontend interface to provide a seamless, user-friendly experience for individuals interacting with the speech-to-text summarization system.\n\n## Technical Skills\n\n**Languages**: Python, Java, C/C++\n**Machine Learning**: Supervised learning, unsupervised learning, NLP, LLMs\n**Tools**: GitHub, Docker, Linux, AWS, Hugging Face\n**Computer Vision**: OpenCV, YOLO\n**Backend**: FastAPI, Flask, MongoDB, Firebase\n\n## Areas of Interest\n\n* Machine Learning and AI\n* Full Stack Development\n* Cloud Computing and DevOps Practices\n\n## Certifications\n\n* Coursera: Supervised Machine Learning: Regression and Classification\n* Coursera: Advanced Learning Algorithms\n* Coursera: Generative AI with Large Language Models."
+ }
Data/output.txt ADDED
@@ -0,0 +1,49 @@
+ Harish KB
+ 8248052926
+ [email protected]
+ LinkedIn: Harish KB
+ GitHub: HARISH20205
+ Education
+ Vellore Institute of Technology (VIT)
+ Vellore, India
+ MTECH (Integrated) in Computer Science and Engineering (CGPA: 8.46)
+ Aug 2022 – July 2027
+ Experience
+ AI Research and Development Intern (Remote)
+ Jun 2024 – Oct 2024
+ eBramha Techworks Private Limited
+ • Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for
+ summarization, enhancing processing speed and efficiency while significantly reducing overall processing
+ time and improving system performance.
+ • Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART,
+ contributing to the development of effective solutions for tasks like summarization and language
+ understanding.
+ • Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core
+ machine learning concepts like gradient descent and one-hot encoding.
+ Projects
+ VerbiSense: Interactive Document Retrieval System - Link
+ • Built the VerbiSense backend with FastAPI, optimizing document uploads, query processing, and API
+ performance for real-time interactions with the React frontend.
+ • Integrated Retrieval-Augmented Generation (RAG) for improved document retrieval and response
+ generation.
+ • Applied PyTorch models for advanced NLP tasks like semantic understanding and context-based querying.
+ Speech-to-Text Summarization
+ • Developed a Python script that improved audio transcription accuracy by 30% and reduced
+ post-processing time by 35%.
+ • Designed and implemented the frontend interface to provide a seamless, user-friendly experience for
+ individuals interacting with the speech-to-text summarization system.
+ Technical Skills
+ Languages: Python, Java, C/C++
+ Machine Learning: Supervised learning, unsupervised learning, NLP, LLMs
+ Tools: GitHub, Docker, Linux, AWS, Hugging Face
+ Computer Vision: OpenCV, YOLO
+ Backend: FastAPI, Flask, MongoDB, Firebase
+ Areas of Interest
+ • Machine Learning and AI
+ • Full Stack Development
+ • Cloud Computing and DevOps Practices
+ Certifications
+ • Coursera: Supervised Machine Learning: Regression and Classification
+ • Coursera: Advanced Learning Algorithms
+ • Coursera: Generative AI with Large Language Models
+
Data/outputjson.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "sections": [
+     {
+       "page_content": ""
+     }
+   ]
+ }
Data/resumen.pdf ADDED
Binary file (201 kB).
 
Dockerfile ADDED
@@ -0,0 +1,48 @@
+ # Base image with Python optimized for Hugging Face Spaces
+ FROM python:3.11-slim
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+ ENV PORT=7860
+ ENV PYTHONPATH=/app
+
+ # Set working directory
+ WORKDIR /app
+
+ # Install essential system dependencies including build-essential
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     libgl1-mesa-glx \
+     libglib2.0-0 \
+     tesseract-ocr \
+     poppler-utils \
+     build-essential \
+     python3-dev \
+     curl \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements first for better caching
+ COPY requirements.txt /app/
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy only necessary application code
+ COPY app.py /app/
+ COPY Process/ /app/Process/
+ COPY ResumeATS/ /app/ResumeATS/
+ COPY manage.py /app/
+
+ # Create a non-root user for security
+ RUN adduser --disabled-password --gecos "" appuser
+ RUN chown -R appuser:appuser /app
+ USER appuser
+
+ # Expose the port Hugging Face Spaces will use
+ EXPOSE 7860
+
+ # Healthcheck against the GET verify_api/ endpoint (the root path is not routed)
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:${PORT}/verify_api/ || exit 1
+
+ # Run app.py as required for Hugging Face Spaces
+ CMD ["python", "app.py"]
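
Once the image is built and the container is running, the healthcheck endpoint doubles as a quick smoke test. A minimal sketch in Python, assuming the container is published on port 7860; the image name resume-ats is illustrative, not something this repo defines:

import requests

# Assumes something like:
#   docker build -t resume-ats . && docker run -p 7860:7860 resume-ats
resp = requests.get("http://localhost:7860/verify_api/", timeout=10)
print(resp.status_code, resp.json())  # expect 200 and {'message': 'yaay working-GET '}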
Process/__init__.py ADDED
File without changes
Process/admin.py ADDED
@@ -0,0 +1,3 @@
+ from django.contrib import admin
+
+ # Register your models here.
Process/apps.py ADDED
@@ -0,0 +1,6 @@
+ from django.apps import AppConfig
+
+
+ class ApiConfig(AppConfig):
+     default_auto_field = 'django.db.models.BigAutoField'
+     name = 'Process'
Process/ats_parser.py ADDED
@@ -0,0 +1,81 @@
+ import json
+
+ from pydantic import BaseModel
+
+ from .response import get_response
+
+
+ class Section(BaseModel):
+     name: str
+     email: str
+     phone: str
+     skills: str
+     experience: str
+     education: str
+     certifications: str
+     areas_of_interest: str
+
+
+ def deep_get(dictionary, keys, default=None):
+     """Safely walk nested dicts, returning `default` when any key is missing."""
+     for key in keys:
+         if isinstance(dictionary, dict):
+             dictionary = dictionary.get(key, {})
+         else:
+             return default
+     return dictionary if dictionary != {} else default
+
+
+ def extract_resume_details(resume: str):
+     """
+     Extract structured data from raw resume text into predefined fields.
+
+     Parameters:
+         resume (str): The raw text of the resume.
+
+     Returns:
+         dict: The structured data parsed from the model's JSON response.
+     """
+     system_ins = """Analyze the provided resume and perform the following tasks:
+
+ 1. Extract the resume's content into a structured format under the following fields:
+
+ {
+     "structured_data":{
+         "name": None,
+         "email": None,
+         "github": None,
+         "phone": None,
+         "skills": None,
+         "experience": None,
+         "education": None,
+         "certifications": None,
+         "areas_of_interest": None,
+         "projects": None,
+         "languages": None,
+         "awards_and_achievements": None,
+         "volunteer_experience": None,
+         "hobbies_and_interests": None,
+         "publications": None,
+         "conferences_and_presentations": None,
+         "patents": None,
+         "professional_affiliations": None,
+         "portfolio_links": None,
+         "summary_or_objective": None
+     }
+ }
+
+ - Provide this output in JSON format under the key "structured_data".
+ - If a field is missing or cannot be determined, set its value to None.
+ """
+     try:
+         combined_output = get_response(prompt=resume, task=system_ins)
+         result = json.loads(combined_output)
+         return result["structured_data"]
+     except (json.JSONDecodeError, KeyError):
+         # A bare except would also hide real bugs; catch only parsing failures
+         return {"structured_data": "Failed to Get Due to Improper Json Data"}
+
+ # resume = "Harish KB 8248052926 # [email protected] i Harish KB HARISH20205 Education Vellore Institute of Technology (VIT) Vellore, India MTECH (Integrated) in Computer Science and Engineering(CGPA: 8.46) Aug 2022 July 2027 Experience AI Research and Development Intern (Remote) Jun 2024 Oct 2024 eBramha Techworks Private Limited - Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization, enhancing processing speed and efficiency while significantly reducing overall processing time and improving system performance. - Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART, contributing to the development of effective solutions for tasks like summarization and language understanding. - Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core machine learning concepts like gradient descent and one-hot encoding. Projects VerbiSense: Interactive Document Retrieval System - Link - Built the VerbiSense backend with FastAPI, optimizing document uploads, query processing, and API performance for real-time interactions with the React frontend. - Integrated Retrieval-Augmented Generation (RAG) for improved document retrieval and response generation. - Applied PyTorch models for advanced NLP tasks like semantic understanding and context-based querying. Speech-to-Text Summarization - Developed a Python script that improved audio transcription accuracy by 30% and reduced post-processing time by 35%. - Designed and implemented the frontend interface to provide a seamless, user-friendly experience for individuals interacting with the speech-to-text summarization system. Technical Skills Languages: Python, Java, C/C++ Machine Learning: Supervised learning, unsupervised learning, NLP, LLMs Tools: GitHub, Docker, Linux, AWS, Hugging Face Computer Vision: OpenCV, YOLO Backend: FastAPI, Flask, MongoDB, Firebase Areas of Interest - Machine Learning and AI - Full Stack Development - Cloud Computing and DevOps Practices Certifications - Coursera: Supervised Machine Learning: Regression and Classification - Coursera: Advanced Learning Algorithms - Coursera: Generative AI with Large Language Models."
+
+ # print(extract_resume_details(resume))
Process/change.py ADDED
@@ -0,0 +1,54 @@
+ from django.http import JsonResponse
+ from django.views.decorators.csrf import csrf_exempt
+ import json
+ import logging
+
+ from .response import get_response
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ @csrf_exempt
+ def process_change(request):
+     if request.method == "POST":
+         try:
+             # Parse JSON only once
+             data = json.loads(request.body)
+
+             user_id = data.get('user_id')
+             prompt = data.get("prompt")
+             content = data.get('content')
+             section = data.get("section")
+             job_description = data.get('job_description')
+
+             if not all([user_id, prompt, content]):
+                 return JsonResponse({'error': 'Missing required fields'}, status=400)
+
+             # Customize the processing approach based on the section
+             if section in ("skills", "experience", "projects"):
+                 section_specific_instruction = "Format achievements using the X-Y-Z method (e.g., 'Accomplished X as measured by Y, by doing Z'). Provide at least one compelling example that demonstrates measurable impact."
+             else:
+                 section_specific_instruction = "Incorporate relevant keywords from the job description while avoiding generic buzzwords. Focus on specificity and concrete details that align with ATS screening requirements."
+
+             combined_prompt = f"Content: {content}\nJob Description: {job_description}\nTask: {prompt}"
+
+             system_instruction = """As an ATS resume optimizer, modify the content to match the job requirements while preserving truthfulness. Use the X-Y-Z method to quantify achievements where possible (e.g., "Accomplished X as measured by Y, by doing Z"). Format your response as a clean text without any prefixes or explanations. Do not include any JSON formatting in your actual content modification."""
+
+             # Combine system_instruction with section_specific_instruction
+             combined_system_instruction = f"{system_instruction} {section_specific_instruction}"
+
+             modified_content = get_response(combined_prompt, combined_system_instruction)
+
+             return JsonResponse({
+                 'user_id': user_id,
+                 'modified_content': modified_content
+             }, status=200)
+
+         except json.JSONDecodeError:
+             return JsonResponse({'error': 'Invalid JSON format'}, status=400)
+         except Exception as e:
+             logger.error(f"Error in process_change: {str(e)}")
+             return JsonResponse({'error': 'Processing error'}, status=500)
+     else:
+         return JsonResponse({'message': 'Only POST requests are allowed'}, status=405)
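
A minimal client sketch for this endpoint, assuming the dev server from app.py is listening on localhost:7860; the field names mirror what process_change reads above, and the payload values are illustrative:

import requests

payload = {
    "user_id": 12345,  # sample value
    "prompt": "Rewrite this bullet to better match the job description.",
    "content": "Developed a speech-to-text summarization system.",
    "section": "experience",
    "job_description": "Software engineer with Python, Django, and REST APIs.",
}
resp = requests.post("http://localhost:7860/process_change/", json=payload, timeout=60)
print(resp.json())  # {'user_id': 12345, 'modified_content': '...'} on success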
Process/extract.py ADDED
@@ -0,0 +1,27 @@
+ import fitz  # PyMuPDF
+ import pytesseract
+ from PIL import Image
+ import io
+
+ def extract_text_from_pdf(file_path):
+     text = ""
+     doc = fitz.open(file_path)
+
+     for page_num in range(len(doc)):
+         page = doc.load_page(page_num)
+         # Try to extract the embedded text layer first
+         page_text = page.get_text()
+
+         if page_text.strip():  # If text is found
+             text += page_text
+         else:  # If no text, fall back to OCR
+             pix = page.get_pixmap()
+             img = Image.open(io.BytesIO(pix.tobytes("png")))
+             ocr_text = pytesseract.image_to_string(img)
+             text += ocr_text
+
+     return text
+
+ # file_path = "../Data/resumen.pdf"
+ # text = extract_text_from_pdf(file_path)
+ # print(text)
Process/migrations/0001_initial.py ADDED
@@ -0,0 +1,24 @@
+ # Generated by Django 5.1.5 on 2025-02-05 14:15
+
+ from django.db import migrations, models
+
+
+ class Migration(migrations.Migration):
+
+     initial = True
+
+     dependencies = [
+     ]
+
+     operations = [
+         migrations.CreateModel(
+             name='EndPoint',
+             fields=[
+                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                 ('user_name', models.TextField()),
+                 ('user_id', models.IntegerField()),
+                 ('resume', models.TextField()),
+                 ('prompt', models.TextField()),
+             ],
+         ),
+     ]
Process/migrations/0002_endpoint_time.py ADDED
@@ -0,0 +1,20 @@
+ # Generated by Django 5.1.5 on 2025-02-05 14:25
+
+ import django.utils.timezone
+ from django.db import migrations, models
+
+
+ class Migration(migrations.Migration):
+
+     dependencies = [
+         ('Process', '0001_initial'),
+     ]
+
+     operations = [
+         migrations.AddField(
+             model_name='endpoint',
+             name='time',
+             field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
+             preserve_default=False,
+         ),
+     ]
Process/migrations/0003_rename_prompt_endpoint_job_description.py ADDED
@@ -0,0 +1,18 @@
+ # Generated by Django 5.1.5 on 2025-02-13 14:40
+
+ from django.db import migrations
+
+
+ class Migration(migrations.Migration):
+
+     dependencies = [
+         ('Process', '0002_endpoint_time'),
+     ]
+
+     operations = [
+         migrations.RenameField(
+             model_name='endpoint',
+             old_name='prompt',
+             new_name='job_description',
+         ),
+     ]
Process/migrations/__init__.py ADDED
File without changes
Process/models.py ADDED
@@ -0,0 +1,16 @@
+ from django.db import models
+
+ # Create your models here.
+
+ class EndPoint(models.Model):
+     user_id = models.IntegerField()
+     resume = models.TextField()
+     job_description = models.TextField()
+     time = models.DateTimeField(auto_now_add=True)
+
+     def __str__(self):
+         # __str__ must return a string; returning a dict raises a TypeError
+         return (
+             f"EndPoint(user_id={self.user_id}, "
+             f"time={self.time})"
+         )
Process/output.json ADDED
@@ -0,0 +1,121 @@
+ {
+   "user_id": 12345,
+   "similarity": 0.23571285605430603,
+   "ats_score": {
+     "ats_score": 68.5,
+     "detailed_scores": {
+       "skills_match": 80.0,
+       "experience_relevance": 65.0,
+       "education_relevance": 70,
+       "overall_formatting": 100
+     },
+     "feedback": {
+       "strengths": ["Strong skills match", "Strong overall formatting"],
+       "improvements": []
+     },
+     "detailed_feedback": {
+       "skills_match": {
+         "matching_elements": ["Python"],
+         "missing_elements": ["Django", "REST APIs"],
+         "explanation": "The candidate possesses Python skills, which is a core requirement. However, they lack explicit mention of Django and REST APIs, which are essential for the job. Other skills, while valuable in general software engineering, are not directly relevant to the specified job description focusing on Python, Django, and REST APIs."
+       },
+       "experience_relevance": {
+         "matching_elements": [
+           "Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization",
+           "Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART"
+         ],
+         "missing_elements": [
+           "Experience with Django framework",
+           "Experience with REST APIs",
+           "Software engineering experience outside of internship"
+         ],
+         "explanation": "The work experience demonstrates some relevance to software engineering through the development of a speech-to-text summarization system and research on NLP models. These indicate programming and problem-solving skills, which are valuable in software engineering. The implementation of a neural network from scratch further highlights programming capabilities. However, the experience is limited to an internship, and there's no explicit mention of Python, Django, or REST APIs, which are key requirements for the target job description. The score reflects the partial relevance due to general programming and model development experience, but the lack of specific skills and professional experience lowers the score."
+       },
+       "education_relevance": {
+         "matching_elements": [],
+         "missing_elements": [],
+         "explanation": "Education assessment completed"
+       },
+       "overall_formatting": {
+         "matching_elements": ["name", "email", "phone"],
+         "missing_elements": [],
+         "explanation": "Format assessment completed"
+       }
+     }
+   },
+   "structured_data": {
+     "name": "Harish KB",
+     "email": "[email protected]",
+     "phone": "8248052926",
+     "skills": [
+       "Python",
+       "Java",
+       "C/C++",
+       "Supervised learning",
+       "Unsupervised learning",
+       "NLP",
+       "LLMs",
+       "GitHub",
+       "Docker",
+       "Linux",
+       "AWS",
+       "Hugging Face",
+       "OpenCV",
+       "YOLO",
+       "FastAPI",
+       "Flask",
+       "MongoDB",
+       "Firebase"
+     ],
+     "experience": [
+       {
+         "title": "AI Research and Development Intern (Remote)",
+         "company": "eBramha Techworks Private Limited",
+         "start_date": "Jun 2024",
+         "end_date": "Oct 2024",
+         "description": "- Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization, enhancing processing speed and efficiency while significantly reducing overall processing time and improving system performance.\n- Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART, contributing to the development of effective solutions for tasks like summarization and language understanding.\n- Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core machine learning concepts like gradient descent and one-hot encoding."
+       }
+     ],
+     "education": [
+       {
+         "institution": "Vellore Institute of Technology (VIT)",
+         "location": "Vellore, India",
+         "degree": "MTECH (Integrated) in Computer Science and Engineering",
+         "graduation_date": "July 2027",
+         "start_date": "Aug 2022",
+         "cgpa": "8.46"
+       }
+     ],
+     "certifications": [
+       "Coursera: Supervised Machine Learning: Regression and Classification",
+       "Coursera: Advanced Learning Algorithms",
+       "Coursera: Generative AI with Large Language Models"
+     ],
+     "areas_of_interest": [
+       "Machine Learning and AI",
+       "Full Stack Development",
+       "Cloud Computing and DevOps Practices"
+     ],
+     "projects": [
+       {
+         "name": "VerbiSense: Interactive Document Retrieval System",
+         "description": "- Built the VerbiSense backend with FastAPI, optimizing document uploads, query processing, and API performance for real-time interactions with the React frontend.\n- Integrated Retrieval-Augmented Generation (RAG) for improved document retrieval and response generation.\n- Applied PyTorch models for advanced NLP tasks like semantic understanding and context-based querying."
+       },
+       {
+         "name": "Speech-to-Text Summarization",
+         "description": "- Developed a Python script that improved audio transcription accuracy by 30% and reduced post-processing time by 35%.\n- Designed and implemented the frontend interface to provide a seamless, user-friendly experience for individuals interacting with the speech-to-text summarization system."
+       }
+     ],
+     "languages": ["Python", "Java", "C/C++"],
+     "awards_and_achievements": null,
+     "volunteer_experience": null,
+     "hobbies_and_interests": null,
+     "publications": null,
+     "conferences_and_presentations": null,
+     "patents": null,
+     "professional_affiliations": null,
+     "portfolio_links": null,
+     "summary_or_objective": null
+   },
+   "markdown_format": "# Harish KB\n\n8248052926 | [email protected]\n\n## Education\n\nVellore Institute of Technology (VIT), Vellore, India\nMTECH (Integrated) in Computer Science and Engineering (CGPA: 8.46)\nAug 2022 - July 2027\n\n## Experience\n\n**AI Research and Development Intern (Remote)**\neBramha Techworks Private Limited\nJun 2024 - Oct 2024\n\n* Developed a speech-to-text summarization system integrating Whisper for transcription and Pegasus for summarization, enhancing processing speed and efficiency while significantly reducing overall processing time and improving system performance.\n* Conducted in-depth research on advanced NLP models such as PEGASUS, BERTsum and BART, contributing to the development of effective solutions for tasks like summarization and language understanding.\n* Built a neural network for handwritten digit classification (MNIST) from scratch, implementing core machine learning concepts like gradient descent and one-hot encoding.\n\n## Projects\n\n**VerbiSense: Interactive Document Retrieval System**\n\n* Built the VerbiSense backend with FastAPI, optimizing document uploads, query processing, and API performance for real-time interactions with the React frontend.\n* Integrated Retrieval-Augmented Generation (RAG) for improved document retrieval and response generation.\n* Applied PyTorch models for advanced NLP tasks like semantic understanding and context-based querying.\n\n**Speech-to-Text Summarization**\n\n* Developed a Python script that improved audio transcription accuracy by 30% and reduced post-processing time by 35%.\n* Designed and implemented the frontend interface to provide a seamless, user-friendly experience for individuals interacting with the speech-to-text summarization system.\n\n## Technical Skills\n\n**Languages:** Python, Java, C/C++\n**Machine Learning:** Supervised learning, unsupervised learning, NLP, LLMs\n**Tools:** GitHub, Docker, Linux, AWS, Hugging Face\n**Computer Vision:** OpenCV, YOLO\n**Backend:** FastAPI, Flask, MongoDB, Firebase\n\n## Areas of Interest\n\n* Machine Learning and AI\n* Full Stack Development\n* Cloud Computing and DevOps Practices\n\n## Certifications\n\n* Coursera: Supervised Machine Learning: Regression and Classification\n* Coursera: Advanced Learning Algorithms\n* Coursera: Generative AI with Large Language Models."
+ }
Process/response.py ADDED
@@ -0,0 +1,24 @@
+ import os
+ from dotenv import load_dotenv
+
+ from google import genai
+ from google.genai import types
+
+ load_dotenv()
+
+
+ sys_instruct = "Provide the output in JSON format where the key is the topic and the value is a list of relevant contents. Ensure the response is clear, user friendly, structured."
+
+ def get_response(prompt, task):
+     client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+     response = client.models.generate_content(
+         model="gemini-2.0-flash",
+         config=types.GenerateContentConfig(
+             system_instruction=task + sys_instruct, response_mime_type='application/json', temperature=0.6),
+         contents=prompt
+     )
+     # print(response.text)
+     return response.text
+
+ # get_response("What is AI?", "explain the given prompt")
Process/serializers.py ADDED
@@ -0,0 +1,9 @@
+ from rest_framework import serializers
+ from .models import EndPoint
+
+
+ class EndPointSerializer(serializers.ModelSerializer):
+     class Meta:
+         model = EndPoint
+         # 'prompt' was renamed to 'job_description' in migration 0003
+         fields = ['id', 'user_id', 'resume', 'job_description', 'time']
Process/tests.py ADDED
@@ -0,0 +1,3 @@
+ from django.test import TestCase
+
+ # Create your tests here.
Process/urls.py ADDED
@@ -0,0 +1,10 @@
+ from django.urls import path
+ from . import views
+ from .change import process_change
+
+ urlpatterns = [
+     path('process_resume/', views.process_resume, name='handle_request'),
+     path('process_change/', process_change, name="handle_change"),
+     path('verify_api/', views.verify_api, name='verify_api'),
+ ]
+
Process/utils.py ADDED
@@ -0,0 +1,271 @@
+ import json
+ import concurrent.futures
+ import logging
+ from typing import Dict, List, Optional, Union
+ from .response import get_response
+
+ # Set up logging
+ logger = logging.getLogger(__name__)
+
+ SYSTEM_INSTRUCTION = """
+ Provide responses in this exact JSON format:
+ {
+     "score": <number 0-10>,
+     "matching_elements": [<list of matching items>],
+     "missing_elements": [<list of recommended items>],
+     "explanation": "<explanation in 10-15 words>"
+ }
+ Ensure the score is always a number between 0-10.
+ """
+
+ class ATSResumeParser:
+     def __init__(self):
+         self.score_weights = {
+             'skills_match': 30,
+             'experience_relevance': 25,
+             'education_relevance': 10,
+             'overall_formatting': 15,
+             'keyword_optimization': 10,
+             'extra_sections': 10
+         }
+         self.total_weight = sum(self.score_weights.values())
+
+     def _parse_gemini_response(self, response_text: str) -> Dict:
+         """Parse the Gemini API response into a normalized score dict"""
+         try:
+             response = json.loads(response_text)
+             return {
+                 'score': float(response['score']),
+                 'matching': response.get('matching_elements', []),
+                 'missing': response.get('missing_elements', []),
+                 'explanation': response.get('explanation', '')
+             }
+         except (json.JSONDecodeError, KeyError, ValueError):
+             return {'score': 5.0, 'matching': [], 'missing': [], 'explanation': ''}
+
+     def _score_skills(self, skills: List[str], job_description: Optional[str]) -> Dict:
+         """Score skills with optimized processing"""
+         logger.debug("skills: %s", skills)
+         if not skills:
+             return {'score': 0, 'matching': [], 'missing': [], 'explanation': 'No skills provided'}
+
+         base_score = 70
+
+         skills_length = len(skills)
+         if skills_length >= 5:
+             base_score += 10
+         if skills_length >= 10:
+             base_score += 10
+
+         if not job_description:
+             return {'score': base_score, 'matching': skills, 'missing': [], 'explanation': 'No job description provided'}
+
+         prompt = f"Skills: {','.join(skills[:20])}. Job description: {job_description[:500]}. Rate match."
+
+         response = self._parse_gemini_response(
+             get_response(prompt, SYSTEM_INSTRUCTION)
+         )
+         logger.debug("completed skills: %s", response)
+         return {
+             'score': (base_score + (response['score'] * 10)) / 2,
+             'matching': response['matching'],
+             'missing': response['missing'],
+             'explanation': response['explanation']
+         }
+
+     def _score_experience(self, experience: List[Dict], job_description: Optional[str]) -> Dict:
+         """Score experience with optimized processing"""
+         logger.debug("experience: %s", experience)
+         if not experience:
+             return {'score': 0, 'matching': [], 'missing': [], 'explanation': 'No experience provided'}
+
+         base_score = 60
+
+         required_keys = {'title', 'company', 'description'}
+         improvement_keywords = {'increased', 'decreased', 'improved', '%', 'reduced'}
+
+         for exp in experience:
+             if required_keys.issubset(exp.keys()):
+                 base_score += 10
+
+             description = exp.get('description', '')
+             if description and any(keyword in description for keyword in improvement_keywords):
+                 base_score += 5
+
+         if not job_description:
+             return {'score': base_score, 'matching': [], 'missing': [], 'explanation': 'No job description provided'}
+
+         simplified_exp = [{'title': e.get('title', ''), 'description': e.get('description', '')[:100]}
+                           for e in experience[:3]]
+
+         prompt = f"Experience: {json.dumps(simplified_exp)}. Job description: {job_description[:500]}. Rate match."
+
+         response = self._parse_gemini_response(
+             get_response(prompt, SYSTEM_INSTRUCTION)
+         )
+         logger.debug("completed experience: %s", response)
+         return {
+             'score': (base_score + (response['score'] * 10)) / 2,
+             'matching': response['matching'],
+             'missing': response['missing'],
+             'explanation': response['explanation']
+         }
+
+     def _score_education(self, education: List[Dict]) -> Dict:
+         """Score education with optimized processing"""
+         if not education:
+             return {'score': 0, 'matching': [], 'missing': [], 'explanation': 'No education provided'}
+
+         score = 70
+         matching = []
+
+         required_keys = {'institution', 'degree', 'start_date', 'end_date'}
+
+         for edu in education:
+             gpa = edu.get('gpa')
+             if gpa and float(gpa) > 3.0:
+                 score += 10
+                 matching.append(f"Strong GPA: {gpa}")
+
+             if required_keys.issubset(edu.keys()):
+                 score += 10
+                 matching.append(f"{edu.get('degree', '')} from {edu.get('institution', '')}")
+
+         return {
+             'score': min(100, score),
+             'matching': matching,
+             'missing': [],
+             'explanation': 'Education assessment completed'
+         }
+
+     def _score_formatting(self, structured_data: Dict) -> Dict:
+         """Score formatting with optimized processing"""
+         score = 100
+
+         contact_fields = ('name', 'email', 'phone')
+         essential_sections = ('skills', 'experience', 'education')
+
+         structured_keys = set(structured_data.keys())
+
+         missing_contacts = [field for field in contact_fields if field not in structured_keys]
+         if missing_contacts:
+             score -= 20
+
+         missing_sections = [section for section in essential_sections if section not in structured_keys]
+         missing_penalty = 15 * len(missing_sections)
+         if missing_sections:
+             score -= missing_penalty
+
+         return {
+             'score': max(0, score),
+             'matching': [field for field in contact_fields if field in structured_keys],
+             'missing': missing_contacts + missing_sections,
+             'explanation': 'Format assessment completed'
+         }
+
+     def _score_extra(self, structured_data: Dict) -> Dict:
+         """Score extra sections with optimized processing"""
+         extra_sections = {
+             "awards_and_achievements": 15,
+             "volunteer_experience": 10,
+             "hobbies_and_interests": 5,
+             "publications": 15,
+             "conferences_and_presentations": 10,
+             "patents": 15,
+             "professional_affiliations": 10,
+             "portfolio_links": 10,
+             "summary_or_objective": 10
+         }
+
+         total_possible = sum(extra_sections.values())
+
+         structured_keys = set(structured_data.keys())
+
+         score = 0
+         matching = []
+         missing = []
+
+         for section, weight in extra_sections.items():
+             if section in structured_keys and structured_data.get(section):
+                 score += weight
+                 matching.append(section.replace('_', ' ').title())
+             else:
+                 missing.append(section.replace('_', ' ').title())
+
+         normalized_score = (score * 100) // total_possible if total_possible > 0 else 0
+
+         return {
+             'score': normalized_score,
+             'matching': matching,
+             'missing': missing,
+             'explanation': 'Additional sections assessment completed'
+         }
+
+     def parse_and_score(self, structured_data: Dict, job_description: Optional[str] = None) -> Dict:
+         """Parse and score resume with parallel processing"""
+         scores = {}
+         feedback = {'strengths': [], 'improvements': []}
+         detailed_feedback = {}
+
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             # Define tasks to run in parallel
+             tasks = {
+                 'skills_match': executor.submit(self._score_skills, structured_data.get('skills', []), job_description),
+                 'experience_relevance': executor.submit(self._score_experience, structured_data.get('experience', []), job_description),
+                 'education_relevance': executor.submit(self._score_education, structured_data.get('education', [])),
+                 'overall_formatting': executor.submit(self._score_formatting, structured_data),
+                 'extra_sections': executor.submit(self._score_extra, structured_data)
+             }
+
+             total_score = 0
+             for category, future in tasks.items():
+                 result = future.result()
+
+                 scores[category] = result['score']
+
+                 weight = self.score_weights[category] / 100
+                 total_score += result['score'] * weight
+
+                 detailed_feedback[category] = {
+                     'matching_elements': result['matching'],
+                     'missing_elements': result['missing'],
+                     'explanation': result['explanation']
+                 }
+
+                 if result['score'] >= 80:
+                     feedback['strengths'].append(f"Strong {category.replace('_', ' ')}")
+                 elif result['score'] < 60:
+                     feedback['improvements'].append(f"Improve {category.replace('_', ' ')}")
+
+         return {
+             'total_score': round(total_score, 2),
+             'detailed_scores': scores,
+             'feedback': feedback,
+             'detailed_feedback': detailed_feedback
+         }
+
+ def generate_ats_score(structured_data: Union[Dict, str], job_des_text: Optional[str] = None) -> Dict:
+     """Generate ATS score with optimized processing"""
+     try:
+         logger.debug("input structured_data: %s", structured_data)
+         if not structured_data:
+             return {"error": "No resume data provided"}
+
+         if isinstance(structured_data, str):
+             try:
+                 structured_data = json.loads(structured_data)
+             except json.JSONDecodeError:
+                 return {"error": "Invalid JSON format in resume data"}
+
+         parser = ATSResumeParser()
+         result = parser.parse_and_score(structured_data, job_des_text)
+
+         return {
+             'ats_score': result['total_score'],
+             'detailed_scores': result['detailed_scores'],
+             'feedback': result['feedback'],
+             'detailed_feedback': result['detailed_feedback']
+         }
+
+     except Exception as e:
+         return {"error": f"An error occurred: {str(e)}"}
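
The weights in ATSResumeParser sum to 100, so each category contributes score * weight / 100 to the total. One quirk worth seeing in numbers: keyword_optimization carries a weight, but parse_and_score never submits a task for it, so its 10 points are unreachable. A small illustrative check with sample scores:

# Illustrative only; the weights mirror ATSResumeParser.score_weights above.
score_weights = {
    'skills_match': 30,
    'experience_relevance': 25,
    'education_relevance': 10,
    'overall_formatting': 15,
    'keyword_optimization': 10,  # weighted but never scored in parse_and_score
    'extra_sections': 10,
}
sample_scores = {  # sample category scores on a 0-100 scale
    'skills_match': 80.0,
    'experience_relevance': 65.0,
    'education_relevance': 80,
    'overall_formatting': 100,
    'extra_sections': 0.0,
}
total = sum(score * score_weights[cat] / 100 for cat, score in sample_scores.items())
print(round(total, 2))  # 63.25 — a perfect resume tops out at 90, not 100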
Process/views.py ADDED
@@ -0,0 +1,75 @@
+ import logging
+ from django.http import JsonResponse
+ from django.views.decorators.csrf import csrf_exempt
+ import json
+ from transformers import AutoTokenizer, AutoModel
+ import torch
+
+ from .ats_parser import extract_resume_details
+ from .utils import generate_ats_score
+ from .response import get_response
+ from .extract import extract_text_from_pdf
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ model_name = "sentence-transformers/all-MiniLM-L6-v2"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModel.from_pretrained(model_name)
+
+ def get_embeddings(texts):
+     inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+     with torch.no_grad():
+         model_output = model(**inputs)
+     # Mean-pool the token embeddings into one vector per text
+     embeddings = model_output.last_hidden_state.mean(dim=1)
+     return embeddings
+
+ def calculate_similarity(job_description, resume_text):
+     jd_embedding = get_embeddings([job_description])
+     resume_embedding = get_embeddings([resume_text])
+
+     # Normalize so the dot product below is cosine similarity
+     jd_embedding = jd_embedding / jd_embedding.norm(dim=1, keepdim=True)
+     resume_embedding = resume_embedding / resume_embedding.norm(dim=1, keepdim=True)
+     similarity = torch.mm(jd_embedding, resume_embedding.T).item()
+     return similarity
+
+ @csrf_exempt
+ def process_resume(request):
+     if request.method == 'POST':
+         try:
+             data = json.loads(request.body)
+
+             user_id = data.get('user_id')
+             resume = data.get('resume')
+             job_description = data.get('job_description')
+             logger.info(f"Received data for user_id: {user_id}")
+
+             similarity = calculate_similarity(job_description, resume)
+             logger.info("Similarity calculation completed")
+
+             st_data = extract_resume_details(resume)
+             logger.info("Resume details extraction completed")
+
+             ats_score = generate_ats_score(st_data, job_description)
+             logger.info("ATS score generation completed")
+
+             response_data = {
+                 'user_id': user_id,
+                 'similarity': similarity,
+                 'ats_score': ats_score,
+                 'structured_data': st_data
+             }
+             return JsonResponse(response_data, status=200)
+         except json.JSONDecodeError:
+             logger.error("Invalid JSON received")
+             return JsonResponse({'error': 'Invalid JSON'}, status=400)
+     else:
+         return JsonResponse({'message': 'Only POST requests are allowed'}, status=405)
+
+ def verify_api(request):
+     if request.method == 'GET':
+         return JsonResponse({'message': 'yaay working-GET '}, status=200)
+     else:
+         return JsonResponse({'error': 'Only GET requests are allowed'}, status=405)
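
Putting the pipeline together (embedding similarity, Gemini extraction, then ATS scoring), a minimal client sketch, assuming the dev server is on localhost:7860; the field names match what process_resume reads, and the values are illustrative:

import requests

payload = {
    "user_id": 12345,  # sample value
    "resume": "Harish KB ... Technical Skills Languages: Python, Java, C/C++ ...",  # raw resume text
    "job_description": "Software engineer with Python, Django, and REST APIs.",
}
resp = requests.post("http://localhost:7860/process_resume/", json=payload, timeout=120)
data = resp.json()
# generate_ats_score nests its own 'ats_score' key inside the response's 'ats_score'
print(data["similarity"], data["ats_score"]["ats_score"])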
ResumeATS/__init__.py ADDED
File without changes
ResumeATS/asgi.py ADDED
@@ -0,0 +1,16 @@
+ """
+ ASGI config for ResumeATS project.
+
+ It exposes the ASGI callable as a module-level variable named ``application``.
+
+ For more information on this file, see
+ https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
+ """
+
+ import os
+
+ from django.core.asgi import get_asgi_application
+
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ResumeATS.settings')
+
+ application = get_asgi_application()
ResumeATS/settings.py ADDED
@@ -0,0 +1,127 @@
+ """
+ Django settings for ResumeATS project.
+
+ Generated by 'django-admin startproject' using Django 5.1.5.
+
+ For more information on this file, see
+ https://docs.djangoproject.com/en/5.1/topics/settings/
+
+ For the full list of settings and their values, see
+ https://docs.djangoproject.com/en/5.1/ref/settings/
+ """
+
+ from pathlib import Path
+ from dotenv import load_dotenv
+ import os
+ load_dotenv()
+ # Build paths inside the project like this: BASE_DIR / 'subdir'.
+ BASE_DIR = Path(__file__).resolve().parent.parent
+
+
+ # Quick-start development settings - unsuitable for production
+ # See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
+
+ # SECURITY WARNING: keep the secret key used in production secret!
+ SECRET_KEY = os.getenv("SECRET_KEY")
+
+ # SECURITY WARNING: don't run with debug turned on in production!
+ DEBUG = True
+
+ ALLOWED_HOSTS = []
+
+
+ # Application definition
+
+ INSTALLED_APPS = [
+     'django.contrib.admin',
+     'django.contrib.auth',
+     'django.contrib.contenttypes',
+     'django.contrib.sessions',
+     'django.contrib.messages',
+     'django.contrib.staticfiles',
+     'rest_framework',
+     'Process',
+ ]
+
+ MIDDLEWARE = [
+     'django.middleware.security.SecurityMiddleware',
+     'django.contrib.sessions.middleware.SessionMiddleware',
+     'django.middleware.common.CommonMiddleware',
+     'django.middleware.csrf.CsrfViewMiddleware',
+     'django.contrib.auth.middleware.AuthenticationMiddleware',
+     'django.contrib.messages.middleware.MessageMiddleware',
+     'django.middleware.clickjacking.XFrameOptionsMiddleware',
+ ]
+
+ ROOT_URLCONF = 'ResumeATS.urls'
+
+ TEMPLATES = [
+     {
+         'BACKEND': 'django.template.backends.django.DjangoTemplates',
+         'DIRS': [],
+         'APP_DIRS': True,
+         'OPTIONS': {
+             'context_processors': [
+                 'django.template.context_processors.debug',
+                 'django.template.context_processors.request',
+                 'django.contrib.auth.context_processors.auth',
+                 'django.contrib.messages.context_processors.messages',
+             ],
+         },
+     },
+ ]
+
+ WSGI_APPLICATION = 'ResumeATS.wsgi.application'
+
+
+ # Database
+ # https://docs.djangoproject.com/en/5.1/ref/settings/#databases
+
+ DATABASES = {
+     'default': {
+         'ENGINE': 'django.db.backends.sqlite3',
+         'NAME': BASE_DIR / 'db.sqlite3',
+     }
+ }
+
+
+ # Password validation
+ # https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
+
+ AUTH_PASSWORD_VALIDATORS = [
+     {
+         'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+     },
+     {
+         'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+     },
+     {
+         'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+     },
+     {
+         'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+     },
+ ]
+
+
+ # Internationalization
+ # https://docs.djangoproject.com/en/5.1/topics/i18n/
+
+ LANGUAGE_CODE = 'en-us'
+
+ TIME_ZONE = 'UTC'
+
+ USE_I18N = True
+
+ USE_TZ = True
+
+
+ # Static files (CSS, JavaScript, Images)
+ # https://docs.djangoproject.com/en/5.1/howto/static-files/
+
+ STATIC_URL = 'static/'
+
+ # Default primary key field type
+ # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
+
+ DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
ResumeATS/urls.py ADDED
@@ -0,0 +1,21 @@
+ """
+ URL configuration for ResumeATS project.
+
+ The `urlpatterns` list routes URLs to views. For more information please see:
+     https://docs.djangoproject.com/en/5.1/topics/http/urls/
+ Examples:
+ Function views
+     1. Add an import: from my_app import views
+     2. Add a URL to urlpatterns: path('', views.home, name='home')
+ Class-based views
+     1. Add an import: from other_app.views import Home
+     2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
+ Including another URLconf
+     1. Import the include() function: from django.urls import include, path
+     2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
+ """
+ from django.urls import path, include
+
+ urlpatterns = [
+     path('', include('Process.urls')),
+ ]
ResumeATS/wsgi.py ADDED
@@ -0,0 +1,16 @@
+ """
+ WSGI config for ResumeATS project.
+
+ It exposes the WSGI callable as a module-level variable named ``application``.
+
+ For more information on this file, see
+ https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
+ """
+
+ import os
+
+ from django.core.wsgi import get_wsgi_application
+
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ResumeATS.settings')
+
+ application = get_wsgi_application()
app.py ADDED
@@ -0,0 +1,26 @@
+ # This is a bridging file for Hugging Face Spaces
+ # It ensures that when the container starts, your Django app will run correctly
+
+ import os
+ import sys
+ import subprocess
+ import logging
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ if __name__ == "__main__":
+     # Make sure we're using the correct port
+     port = os.environ.get("PORT", "7860")
+     logger.info(f"Starting Django server on port {port}")
+
+     try:
+         # Start the Django development server
+         # For better production deployment, consider using gunicorn
+         subprocess.run(
+             ["python", "manage.py", "runserver", f"0.0.0.0:{port}"],
+             check=True
+         )
+     except Exception as e:
+         logger.error(f"Error starting Django server: {e}")
+         sys.exit(1)
deploy.txt ADDED
@@ -0,0 +1 @@
+ sudo apt-get install tesseract-ocr
manage.py ADDED
@@ -0,0 +1,22 @@
+ #!/usr/bin/env python
+ """Django's command-line utility for administrative tasks."""
+ import os
+ import sys
+
+
+ def main():
+     """Run administrative tasks."""
+     os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ResumeATS.settings')
+     try:
+         from django.core.management import execute_from_command_line
+     except ImportError as exc:
+         raise ImportError(
+             "Couldn't import Django. Are you sure it's installed and "
+             "available on your PYTHONPATH environment variable? Did you "
+             "forget to activate a virtual environment?"
+         ) from exc
+     execute_from_command_line(sys.argv)
+
+
+ if __name__ == '__main__':
+     main()
readme.md ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,37 @@
+ # Core dependencies
+ Django==5.1.5
+ djangorestframework==3.15.2
+ python-dotenv==1.0.1
+ numpy==2.2.2
+ pandas==2.2.3
+ scikit-learn==1.6.1
+ pillow==11.0.0
+ tqdm==4.67.1
+ requests==2.32.3
+
+ # PDF processing
+ pdfplumber==0.11.5
+ pdf2docx==0.5.8
+ PyMuPDF==1.25.2
+ pytesseract==0.3.13
+ python-docx==1.1.2
+ pdfminer.six==20231228
+
+ # NLP & AI
+ google-genai  # provides `from google import genai`, as used in Process/response.py
+ transformers==4.48.2
+ torch  # required by Process/views.py for the MiniLM embeddings
+ pydantic  # used in Process/ats_parser.py
+ nltk==3.9.1
+
+ # Image Processing
+ opencv-python-headless==4.11.0.86
+
+ # File handling and data formats
+ pyarrow==19.0.1
+ fsspec==2024.12.0
+ filelock==3.17.0
+
+ # Utilities
+ regex==2024.11.6
+ markdown2==2.5.3