kirtanj committed on
Commit
e1eeb11
·
1 Parent(s): df0cb94
.gitignore CHANGED
@@ -1,3 +1 @@
1
- .env
2
- .streamlit/secrets.toml
3
- chat.py
 
1
+ DOCUMENTATION.md
 
 
.streamlit/config.toml CHANGED
@@ -1,7 +1,16 @@
 
 
 
 
 
 
 
 
 
1
  [theme]
2
  base="dark"
3
  primaryColor="#FF4B4B"
4
- backgroundColor="#0E1117"
5
- secondaryBackgroundColor="#262730"
6
- textColor="#FAFAFA"
7
  font="sans serif"
 
1
+ [server]
2
+ port = 8501
3
+ address = "localhost"
4
+ maxUploadSize = 5
5
+
6
+ [browser]
7
+ serverAddress = "localhost"
8
+ serverPort = 8501
9
+
10
  [theme]
11
  base="dark"
12
  primaryColor="#FF4B4B"
13
+ backgroundColor="#FFFFFF"
14
+ secondaryBackgroundColor="#F0F2F6"
15
+ textColor="#262730"
16
  font="sans serif"
AI_Powered_Resume_Analyzer.ipynb DELETED
@@ -1,384 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 67,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "from PyPDF2 import PdfReader\n",
10
- "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
11
- "from langchain.embeddings.openai import OpenAIEmbeddings\n",
12
- "from langchain.vectorstores import FAISS\n",
13
- "from langchain.chat_models import ChatOpenAI\n",
14
- "from langchain.chains.question_answering import load_qa_chain\n",
15
- "import warnings\n",
16
- "warnings.filterwarnings('ignore')"
17
- ]
18
- },
19
- {
20
- "cell_type": "code",
21
- "execution_count": 62,
22
- "metadata": {},
23
- "outputs": [
24
- {
25
- "name": "stdout",
26
- "output_type": "stream",
27
- "text": [
28
- "<PyPDF2._reader.PdfReader object at 0x00000154154007D0>\n"
29
- ]
30
- }
31
- ],
32
- "source": [
33
- "pdf = \"/content/resume.pdf\"\n",
34
- "pdf_reader = PdfReader(pdf)\n",
35
- "print(pdf_reader)"
36
- ]
37
- },
38
- {
39
- "cell_type": "code",
40
- "execution_count": null,
41
- "metadata": {},
42
- "outputs": [],
43
- "source": [
44
- "# extrat text from each page separately\n",
45
- "text = \"\"\n",
46
- "for page in pdf_reader.pages:\n",
47
- " text += page.extract_text()\n",
48
- "\n",
49
- "print(text)"
50
- ]
51
- },
52
- {
53
- "cell_type": "code",
54
- "execution_count": 64,
55
- "metadata": {},
56
- "outputs": [
57
- {
58
- "data": {
59
- "text/plain": [
60
- "['GOPINATH ASOKAN \\nData Science Enthusiast \\nPassionate data science enthusiast with a strong foundation in diverse industries. Equipped with 5+ years\\nof industry experience, highly skilled in problem-solving, and project management. Eager to seamlessly\\nmerge analytical skills with artistic expertise for impactful insights and innovation. Excited to apply data-\\ndriven strategies to challenges, contribute proactively and effectively to the field, and drive innovation. \\[email protected] \\nlinkedin.com/in/gopiashokan \\ngithub.com/gopiashokan \\nWORK EXPERIENCE \\nSenior Process Executive - Operations \\nMahendra Next Wealth IT India Pvt Ltd \\n05/2019 - 12/2022\\n, \\n \\nNamakkal',\n",
61
- " \"linkedin.com/in/gopiashokan \\ngithub.com/gopiashokan \\nWORK EXPERIENCE \\nSenior Process Executive - Operations \\nMahendra Next Wealth IT India Pvt Ltd \\n05/2019 - 12/2022\\n, \\n \\nNamakkal \\nProficiently executed image editing tasks for bigbasket's\\nproduct images, encompassing renaming, retouching, \\ncolor\\ncorrection, content cropping, and photo manipulation. \\nExpertly designed captivating banners and creatives for\\nadvertisements, skillfully integrating combo packs, multi-\\npacks, and hero images into Bigbasket's product pages. \\nContributed to taxonomy by mapping tax codes, manually\\nidentified competitor products, and verified AI-generated\\noutputs for accuracy, assisting in AI improvement efforts.\",\n",
62
- " 'Contributed to taxonomy by mapping tax codes, manually\\nidentified competitor products, and verified AI-generated\\noutputs for accuracy, assisting in AI improvement efforts. \\nAssociate Engineer - Quality \\nRudra Blades and Edges Pvt Ltd \\n07/2018 - 12/2018\\n, \\n \\nChennai \\nPerformed continuous and comprehensive material analysis\\nto ensure structural integrity and precise alignment with\\ncustomer specifications as maintaining quality standards. \\nConsistently maintained high quality standards at critical\\nwelding, grinding, and precision parallelism stations, by\\nensuring unwavering quality in the production process. \\nProficiently managed and coordinated material dispatch',\n",
63
- " 'welding, grinding, and precision parallelism stations, by\\nensuring unwavering quality in the production process. \\nProficiently managed and coordinated material dispatch\\nwhile meeting both regular order requirements and the\\npriority to ensure prompt and reliable customer service. \\nGraduate Engineer Trainee - Quality \\nLear Automotive India Pvt Ltd \\n07/2016 - 07/2017\\n, \\n \\nChennai \\nEfficiently managed productive customer meetings while\\nskillfully addressing challenging issues through \\ndetailed\\ncomprehensive Minutes of Meeting documentation. \\nMaintained stock alignment, meticulously validating the\\nperpetual and wall-to-wall inventory in physical and QAD',\n",
64
- " 'detailed\\ncomprehensive Minutes of Meeting documentation. \\nMaintained stock alignment, meticulously validating the\\nperpetual and wall-to-wall inventory in physical and QAD\\nsoftware systems to ensure inventory precision & accuracy. \\nImplemented Q-Point, ICA, & PCA for quality enhancement\\nand has managed up-to-date Quality Notice documentation\\nwith 8D reports in the SQTS system for the issue resolution. \\nEDUCATION \\nMaster Data Science \\nGUVI Geeks Network Pvt Ltd \\n2023\\n, \\n \\nChennai \\nB.E. in Mechanical Engineering \\nKnowledge Institute of Technology \\n2012 - 2016\\n, \\n \\nSalem \\nSKILLS \\nPython \\nPostgreSQL \\nMongoDB \\nTableau \\nPowerBI \\nMachine Learning \\nDeep Learning \\nNLP \\nLLM \\nOpenAI \\nSelenium \\nAirflow',\n",
65
- " 'Knowledge Institute of Technology \\n2012 - 2016\\n, \\n \\nSalem \\nSKILLS \\nPython \\nPostgreSQL \\nMongoDB \\nTableau \\nPowerBI \\nMachine Learning \\nDeep Learning \\nNLP \\nLLM \\nOpenAI \\nSelenium \\nAirflow \\nHadoop \\nPySpark \\nOCR \\nNumpy \\nPandas \\nStreamlit \\nPlotly \\nMatplotlib \\nSeaborn \\nCERTIFICATE \\nMicrosoft AI-900 Azure AI Fundamentals\\n (2023)\\n \\nPROJECTS \\nAI Resume Analyzer and LinkedIn Scraper with Selenium \\nBuilt an \\nInnovative \\nAI-driven Streamlit app with LLM, OpenAI for\\nprecise resume analysis and suggestions. Integrated Selenium for\\ndynamic LinkedIn data extraction, enhancing career insights. \\nAI excels in resume analysis - summarizing, strengths, weaknesses,',\n",
66
- " 'precise resume analysis and suggestions. Integrated Selenium for\\ndynamic LinkedIn data extraction, enhancing career insights. \\nAI excels in resume analysis - summarizing, strengths, weaknesses,\\nand suggesting job titles. Leveraging Selenium for LinkedIn data, it\\nstreamlines job searches for comprehensive career insights. \\nTools: Python, LLM, OpenAI, Selenium, Streamlit, Numpy, Pandas.\\n \\ngithub.com/gopiashokan/AI-Resume-Analyzer-LinkedIn-Scraper.git\\n \\nRetail Sales Forecast \\nImplemented ML for precise retail sales predictions, emphasizing\\npreprocessing and algorithm selection. Streamlined Streamlit app\\nintegrates EDA, \\noptimizing decision-making in dynamic retail.',\n",
67
- " 'Implemented ML for precise retail sales predictions, emphasizing\\npreprocessing and algorithm selection. Streamlined Streamlit app\\nintegrates EDA, \\noptimizing decision-making in dynamic retail. \\nRevolutionized retail decisions with advanced ML, using a streamlit\\napplication integrating EDA for precise sales forecasts, \\nfeature\\ncomparison & actionable insights by identifying trends & patterns. \\nTools: Python, \\nsklearn, PostgreSQL, Streamlit, Numpy, Pandas, Plotly,\\nMatplotlib, Seaborn.\\n \\nhttps://github.com/gopiashokan/Retail-Sales-Forecast.git\\n \\nIndustrial Copper Modeling \\nLeveraged advanced ML regression models for precise pricing and\\nclassification, enhancing targeted customer engagement by',\n",
68
- " 'Industrial Copper Modeling \\nLeveraged advanced ML regression models for precise pricing and\\nclassification, enhancing targeted customer engagement by\\npredicting potential customers in the copper industry landscape. \\nExpert in data preprocessing, feature engineering, cross-validation,\\nhyperparameter tuning, and Streamlit app development, \\nskillfully\\napplying the skills to solve real-world manufacturing challenges. \\nTools: Python, sklearn, Streamlit, Matplotlib,Seaborn, Numpy,Pandas.\\n \\ngithub.com/gopiashokan/Industrial-Copper-Modeling.git\\n \\nAirbnb Analysis \\nLeverage Streamlit for dynamic exploratory data analysis (EDA)\\nwith interactive charts. Extend insights through a comprehensive',\n",
69
- " 'Airbnb Analysis \\nLeverage Streamlit for dynamic exploratory data analysis (EDA)\\nwith interactive charts. Extend insights through a comprehensive\\nTableau dashboard, uncovering trends and patterns in the dataset. \\nAnalyzed pricing dynamics & availability patterns in the Hospitality\\nsector, enabling informed decision-making and empowering\\nstakeholders to make choices based on insights and visualizations. \\nTools: Python, MongoDB, PostgreSQL, Tableau, Streamlit, Plotly,\\nPandas.\\n \\ngithub.com/gopiashokan/Airbnb-Analysis.git']"
70
- ]
71
- },
72
- "execution_count": 64,
73
- "metadata": {},
74
- "output_type": "execute_result"
75
- }
76
- ],
77
- "source": [
78
- "# Split the long text into small chunks\n",
79
- "text_splitter = RecursiveCharacterTextSplitter(chunk_size=700,\n",
80
- " chunk_overlap=200,\n",
81
- " length_function=len)\n",
82
- "\n",
83
- "chunks = text_splitter.split_text(text=text)\n",
84
- "chunks"
85
- ]
86
- },
87
- {
88
- "cell_type": "code",
89
- "execution_count": 65,
90
- "metadata": {},
91
- "outputs": [
92
- {
93
- "data": {
94
- "text/plain": [
95
- "'GOPINATH ASOKAN \\nData Science Enthusiast \\nPassionate data science enthusiast with a strong foundation in diverse industries. Equipped with 5+ years\\nof industry experience, highly skilled in problem-solving, and project management. Eager to seamlessly\\nmerge analytical skills with artistic expertise for impactful insights and innovation. Excited to apply data-\\ndriven strategies to challenges, contribute proactively and effectively to the field, and drive innovation. \\[email protected] \\nlinkedin.com/in/gopiashokan \\ngithub.com/gopiashokan \\nWORK EXPERIENCE \\nSenior Process Executive - Operations \\nMahendra Next Wealth IT India Pvt Ltd \\n05/2019 - 12/2022\\n, \\n \\nNamakkal'"
96
- ]
97
- },
98
- "execution_count": 65,
99
- "metadata": {},
100
- "output_type": "execute_result"
101
- }
102
- ],
103
- "source": [
104
- "chunks[0]"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": 66,
110
- "metadata": {},
111
- "outputs": [
112
- {
113
- "data": {
114
- "text/plain": [
115
- "\"linkedin.com/in/gopiashokan \\ngithub.com/gopiashokan \\nWORK EXPERIENCE \\nSenior Process Executive - Operations \\nMahendra Next Wealth IT India Pvt Ltd \\n05/2019 - 12/2022\\n, \\n \\nNamakkal \\nProficiently executed image editing tasks for bigbasket's\\nproduct images, encompassing renaming, retouching, \\ncolor\\ncorrection, content cropping, and photo manipulation. \\nExpertly designed captivating banners and creatives for\\nadvertisements, skillfully integrating combo packs, multi-\\npacks, and hero images into Bigbasket's product pages. \\nContributed to taxonomy by mapping tax codes, manually\\nidentified competitor products, and verified AI-generated\\noutputs for accuracy, assisting in AI improvement efforts.\""
116
- ]
117
- },
118
- "execution_count": 66,
119
- "metadata": {},
120
- "output_type": "execute_result"
121
- }
122
- ],
123
- "source": [
124
- "chunks[1]"
125
- ]
126
- },
127
- {
128
- "cell_type": "markdown",
129
- "metadata": {},
130
- "source": [
131
- "\"linkedin.com/in/gopiashokan \\ngithub.com/gopiashokan \\nWORK EXPERIENCE \\nSenior Process Executive - Operations \\nMahendra Next Wealth IT India Pvt Ltd \\n05/2019 - 12/2022\\n, \\n \\nNamakkal\"\n",
132
- "\n",
133
- "The above text is common(overlap) for both chunks[0] and chunks[1].\n",
134
- "(chunk_overlap=200 - maximum length, it means length is not exceed 200)"
135
- ]
136
- },
137
- {
138
- "cell_type": "code",
139
- "execution_count": 7,
140
- "metadata": {},
141
- "outputs": [],
142
- "source": [
143
- "openai_api_key = input('Enter you OpenAI API Key: ')"
144
- ]
145
- },
146
- {
147
- "cell_type": "code",
148
- "execution_count": 52,
149
- "metadata": {},
150
- "outputs": [],
151
- "source": [
152
- "def openai(openai_api_key, chunks, analyze):\n",
153
- "\n",
154
- " # Using OpenAI service for embedding\n",
155
- " embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)\n",
156
- "\n",
157
- " # Facebook AI Similarity Serach library help us to convert text data to numerical vector\n",
158
- " vectorstores = FAISS.from_texts(chunks, embedding=embeddings)\n",
159
- "\n",
160
- " # compares the query and chunks, enabling the selection of the top 'K' most similar chunks based on their similarity scores.\n",
161
- " docs = vectorstores.similarity_search(query=analyze, k=3)\n",
162
- "\n",
163
- " # creates an OpenAI object, using the ChatGPT 3.5 Turbo model\n",
164
- " llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)\n",
165
- "\n",
166
- " # question-answering (QA) pipeline, making use of the load_qa_chain function\n",
167
- " chain = load_qa_chain(llm=llm, chain_type='stuff')\n",
168
- "\n",
169
- " response = chain.run(input_documents=docs, question=analyze)\n",
170
- " return response"
171
- ]
172
- },
173
- {
174
- "cell_type": "code",
175
- "execution_count": 55,
176
- "metadata": {},
177
- "outputs": [
178
- {
179
- "name": "stdout",
180
- "output_type": "stream",
181
- "text": [
182
- "The resume belongs to Gopinath Asokan, who is a data science enthusiast with a strong foundation in diverse industries. He has 5+ years of industry experience and is highly skilled in problem-solving and project management. Gopinath is eager to merge his analytical skills with artistic expertise for impactful insights and innovation. He is excited to apply data-driven strategies to challenges and contribute proactively to the field. \n",
183
- "\n",
184
- "In terms of work experience, Gopinath has worked as a Senior Process Executive - Operations at Mahendra Next Wealth IT India Pvt Ltd from 05/2019 to 12/2022. He was responsible for precise resume analysis and suggestions, as well as integrating Selenium for dynamic LinkedIn data extraction. He also implemented machine learning for precise retail sales predictions, emphasizing preprocessing and algorithm selection.\n",
185
- "\n",
186
- "Gopinath has also worked as an Associate Engineer - Quality at Rudra Blades and Edges Pvt Ltd from 07/2018 to 12/2018. He performed continuous and comprehensive material analysis to ensure structural integrity and maintained high-quality standards at critical stations.\n",
187
- "\n",
188
- "Furthermore, Gopinath worked as a Graduate Engineer Trainee - Quality at Lear Automotive India Pvt Ltd from 07/2016 to 07/2017. He efficiently managed productive customer meetings and maintained stock alignment.\n",
189
- "\n",
190
- "In terms of education, Gopinath has a Master's degree in Data Science from GUVI Geeks Network Pvt Ltd (expected completion in 2023) and a Bachelor's degree in Mechanical Engineering from Knowledge Institute of Technology (2012-2016).\n",
191
- "\n",
192
- "Gopinath possesses a wide range of technical skills, including Python, PostgreSQL, MongoDB, Tableau, PowerBI, Machine Learning, Deep Learning, NLP, LLM, OpenAI, Selenium, Airflow, Hadoop, PySpark, OCR, Numpy, Pandas, Streamlit, Plotly, Matplotlib, and Seaborn.\n",
193
- "\n",
194
- "He has also completed the Microsoft AI-900 Azure AI Fundamentals certificate.\n",
195
- "\n",
196
- "Gopinath has worked on several projects, including an AI Resume Analyzer and LinkedIn Scraper with Selenium, a Retail Sales Forecast using ML, Industrial Copper Modeling using ML, and Airbnb Analysis using Streamlit and Tableau.\n",
197
- "\n",
198
- "In conclusion, Gopinath Asokan is a highly skilled data science enthusiast with a diverse industry background. He has experience in resume analysis, retail sales predictions, and quality assurance. He is proficient in various technical skills and has completed relevant certifications. Gopinath has also worked on several data science projects, showcasing his expertise in machine learning and analysis.\n"
199
- ]
200
- }
201
- ],
202
- "source": [
203
- "def resume_summary(query_with_chunks):\n",
204
- " query = f''' need to detailed summarization of below resume and finally conclude them\n",
205
- "\n",
206
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
207
- " {query_with_chunks}\n",
208
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
209
- " '''\n",
210
- " return query\n",
211
- "\n",
212
- "summary = resume_summary(query_with_chunks=chunks)\n",
213
- "summary_result = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=summary)\n",
214
- "print(summary_result)"
215
- ]
216
- },
217
- {
218
- "cell_type": "code",
219
- "execution_count": 56,
220
- "metadata": {},
221
- "outputs": [
222
- {
223
- "name": "stdout",
224
- "output_type": "stream",
225
- "text": [
226
- "Strengths of Gopinath Asokan's resume:\n",
227
- "\n",
228
- "1. Strong foundation in diverse industries: Gopinath's resume highlights his experience and expertise in various industries, showcasing his adaptability and ability to work in different environments.\n",
229
- "\n",
230
- "2. 5+ years of industry experience: Gopinath's extensive experience in the industry demonstrates his ability to handle real-world challenges and shows his level of expertise in the field.\n",
231
- "\n",
232
- "3. Strong problem-solving and project management skills: Gopinath's resume emphasizes his skills in problem-solving and project management, which are crucial in the field of data science. This indicates his ability to effectively handle complex problems and successfully manage projects.\n",
233
- "\n",
234
- "4. Analytical skills combined with artistic expertise: Gopinath's resume mentions his eagerness to merge his analytical skills with artistic expertise, indicating his ability to think creatively and produce impactful insights and innovations.\n",
235
- "\n",
236
- "5. Data-driven strategies: Gopinath's excitement to apply data-driven strategies to challenges shows his understanding of the importance of data analysis in decision-making and problem-solving.\n",
237
- "\n",
238
- "6. Proficient in technical skills: Gopinath possesses a wide range of technical skills, including Python, machine learning, NLP, Selenium, and more. This showcases his ability to utilize various tools and technologies to solve complex problems and deliver high-quality work.\n",
239
- "\n",
240
- "7. Relevant certifications: Gopinath's completion of the Microsoft AI-900 Azure AI Fundamentals certificate demonstrates his commitment to continuous learning and staying up-to-date with the latest technologies and advancements in the field of data science.\n",
241
- "\n",
242
- "8. Experienced in various projects: Gopinath's experience in projects such as AI Resume Analyzer and LinkedIn Scraper, Retail Sales Forecast using ML, and Industrial Copper Modeling using ML showcases his practical application of data science techniques and his ability to deliver successful projects.\n",
243
- "\n",
244
- "In conclusion, Gopinath Asokan's resume exhibits strengths in his industry experience, problem-solving skills, technical expertise, and project management abilities. His diverse background, eagerness to merge analytical and artistic skills, and relevant certifications make him a strong candidate in the field of data science.\n"
245
- ]
246
- }
247
- ],
248
- "source": [
249
- "def resume_strength(query_with_chunks):\n",
250
- " query = f'''need to detailed analysis and explain of the strength of below resume and finally conclude them\n",
251
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
252
- " {query_with_chunks}\n",
253
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
254
- " '''\n",
255
- " return query\n",
256
- "\n",
257
- "strength = resume_strength(query_with_chunks=summary_result)\n",
258
- "strength_result = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=strength)\n",
259
- "print(strength_result)"
260
- ]
261
- },
262
- {
263
- "cell_type": "code",
264
- "execution_count": 60,
265
- "metadata": {},
266
- "outputs": [
267
- {
268
- "name": "stdout",
269
- "output_type": "stream",
270
- "text": [
271
- "Based on the provided resume details, Gopinath Asokan has a strong foundation in diverse industries and is highly skilled in problem-solving and project management. However, there are a few weaknesses in the resume that can be improved:\n",
272
- "\n",
273
- "1. Lack of a clear career objective: The resume does not mention a specific career objective or goal. It would be beneficial to include a clear and concise objective statement that highlights Gopinath's career aspirations and how his skills and experience align with those goals.\n",
274
- "\n",
275
- "2. Incomplete work experience details: While the resume mentions Gopinath's job titles and responsibilities, it does not provide specific accomplishments or achievements in each role. Adding quantifiable achievements or results-oriented statements would strengthen the resume and demonstrate Gopinath's impact in previous positions.\n",
276
- "\n",
277
- "3. Limited information on education: The resume briefly mentions Gopinath's educational background, but it lacks details on specific coursework or projects related to data science. Including relevant coursework, research projects, or any notable academic achievements would enhance the resume's credibility and showcase Gopinath's academic abilities.\n",
278
- "\n",
279
- "4. Lack of focus on key technical skills: Although the resume mentions a wide range of technical skills, it does not highlight which skills are most relevant to the data science field. It would be helpful to prioritize and emphasize the key technical skills that directly align with the desired job roles in data science.\n",
280
- "\n",
281
- "To improve the resume, consider the following suggestions:\n",
282
- "\n",
283
- "1. Start with a strong career objective statement that clearly communicates Gopinath's goals and how his skills and experience align with those goals.\n",
284
- "\n",
285
- "2. Include specific accomplishments and achievements in each work experience entry, highlighting the impact Gopinath made in previous roles. Use quantitative metrics whenever possible to showcase results.\n",
286
- "\n",
287
- "3. Provide more details on relevant coursework, research projects, or academic achievements related to data science during Gopinath's Master's degree program.\n",
288
- "\n",
289
- "4. Prioritize and highlight the key technical skills that directly align with data science roles. Consider creating a separate section dedicated to technical skills, showcasing proficiency and experience in those areas.\n",
290
- "\n",
291
- "5. Consider including any relevant certifications, online courses, or workshops related to data science or machine learning.\n",
292
- "\n",
293
- "By addressing these weaknesses and implementing these improvements, Gopinath Asokan can create a stronger and more impactful resume that highlights his skills, experience, and potential in the field of data science.\n"
294
- ]
295
- }
296
- ],
297
- "source": [
298
- "def resume_weakness(query_with_chunks):\n",
299
- " query = f'''need to detailed analysis and explain of the weakness of below resume and how to improve make a better resume.\n",
300
- "\n",
301
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
302
- " {query_with_chunks}\n",
303
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
304
- " '''\n",
305
- " return query\n",
306
- "\n",
307
- "weakness = resume_weakness(query_with_chunks=summary_result)\n",
308
- "result_weakness = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=weakness)\n",
309
- "print(result_weakness)"
310
- ]
311
- },
312
- {
313
- "cell_type": "code",
314
- "execution_count": 61,
315
- "metadata": {},
316
- "outputs": [
317
- {
318
- "name": "stdout",
319
- "output_type": "stream",
320
- "text": [
321
- "Based on the information provided, some potential job roles that Gopinath Asokan could apply to on LinkedIn include:\n",
322
- "\n",
323
- "1. Data Scientist\n",
324
- "2. Data Analyst\n",
325
- "3. Machine Learning Engineer\n",
326
- "4. Business Analyst\n",
327
- "5. Project Manager\n",
328
- "6. Operations Analyst\n",
329
- "7. Quality Assurance Engineer\n",
330
- "8. Sales Analyst\n",
331
- "9. AI Engineer\n",
332
- "10. Retail Analyst\n",
333
- "\n",
334
- "These job roles align with Gopinath's skills and experience in data science, problem-solving, project management, resume analysis, retail sales forecasting, and quality assurance.\n"
335
- ]
336
- }
337
- ],
338
- "source": [
339
- "def job_title_suggestion(query_with_chunks):\n",
340
- "\n",
341
- " query = f''' what are the job roles i apply to likedin based on below?\n",
342
- " \n",
343
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
344
- " {query_with_chunks}\n",
345
- " \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n",
346
- " '''\n",
347
- " return query\n",
348
- "\n",
349
- "suggestion = job_title_suggestion(query_with_chunks=summary_result)\n",
350
- "result_suggestion = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=suggestion)\n",
351
- "print(result_suggestion)"
352
- ]
353
- },
354
- {
355
- "cell_type": "code",
356
- "execution_count": null,
357
- "metadata": {},
358
- "outputs": [],
359
- "source": []
360
- }
361
- ],
362
- "metadata": {
363
- "kernelspec": {
364
- "display_name": "Python 3",
365
- "language": "python",
366
- "name": "python3"
367
- },
368
- "language_info": {
369
- "codemirror_mode": {
370
- "name": "ipython",
371
- "version": 3
372
- },
373
- "file_extension": ".py",
374
- "mimetype": "text/x-python",
375
- "name": "python",
376
- "nbconvert_exporter": "python",
377
- "pygments_lexer": "ipython3",
378
- "version": "3.11.6"
379
- },
380
- "orig_nbformat": 4
381
- },
382
- "nbformat": 4,
383
- "nbformat_minor": 2
384
- }
 
LinkedIn_scraper_with_Selenium.ipynb CHANGED
@@ -18,17 +18,9 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 118,
22
  "metadata": {},
23
- "outputs": [
24
- {
25
- "name": "stdout",
26
- "output_type": "stream",
27
- "text": [
28
- "['data', 'scientist', 'artificial', 'intelligence', 'ai']\n"
29
- ]
30
- }
31
- ],
32
  "source": [
33
  "user_input_job_title = input('Enter Job Titles (with comma separated):').split()\n",
34
  "print(user_input_job_title)"
@@ -36,17 +28,9 @@
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 119,
40
  "metadata": {},
41
- "outputs": [
42
- {
43
- "name": "stdout",
44
- "output_type": "stream",
45
- "text": [
46
- "data%2C%20scientist%2C%20artificial%2C%20intelligence%2C%20ai\n"
47
- ]
48
- }
49
- ],
50
  "source": [
51
  "b = []\n",
52
  "for i in user_input_job_title:\n",
@@ -958,177 +942,9 @@
958
  },
959
  {
960
  "cell_type": "code",
961
- "execution_count": 134,
962
  "metadata": {},
963
- "outputs": [
964
- {
965
- "data": {
966
- "text/html": [
967
- "<div>\n",
968
- "<style scoped>\n",
969
- " .dataframe tbody tr th:only-of-type {\n",
970
- " vertical-align: middle;\n",
971
- " }\n",
972
- "\n",
973
- " .dataframe tbody tr th {\n",
974
- " vertical-align: top;\n",
975
- " }\n",
976
- "\n",
977
- " .dataframe thead th {\n",
978
- " text-align: right;\n",
979
- " }\n",
980
- "</style>\n",
981
- "<table border=\"1\" class=\"dataframe\">\n",
982
- " <thead>\n",
983
- " <tr style=\"text-align: right;\">\n",
984
- " <th></th>\n",
985
- " <th>Company Name</th>\n",
986
- " <th>Job Title</th>\n",
987
- " <th>Location</th>\n",
988
- " <th>Website URL</th>\n",
989
- " <th>Job Description</th>\n",
990
- " </tr>\n",
991
- " </thead>\n",
992
- " <tbody>\n",
993
- " <tr>\n",
994
- " <th>0</th>\n",
995
- " <td>nasscom</td>\n",
996
- " <td>Artificial Intelligence (AI)</td>\n",
997
- " <td>Noida, Uttar Pradesh, India</td>\n",
998
- " <td>https://in.linkedin.com/jobs/view/artificial-i...</td>\n",
999
- " <td>Selected Intern's Day-to-day Responsibilities ...</td>\n",
1000
- " </tr>\n",
1001
- " <tr>\n",
1002
- " <th>1</th>\n",
1003
- " <td>Deloitte</td>\n",
1004
- " <td>Data Scientist</td>\n",
1005
- " <td>Gurugram, Haryana, India</td>\n",
1006
- " <td>https://in.linkedin.com/jobs/view/data-scienti...</td>\n",
1007
- " <td>What impact will you make?\\nEvery day, your wo...</td>\n",
1008
- " </tr>\n",
1009
- " <tr>\n",
1010
- " <th>2</th>\n",
1011
- " <td>L&amp;T Technology Services</td>\n",
1012
- " <td>Data Scientist</td>\n",
1013
- " <td>Hyderabad, Telangana, India</td>\n",
1014
- " <td>https://in.linkedin.com/jobs/view/data-scienti...</td>\n",
1015
- " <td>About the Role\\nWe are looking for Data Scient...</td>\n",
1016
- " </tr>\n",
1017
- " <tr>\n",
1018
- " <th>3</th>\n",
1019
- " <td>Api Logistics</td>\n",
1020
- " <td>Artificial Intelligence (AI)</td>\n",
1021
- " <td>Gurgaon, Haryana, India</td>\n",
1022
- " <td>https://in.linkedin.com/jobs/view/artificial-i...</td>\n",
1023
- " <td>We will be building an AI bot, which will be a...</td>\n",
1024
- " </tr>\n",
1025
- " <tr>\n",
1026
- " <th>4</th>\n",
1027
- " <td>E2E Networks Limited</td>\n",
1028
- " <td>Artificial Intelligence (AI)</td>\n",
1029
- " <td>Delhi, India</td>\n",
1030
- " <td>https://in.linkedin.com/jobs/view/artificial-i...</td>\n",
1031
- " <td>As an AI intern at E2E Networks Limited, you w...</td>\n",
1032
- " </tr>\n",
1033
- " <tr>\n",
1034
- " <th>5</th>\n",
1035
- " <td>Factspan</td>\n",
1036
- " <td>Data Scientist</td>\n",
1037
- " <td>Bengaluru, Karnataka, India</td>\n",
1038
- " <td>https://in.linkedin.com/jobs/view/data-scienti...</td>\n",
1039
- " <td>Responsibilities\\nSelecting features, building...</td>\n",
1040
- " </tr>\n",
1041
- " <tr>\n",
1042
- " <th>6</th>\n",
1043
- " <td>MakeMyTrip</td>\n",
1044
- " <td>Senior/Lead Data Scientist</td>\n",
1045
- " <td>Bengaluru, Karnataka, India</td>\n",
1046
- " <td>https://in.linkedin.com/jobs/view/senior-lead-...</td>\n",
1047
- " <td>Responsibilities:\\nTrain and deploy best in cl...</td>\n",
1048
- " </tr>\n",
1049
- " <tr>\n",
1050
- " <th>7</th>\n",
1051
- " <td>Persistent Systems</td>\n",
1052
- " <td>Senior Data Scientist</td>\n",
1053
- " <td>Pune, Maharashtra, India</td>\n",
1054
- " <td>https://in.linkedin.com/jobs/view/senior-data-...</td>\n",
1055
- " <td>About Position\\n\\nWe are looking for a highly ...</td>\n",
1056
- " </tr>\n",
1057
- " <tr>\n",
1058
- " <th>8</th>\n",
1059
- " <td>CodeRoofs IT Solutions</td>\n",
1060
- " <td>Generative Artificial Intelligence (AI)</td>\n",
1061
- " <td>Sahibzada Ajit Singh Nagar, Punjab, India</td>\n",
1062
- " <td>https://in.linkedin.com/jobs/view/generative-a...</td>\n",
1063
- " <td>Selected Intern's Day-to-day Responsibilities ...</td>\n",
1064
- " </tr>\n",
1065
- " <tr>\n",
1066
- " <th>9</th>\n",
1067
- " <td>LENS Corporation</td>\n",
1068
- " <td>Artificial Intelligence Researcher</td>\n",
1069
- " <td>Gurugram, Haryana, India</td>\n",
1070
- " <td>https://in.linkedin.com/jobs/view/artificial-i...</td>\n",
1071
- " <td>Requirements:\\nExcellent knowledge of computer...</td>\n",
1072
- " </tr>\n",
1073
- " </tbody>\n",
1074
- "</table>\n",
1075
- "</div>"
1076
- ],
1077
- "text/plain": [
1078
- " Company Name Job Title \\\n",
1079
- "0 nasscom Artificial Intelligence (AI) \n",
1080
- "1 Deloitte Data Scientist \n",
1081
- "2 L&T Technology Services Data Scientist \n",
1082
- "3 Api Logistics Artificial Intelligence (AI) \n",
1083
- "4 E2E Networks Limited Artificial Intelligence (AI) \n",
1084
- "5 Factspan Data Scientist \n",
1085
- "6 MakeMyTrip Senior/Lead Data Scientist \n",
1086
- "7 Persistent Systems Senior Data Scientist \n",
1087
- "8 CodeRoofs IT Solutions Generative Artificial Intelligence (AI) \n",
1088
- "9 LENS Corporation Artificial Intelligence Researcher \n",
1089
- "\n",
1090
- " Location \\\n",
1091
- "0 Noida, Uttar Pradesh, India \n",
1092
- "1 Gurugram, Haryana, India \n",
1093
- "2 Hyderabad, Telangana, India \n",
1094
- "3 Gurgaon, Haryana, India \n",
1095
- "4 Delhi, India \n",
1096
- "5 Bengaluru, Karnataka, India \n",
1097
- "6 Bengaluru, Karnataka, India \n",
1098
- "7 Pune, Maharashtra, India \n",
1099
- "8 Sahibzada Ajit Singh Nagar, Punjab, India \n",
1100
- "9 Gurugram, Haryana, India \n",
1101
- "\n",
1102
- " Website URL \\\n",
1103
- "0 https://in.linkedin.com/jobs/view/artificial-i... \n",
1104
- "1 https://in.linkedin.com/jobs/view/data-scienti... \n",
1105
- "2 https://in.linkedin.com/jobs/view/data-scienti... \n",
1106
- "3 https://in.linkedin.com/jobs/view/artificial-i... \n",
1107
- "4 https://in.linkedin.com/jobs/view/artificial-i... \n",
1108
- "5 https://in.linkedin.com/jobs/view/data-scienti... \n",
1109
- "6 https://in.linkedin.com/jobs/view/senior-lead-... \n",
1110
- "7 https://in.linkedin.com/jobs/view/senior-data-... \n",
1111
- "8 https://in.linkedin.com/jobs/view/generative-a... \n",
1112
- "9 https://in.linkedin.com/jobs/view/artificial-i... \n",
1113
- "\n",
1114
- " Job Description \n",
1115
- "0 Selected Intern's Day-to-day Responsibilities ... \n",
1116
- "1 What impact will you make?\\nEvery day, your wo... \n",
1117
- "2 About the Role\\nWe are looking for Data Scient... \n",
1118
- "3 We will be building an AI bot, which will be a... \n",
1119
- "4 As an AI intern at E2E Networks Limited, you w... \n",
1120
- "5 Responsibilities\\nSelecting features, building... \n",
1121
- "6 Responsibilities:\\nTrain and deploy best in cl... \n",
1122
- "7 About Position\\n\\nWe are looking for a highly ... \n",
1123
- "8 Selected Intern's Day-to-day Responsibilities ... \n",
1124
- "9 Requirements:\\nExcellent knowledge of computer... "
1125
- ]
1126
- },
1127
- "execution_count": 134,
1128
- "metadata": {},
1129
- "output_type": "execute_result"
1130
- }
1131
- ],
1132
  "source": [
1133
  "df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])\n",
1134
  "df"
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": null,
22
  "metadata": {},
23
+ "outputs": [],
 
 
 
 
 
 
 
 
24
  "source": [
25
  "user_input_job_title = input('Enter Job Titles (with comma separated):').split()\n",
26
  "print(user_input_job_title)"
 
28
  },
29
  {
30
  "cell_type": "code",
31
+ "execution_count": null,
32
  "metadata": {},
33
+ "outputs": [],
 
 
 
 
 
 
 
 
34
  "source": [
35
  "b = []\n",
36
  "for i in user_input_job_title:\n",
 
942
  },
943
  {
944
  "cell_type": "code",
945
+ "execution_count": null,
946
  "metadata": {},
947
+ "outputs": [],
 
948
  "source": [
949
  "df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])\n",
950
  "df"
README.md CHANGED
@@ -9,14 +9,111 @@ app_file: "app.py"
9
  pinned: false
10
  ---
11
 
 
12
 
 
 
13
 
 
 
 
 
 
14
 
15
- # AI-Powered Resume Analyzer and LinkedIn Scraper with Selenium
 
 
 
 
16
 
17
- **Introduction**
18
 
19
- Resume Analyzer AI" leverages the power of LLM and OpenAI as an advanced Streamlit application, specializing in thorough resume analysis. It excels at summarizing the resume, evaluating strengths, identifying weaknesses, and offering personalized improvement suggestions, while also recommending the perfect job titles. Additionally, it seamlessly employs Selenium to extract vital LinkedIn data, encompassing company names, job titles, locations, job URLs, and detailed job descriptions. In essence, Resume Analyzer AI simplifies the job-seeking journey by equipping users with comprehensive insights to elevate their career opportunities.
 
20
 
21
  <br />
22
 
@@ -38,7 +135,6 @@ Resume Analyzer AI" leverages the power of LLM and OpenAI as an advanced Streaml
38
  - Pandas
39
  - LangChain
40
  - LLM
41
- - OpenAI
42
  - Selenium
43
  - Streamlit
44
  - Hugging Face
@@ -46,31 +142,16 @@ Resume Analyzer AI" leverages the power of LLM and OpenAI as an advanced Streaml
46
 
47
  <br />
48
 
49
- **Installation**
50
-
51
- To run this project, you need to install the following packages:
52
-
53
- ```python
54
- pip install numpy
55
- pip install pandas
56
- pip install streamlit
57
- pip install streamlit_option_menu
58
- pip install streamlit_extras
59
- pip install PyPDF2
60
- pip install langchain
61
- pip install openai
62
- pip install tiktoken
63
- pip install faiss-cpu
64
- pip install selenium
65
- ```
66
-
67
- <br />
68
-
69
  **Usage**
70
 
71
  To use this project, follow these steps:
72
 
73
- `
 
 
 
 
 
74
  2. Install the required packages: ```pip install -r requirements.txt```
75
  3. Run the Streamlit app: ```streamlit run app.py```
76
  4. Access the app in your browser at ```http://localhost:8501```
@@ -80,20 +161,20 @@ To use this project, follow these steps:
80
  **Features**
81
 
82
  **Easy User Experience:**
83
- - Resume Analyzer AI makes it easy for users. You can upload your resume and enter your OpenAI API key without any hassle. The application is designed to be user-friendly so that anyone can use its powerful resume analysis features.
84
  - It also uses the PyPDF2 library to quickly extract text from your uploaded resume, which is the first step in doing a thorough analysis.
85
 
86
  **Smart Text Analysis with Langchain:**
87
  - What makes it special is how it analyzes text. It uses a smart method called the Langchain library to break long sections of text from resumes into smaller chunks, making them more meaningful.
88
  - This clever technique improves the accuracy of the resume analysis, and it gives users practical advice on how to enhance their job prospects.
89
 
90
- **Enhanced OpenAI Integration with FAISS:**
91
- - Seamlessly connecting to OpenAI services, the application establishes a secure connection using your OpenAI API key. This integration forms the basis for robust interactions, facilitating advanced analysis and efficient information retrieval.
92
  - It uses the FAISS(Facebook AI Similarity Search) library to convert both the text chunks and query text data into numerical vectors, simplifying the analysis process and enabling the retrieval of pertinent information.
93
 
94
  **Intelligent Chunk Selection and LLM:**
95
  - Utilizing similarity search, Resume Analyzer AI compares the query and chunks, enabling the selection of the top 'K' most similar chunks based on their similarity scores.
96
- - Simultaneously, the application creates an OpenAI object, particularly an LLM (Large Language Model), using the ChatGPT 3.5 Turbo model and your OpenAI API key.
97
 
98
  **Robust Question-Answering Pipeline:**
99
  - This integration establishes a robust question-answering (QA) pipeline, making use of the load_qa_chain function, which encompasses multiple components, including the language model.
@@ -105,20 +186,12 @@ To use this project, follow these steps:
105
  - **Weakness:** AI conducts thorough analysis to pinpoint weaknesses and offers tailored solutions for transforming them into strengths, empowering job seekers.
106
  - **Suggestion:** AI provides personalized job title recommendations that align closely with the user's qualifications and resume content, facilitating an optimized job search experience.
107
 
108
- <br />
109
-
110
-
111
-
112
  <br />
113
 
114
  **Selenium-Powered LinkedIn Data Scraping:**
115
  - Utilizing Selenium and a Webdriver automated test tool, this feature enables users to input job titles, automating the data scraping process from LinkedIn. The scraped data includes crucial details such as company names, job titles, locations, URLs, and comprehensive job descriptions.
116
  - This streamlined process enables users to easily review scraped job details and apply for positions, simplifying their job search and application experience.
117
 
118
- <br />
119
-
120
-
121
-
122
  <br />
123
 
124
  **Contributing**
@@ -137,7 +210,5 @@ This project is licensed under the MIT License. Please review the LICENSE file f
137
 
138
  📧 Email: [email protected]
139
 
140
-
141
-
142
  For any further questions or inquiries, feel free to reach out. We are happy to assist you with any queries.
143
 
 
9
  pinned: false
10
  ---
11
 
12
+ # Talent Track AI
13
 
14
+ ## Overview
15
+ "Talent Track AI" leverages the power of LLM as an advanced Streamlit application, specializing in thorough resume analysis. It excels at summarizing the resume, evaluating strengths, identifying weaknesses, and offering personalized improvement suggestions, while also recommending the perfect job titles. Additionally, it seamlessly employs Selenium to extract vital LinkedIn data, encompassing company names, job titles, locations, job URLs, and detailed job descriptions. In essence, Resume Analyzer AI simplifies the job-seeking journey by equipping users with comprehensive insights to elevate their career opportunities.
16
 
17
+ ## Features
18
+ - Resume Analysis
19
+ - LinkedIn Job Scraping
20
+ - Career Guidance
21
+ - Local LLM Processing (No API keys needed)
22
 
23
+ ## System Requirements
24
+ - Python 3.8+
25
+ - 8GB+ RAM
26
+ - 10GB+ free disk space
27
+ - Multi-core CPU recommended
28
 
29
+ ## Setup Instructions
30
 
31
+ 1. Clone the repository:
32
+ ```bash
33
+ git clone https://github.com/yourusername/TalentTrackAI.git
34
+ cd TalentTrackAI
35
+ ```
36
+
37
+ 2. Install required packages:
38
+ ```bash
39
+ pip install -r requirements.txt
40
+ ```
41
+
42
+ 3. Download the model:
43
+ ```bash
44
+ python setup_model.py
45
+ ```
46
+
47
+ 4. Run the application:
48
+ ```bash
49
+ streamlit run app.py
50
+ ```
51
+
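The contents of `setup_model.py` are not part of this commit view, so the snippet below is only a sketch of what such a downloader could look like; the Hugging Face repo id `TheBloke/Llama-2-7B-Chat-GGUF` and the target path `models/llama-2-7b-chat.Q4_K_M.gguf` are assumptions, not something the repository confirms.

```python
# Hypothetical sketch of a model downloader (not the repository's actual setup_model.py).
from pathlib import Path
from huggingface_hub import hf_hub_download

MODEL_REPO = "TheBloke/Llama-2-7B-Chat-GGUF"   # assumed source repo on the Hugging Face Hub
MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"     # assumed quantized variant (~4 GB)
MODEL_DIR = Path("models")

def download_model() -> Path:
    MODEL_DIR.mkdir(exist_ok=True)
    # hf_hub_download fetches (or reuses a cached copy of) the file and returns its local path
    local_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=str(MODEL_DIR))
    return Path(local_path)

if __name__ == "__main__":
    print(f"Model available at: {download_model()}")
```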
52
+ ## Key Components
53
+
54
+ ### Local LLM Integration
55
+ - Uses Llama 2 7B Chat model (quantized version)
56
+ - Processes all requests locally
57
+ - No API keys or internet required for analysis
58
+ - Complete privacy and data security
59
+
60
+ ### Resume Analysis Pipeline
61
+ - PDF text extraction
62
+ - Chunk-based processing
63
+ - Vector embeddings using HuggingFace
64
+ - FAISS for efficient similarity search
65
+ - Local LLM for analysis generation
66
+
67
+ ### LinkedIn Integration
68
+ - Automated job search
69
+ - Real-time data extraction
70
+ - Customizable search parameters
71
+ - Detailed job information retrieval
72
+
73
+ ## Usage Guide
74
+
75
+ 1. **Resume Analysis**
76
+ - Upload your resume (PDF format)
77
+ - Get instant analysis including:
78
+ - Detailed summary
79
+ - Key strengths
80
+ - Areas for improvement
81
+ - Job title suggestions
82
+
83
+ 2. **Job Search**
84
+ - Enter desired job title
85
+ - Specify location (optional)
86
+ - View matching LinkedIn listings
87
+ - Export results if needed
88
+
89
+ ## Technical Details
90
+
91
+ ### Model Specifications
92
+ - Model: Llama 2 7B Chat
93
+ - Format: GGUF (quantized)
94
+ - Context Window: 2048 tokens
95
+ - Memory Usage: ~4GB
96
+ - Processing: CPU-based
97
+
98
+ ### Key Libraries
99
+ - Streamlit
100
+ - LangChain
101
+ - FAISS
102
+ - PyPDF2
103
+ - Selenium
104
+ - HuggingFace Transformers
105
+
106
+ ## Performance Notes
107
+ - First run may take longer due to model loading
108
+ - Subsequent analyses are faster
109
+ - Processing time varies based on resume length
110
+ - RAM usage depends on concurrent operations
111
+
112
+ ## Contributing
113
+ Contributions are welcome! Please feel free to submit a Pull Request.
114
+
115
+ ## License
116
+ This project is licensed under the MIT License - see the LICENSE file for details.
117
 
118
  <br />
119
 
 
135
  - Pandas
136
  - LangChain
137
  - LLM
 
138
  - Selenium
139
  - Streamlit
140
  - Hugging Face
 
142
 
143
  <br />
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  **Usage**
146
 
147
  To use this project, follow these steps:
148
 
149
+ 1. Clone the repository:
150
+ ```bash
151
+ git clone https://github.com/yourusername/TalentTrackAI.git
152
+ cd TalentTrackAI
153
+ ```
154
+
155
  2. Install the required packages: ```pip install -r requirements.txt```
156
  3. Run the Streamlit app: ```streamlit run app.py```
157
  4. Access the app in your browser at ```http://localhost:8501```
 
161
  **Features**
162
 
163
  **Easy User Experience:**
164
+ - Resume Analyzer AI keeps things simple: upload your resume and the analysis runs without any API key or extra setup, so anyone can use its resume analysis features.
165
  - It also uses the PyPDF2 library to quickly extract text from your uploaded resume, which is the first step in doing a thorough analysis.
166
 
167
  **Smart Text Analysis with Langchain:**
168
  - What makes it special is how it analyzes text. It uses a smart method called the Langchain library to break long sections of text from resumes into smaller chunks, making them more meaningful.
169
  - This clever technique improves the accuracy of the resume analysis, and it gives users practical advice on how to enhance their job prospects.
170
 
171
+ **Enhanced LLM Integration with FAISS:**
172
+ - The application uses local LLM processing for all analysis tasks, ensuring privacy and eliminating the need for API keys.
173
  - It uses the FAISS(Facebook AI Similarity Search) library to convert both the text chunks and query text data into numerical vectors, simplifying the analysis process and enabling the retrieval of pertinent information.
174
 
175
  **Intelligent Chunk Selection and LLM:**
176
  - Utilizing similarity search, Resume Analyzer AI compares the query and chunks, enabling the selection of the top 'K' most similar chunks based on their similarity scores.
177
+ - The application processes all requests with a local LLM, so resume data never leaves the user's machine.
178
 
179
  **Robust Question-Answering Pipeline:**
180
  - This integration establishes a robust question-answering (QA) pipeline, making use of the load_qa_chain function, which encompasses multiple components, including the language model.
 
186
  - **Weakness:** AI conducts thorough analysis to pinpoint weaknesses and offers tailored solutions for transforming them into strengths, empowering job seekers.
187
  - **Suggestion:** AI provides personalized job title recommendations that align closely with the user's qualifications and resume content, facilitating an optimized job search experience.
188
 
 
 
 
 
189
  <br />
190
 
191
  **Selenium-Powered LinkedIn Data Scraping:**
192
  - Utilizing Selenium and a Webdriver automated test tool, this feature enables users to input job titles, automating the data scraping process from LinkedIn. The scraped data includes crucial details such as company names, job titles, locations, URLs, and comprehensive job descriptions.
193
  - This streamlined process enables users to easily review scraped job details and apply for positions, simplifying their job search and application experience.
194
 
 
 
 
 
195
  <br />
196
 
197
  **Contributing**
 
210
 
211
  📧 Email: [email protected]
212
 
 
 
213
  For any further questions or inquiries, feel free to reach out. We are happy to assist you with any queries.
214
 
app.py CHANGED
@@ -6,9 +6,9 @@ from streamlit_option_menu import option_menu
6
  from streamlit_extras.add_vertical_space import add_vertical_space
7
  from PyPDF2 import PdfReader
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
- from langchain.embeddings.openai import OpenAIEmbeddings
10
- from langchain.vectorstores import FAISS
11
- from langchain.chat_models import ChatOpenAI
12
  from langchain.chains.question_answering import load_qa_chain
13
  from langchain.memory import ConversationBufferMemory
14
  from langchain.chains import ConversationChain
@@ -16,22 +16,37 @@ from selenium import webdriver
16
  from selenium.webdriver.common.by import By
17
  from selenium.webdriver.common.keys import Keys
18
  from selenium.common.exceptions import NoSuchElementException
 
19
 
20
  import warnings
21
  warnings.filterwarnings('ignore')
22
 
23
- # Get OpenAI API key from secrets
24
- def get_openai_api_key():
25
  try:
26
- key = st.secrets["api_keys"]["openai_api_key"]
27
- return key
28
- except Exception:
29
- if "openai_api_key" in st.session_state:
30
- return st.session_state["openai_api_key"]
31
-
32
- st.error("OpenAI API key not found. Please check your secrets configuration.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  return None
34
-
35
 
36
  def streamlit_config():
37
  st.set_page_config(page_title='Talent Track By AI', layout="wide")
@@ -52,7 +67,7 @@ def process_resume(pdf):
52
  with st.spinner('Processing...'):
53
  pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
54
  summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
55
- summary = resume_analyzer.openai(chunks=pdf_chunks, analyze=summary_prompt)
56
  if summary:
57
  st.session_state['resume_data'] = {
58
  'pdf': pdf,
@@ -77,19 +92,41 @@ class resume_analyzer:
77
  chunks = text_splitter.split_text(text=text)
78
  return chunks
79
 
80
- def openai(chunks, analyze):
81
- openai_api_key = get_openai_api_key()
82
- if not openai_api_key:
83
- st.error("OpenAI API key not found. Please check your secrets configuration.")
84
- return None
85
 
86
- embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
87
- vectorstores = FAISS.from_texts(chunks, embedding=embeddings)
88
- docs = vectorstores.similarity_search(query=analyze, k=3)
89
- llm = ChatOpenAI(model='gpt-3.5-turbo', openai_api_key=openai_api_key)
90
- chain = load_qa_chain(llm=llm, chain_type='stuff')
91
- response = chain.run(input_documents=docs, question=analyze)
92
- return response
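The local-LLM helper that this commit adds in place of the removed `openai()` method is not rendered in this view. Purely as a hedged sketch, a drop-in replacement might look like the following; the function name, model path, and use of `HuggingFaceEmbeddings`/`LlamaCpp` are assumptions, while the FAISS and QA-chain calls mirror the removed code:

```python
# Hypothetical sketch of a local-LLM replacement for the removed openai() helper.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain

def local_llm_analysis(chunks, analyze, model_path="models/llama-2-7b-chat.Q4_K_M.gguf"):
    embeddings = HuggingFaceEmbeddings()                        # local embeddings, no API key
    vectorstores = FAISS.from_texts(chunks, embedding=embeddings)
    docs = vectorstores.similarity_search(query=analyze, k=3)   # top-3 most similar chunks
    llm = LlamaCpp(model_path=model_path, n_ctx=2048, max_tokens=512)  # assumed local model
    chain = load_qa_chain(llm=llm, chain_type='stuff')
    return chain.run(input_documents=docs, question=analyze)
```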
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  def summary_prompt(query_with_chunks):
95
  query = f''' need to detailed summarization of below resume and finally conclude them
@@ -150,13 +187,13 @@ class resume_analyzer:
150
  if pdf is not None:
151
  if process_resume(pdf):
152
  strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
153
- strength = resume_analyzer.openai(chunks=st.session_state['resume_data']['chunks'], analyze=strength_prompt)
154
  if strength:
155
  st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
156
  st.write(strength)
157
  else:
158
  strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
159
- strength = resume_analyzer.openai(chunks=st.session_state['resume_data']['chunks'], analyze=strength_prompt)
160
  if strength:
161
  st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
162
  st.write(strength)
@@ -168,6 +205,7 @@ class resume_analyzer:
168
  """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
169
  '''
170
  return query
 
171
  def resume_weakness():
172
  with st.form(key='Weakness'):
173
  add_vertical_space(1)
@@ -187,13 +225,13 @@ class resume_analyzer:
187
  if pdf is not None:
188
  if process_resume(pdf):
189
  weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
190
- weakness = resume_analyzer.openai(chunks=st.session_state['resume_data']['chunks'], analyze=weakness_prompt)
191
  if weakness:
192
  st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
193
  st.write(weakness)
194
  else:
195
  weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
196
- weakness = resume_analyzer.openai(chunks=st.session_state['resume_data']['chunks'], analyze=weakness_prompt)
197
  if weakness:
198
  st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
199
  st.write(weakness)
@@ -225,186 +263,327 @@ class resume_analyzer:
225
  if pdf is not None:
226
  if process_resume(pdf):
227
  job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
228
- job_title = resume_analyzer.openai(chunks=st.session_state['resume_data']['chunks'], analyze=job_title_prompt)
229
  if job_title:
230
  st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
231
  st.write(job_title)
232
  else:
233
  job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
234
- job_title = resume_analyzer.openai(chunks=st.session_state['resume_data']['chunks'], analyze=job_title_prompt)
235
  if job_title:
236
  st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
237
  st.write(job_title)
238
 
239
  class linkedin_scraper:
 
240
  def webdriver_setup():
241
- options = webdriver.ChromeOptions()
242
- options.add_argument('--headless')
243
- options.add_argument('--no-sandbox')
244
- options.add_argument('--disable-dev-shm-usage')
245
- driver = webdriver.Chrome(options=options)
246
- driver.maximize_window()
247
- return driver
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
 
249
  def get_userinput():
250
- add_vertical_space(2)
251
- with st.form(key='linkedin_scarp'):
252
- add_vertical_space(1)
253
- col1,col2,col3 = st.columns([0.5,0.3,0.2], gap='medium')
254
- with col1:
255
- job_title_input = st.text_input(label='Job Title')
256
- job_title_input = job_title_input.split(',')
257
- with col2:
258
- job_location = st.text_input(label='Job Location', value='India')
259
- with col3:
260
- job_count = st.number_input(label='Job Count', min_value=1, value=1, step=1)
261
- add_vertical_space(1)
262
- submit = st.form_submit_button(label='Submit')
263
- add_vertical_space(1)
264
- return job_title_input, job_location, job_count, submit
265
 
 
266
  def build_url(job_title, job_location):
267
- b = []
268
- for i in job_title:
269
- x = i.split()
270
- y = '%20'.join(x)
271
- b.append(y)
272
- job_title = '%2C%20'.join(b)
273
- link = f"https://in.linkedin.com/jobs/search?keywords={job_title}&location={job_location}&locationId=&geoId=102713980&f_TPR=r604800&position=1&pageNum=0"
274
- return link
275
-
276
- def open_link(driver, link):
277
- while True:
278
- try:
279
- driver.get(link)
280
- driver.implicitly_wait(5)
281
- time.sleep(3)
282
- driver.find_element(by=By.CSS_SELECTOR, value='span.switcher-tabs__placeholder-text.m-auto')
283
- return
284
- except NoSuchElementException:
285
- continue
286
-
287
- def link_open_scrolldown(driver, link, job_count):
288
- linkedin_scraper.open_link(driver, link)
289
- for i in range(0,job_count):
290
- body = driver.find_element(by=By.TAG_NAME, value='body')
291
- body.send_keys(Keys.PAGE_UP)
292
- try:
293
- driver.find_element(by=By.CSS_SELECTOR,
294
- value="button[data-tracking-control-name='public_jobs_contextual-sign-in-modal_modal_dismiss']>icon>svg").click()
295
- except:
296
- pass
297
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
298
- driver.implicitly_wait(2)
299
- try:
300
- x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
301
- driver.implicitly_wait(5)
302
- except:
303
- pass
304
-
305
- def job_title_filter(scrap_job_title, user_job_title_input):
306
- user_input = [i.lower().strip() for i in user_job_title_input]
307
- scrap_title = [i.lower().strip() for i in [scrap_job_title]]
308
- confirmation_count = 0
309
- for i in user_input:
310
- if all(j in scrap_title[0] for j in i.split()):
311
- confirmation_count += 1
312
- if confirmation_count > 0:
313
- return scrap_job_title
314
- else:
315
- return np.nan
316
-
317
- def scrap_company_data(driver, job_title_input, job_location):
318
- company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]')
319
- company_name = [i.text for i in company]
320
- location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
321
- company_location = [i.text for i in location]
322
- title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
323
- job_title = [i.text for i in title]
324
- url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
325
- website_url = [i.get_attribute('href') for i in url]
326
- df = pd.DataFrame(company_name, columns=['Company Name'])
327
- df['Job Title'] = pd.DataFrame(job_title)
328
- df['Location'] = pd.DataFrame(company_location)
329
- df['Website URL'] = pd.DataFrame(website_url)
330
- df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scraper.job_title_filter(x, job_title_input))
331
- df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan)
332
- df = df.dropna()
333
- df.reset_index(drop=True, inplace=True)
334
- return df
335
-
336
- def scrap_job_description(driver, df, job_count):
337
- website_url = df['Website URL'].tolist()
338
- job_description = []
339
- description_count = 0
340
- for i in range(0, len(website_url)):
341
- try:
342
- linkedin_scraper.open_link(driver, website_url[i])
343
- driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
344
- driver.implicitly_wait(5)
345
- time.sleep(1)
346
- description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
347
- data = [i.text for i in description][0]
348
- if len(data.strip()) > 0 and data not in job_description:
349
- job_description.append(data)
350
- description_count += 1
351
- else:
352
- job_description.append('Description Not Available')
353
- except:
354
- job_description.append('Description Not Available')
355
- if description_count == job_count:
356
- break
357
- df = df.iloc[:len(job_description), :]
358
- df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])
359
- df['Job Description'] = df['Job Description'].apply(lambda x: np.nan if x=='Description Not Available' else x)
360
- df = df.dropna()
361
- df.reset_index(drop=True, inplace=True)
362
- return df
363
-
364
- def display_data_userinterface(df_final):
365
- add_vertical_space(1)
366
- if len(df_final) > 0:
367
- for i in range(0, len(df_final)):
368
- st.markdown(f'<h3 style="color: orange;">Job Posting Details : {i+1}</h3>', unsafe_allow_html=True)
369
- st.write(f"Company Name : {df_final.iloc[i,0]}")
370
- st.write(f"Job Title : {df_final.iloc[i,1]}")
371
- st.write(f"Location : {df_final.iloc[i,2]}")
372
- st.write(f"Website URL : {df_final.iloc[i,3]}")
373
- with st.expander(label='Job Desription'):
374
- st.write(df_final.iloc[i, 4])
375
- add_vertical_space(3)
376
- else:
377
- st.markdown(f'<h5 style="text-align: center;color: orange;">No Matching Jobs Found</h5>',
378
- unsafe_allow_html=True)
379
 
380
- def main():
381
- driver = None
382
  try:
383
- job_title_input, job_location, job_count, submit = linkedin_scraper.get_userinput()
384
- add_vertical_space(2)
385
- if submit:
386
- if job_title_input != [] and job_location != '':
387
- with st.spinner('Chrome Webdriver Setup Initializing...'):
388
- driver = linkedin_scraper.webdriver_setup()
389
- with st.spinner('Loading More Job Listings...'):
390
- link = linkedin_scraper.build_url(job_title_input, job_location)
391
- linkedin_scraper.link_open_scrolldown(driver, link, job_count)
392
- with st.spinner('scraping Job Details...'):
393
- df = linkedin_scraper.scrap_company_data(driver, job_title_input, job_location)
394
- df_final = linkedin_scraper.scrap_job_description(driver, df, job_count)
395
- linkedin_scraper.display_data_userinterface(df_final)
396
- elif job_title_input == []:
397
- st.markdown(f'<h5 style="text-align: center;color: orange;">Job Title is Empty</h5>',
398
- unsafe_allow_html=True)
399
- elif job_location == '':
400
- st.markdown(f'<h5 style="text-align: center;color: orange;">Job Location is Empty</h5>',
401
- unsafe_allow_html=True)
 
402
  except Exception as e:
403
- add_vertical_space(2)
404
- st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>', unsafe_allow_html=True)
405
- finally:
406
- if driver:
407
- driver.quit()
 
408
 
409
  class career_chatbot:
410
  def initialize_session_state():
@@ -432,7 +611,7 @@ class career_chatbot:
432
  try:
433
  pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
434
  summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
435
- summary = resume_analyzer.openai(chunks=pdf_chunks, analyze=summary_prompt)
436
  if summary:
437
  st.session_state.resume_data = summary
438
  st.success("Resume processed successfully! The chatbot now has context from your resume.")
@@ -467,11 +646,6 @@ Your responses should be helpful, specific, and actionable. Use bullet points fo
467
  return base_prompt
468
 
469
  def process_user_input():
470
- openai_api_key = get_openai_api_key()
471
- if not openai_api_key:
472
- st.error("OpenAI API key not found. Please check your secrets configuration.")
473
- return
474
-
475
  # Get user input and clear the input box
476
  user_input = st.chat_input("Ask me about careers, job search, or resume advice...")
477
 
@@ -486,7 +660,9 @@ Your responses should be helpful, specific, and actionable. Use bullet points fo
486
  # Generate response using the chatbot
487
  try:
488
  with st.spinner("Thinking..."):
489
- llm = ChatOpenAI(model='gpt-3.5-turbo', openai_api_key=openai_api_key)
 
 
490
 
491
  # Update conversation memory
492
  st.session_state.conversation_memory.chat_memory.add_user_message(user_input)
 
6
  from streamlit_extras.add_vertical_space import add_vertical_space
7
  from PyPDF2 import PdfReader
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain_community.llms import LlamaCpp
12
  from langchain.chains.question_answering import load_qa_chain
13
  from langchain.memory import ConversationBufferMemory
14
  from langchain.chains import ConversationChain
 
16
  from selenium.webdriver.common.by import By
17
  from selenium.webdriver.common.keys import Keys
18
  from selenium.common.exceptions import NoSuchElementException
19
+ import os
20
 
21
  import warnings
22
  warnings.filterwarnings('ignore')
23
 
24
+ def initialize_llm():
25
+ """Initialize the local LLM model with optimized parameters for better performance"""
26
  try:
27
+ model_path = "models/llama-2-7b-chat.Q4_K_M.gguf"
28
+ if not os.path.exists(model_path):
29
+ st.error(f"Model file not found at {model_path}")
30
+ return None
31
+
32
+ st.info("Loading LLM model... This may take a few moments.")
33
+ llm = LlamaCpp(
34
+ model_path=model_path,
35
+ temperature=0.7,
36
+ max_tokens=2000,
37
+ top_p=0.9,
38
+ verbose=True,
39
+ n_ctx=2048,
40
+ n_threads=4,
41
+ n_batch=512,
42
+ n_gpu_layers=0,
43
+ f16_kv=True,
44
+ seed=42
45
+ )
46
+ return llm
47
+ except Exception as e:
48
+ st.error(f"Error initializing LLM: {str(e)}")
49
  return None
 
50
 
51
  def streamlit_config():
52
  st.set_page_config(page_title='Talent Track By AI', layout="wide")
 
67
  with st.spinner('Processing...'):
68
  pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
69
  summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
70
+ summary = resume_analyzer.local_llm(chunks=pdf_chunks, analyze=summary_prompt)
71
  if summary:
72
  st.session_state['resume_data'] = {
73
  'pdf': pdf,
 
92
  chunks = text_splitter.split_text(text=text)
93
  return chunks
94
 
95
+ def local_llm(chunks, analyze):
96
+ try:
97
+ # Initialize embeddings with error handling
98
+ st.info("Initializing embeddings...")
99
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
100
 
101
+ # Create vector store with error handling
102
+ st.info("Creating vector store...")
103
+ text_splitter = RecursiveCharacterTextSplitter(
104
+ chunk_size=500,
105
+ chunk_overlap=50,
106
+ length_function=len
107
+ )
108
+ split_chunks = []
109
+ for chunk in chunks:
110
+ split_chunks.extend(text_splitter.split_text(chunk))
111
+
112
+ vectorstores = FAISS.from_texts(split_chunks, embedding=embeddings)
113
+ docs = vectorstores.similarity_search(query=analyze, k=3)
114
+
115
+ # Get LLM instance
116
+ st.info("Getting LLM instance...")
117
+ llm = initialize_llm()
118
+ if not llm:
119
+ st.error("Failed to initialize LLM")
120
+ return None
121
+
122
+ # Create and run the chain
123
+ st.info("Running analysis...")
124
+ chain = load_qa_chain(llm=llm, chain_type='stuff')
125
+ response = chain.run(input_documents=docs, question=analyze)
126
+ return response
127
+ except Exception as e:
128
+ st.error(f"Error in LLM processing: {str(e)}")
129
+ return None
130
 
131
  def summary_prompt(query_with_chunks):
132
  query = f''' need to detailed summarization of below resume and finally conclude them
 
187
  if pdf is not None:
188
  if process_resume(pdf):
189
  strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
190
+ strength = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=strength_prompt)
191
  if strength:
192
  st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
193
  st.write(strength)
194
  else:
195
  strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
196
+ strength = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=strength_prompt)
197
  if strength:
198
  st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
199
  st.write(strength)
 
205
  """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
206
  '''
207
  return query
208
+
209
  def resume_weakness():
210
  with st.form(key='Weakness'):
211
  add_vertical_space(1)
 
225
  if pdf is not None:
226
  if process_resume(pdf):
227
  weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
228
+ weakness = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=weakness_prompt)
229
  if weakness:
230
  st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
231
  st.write(weakness)
232
  else:
233
  weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
234
+ weakness = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=weakness_prompt)
235
  if weakness:
236
  st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
237
  st.write(weakness)
 
263
  if pdf is not None:
264
  if process_resume(pdf):
265
  job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
266
+ job_title = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=job_title_prompt)
267
  if job_title:
268
  st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
269
  st.write(job_title)
270
  else:
271
  job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
272
+ job_title = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=job_title_prompt)
273
  if job_title:
274
  st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
275
  st.write(job_title)
276
 
277
  class linkedin_scraper:
278
+ @staticmethod
279
  def webdriver_setup():
280
+ """Set up Chrome webdriver with enhanced anti-detection measures"""
281
+ try:
282
+ options = webdriver.ChromeOptions()
283
+
284
+ # Basic options
285
+ options.add_argument('--no-sandbox')
286
+ options.add_argument('--disable-dev-shm-usage')
287
+ options.add_argument('--disable-gpu')
288
+ options.add_argument('--disable-extensions')
289
+ options.add_argument('--disable-notifications')
290
+
291
+ # Window size and display
292
+ options.add_argument('--window-size=1920,1080')
293
+ options.add_argument('--start-maximized')
294
+
295
+ # Enhanced privacy and security settings
296
+ options.add_argument('--disable-blink-features=AutomationControlled')
297
+ options.add_argument('--disable-web-security')
298
+ options.add_argument('--allow-running-insecure-content')
299
+ options.add_argument('--ignore-certificate-errors')
300
+ options.add_argument('--ignore-ssl-errors')
301
+
302
+ # Random user agent
303
+ user_agents = [
304
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
305
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
306
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
307
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Edge/120.0.0.0'
308
+ ]
309
+ user_agent = np.random.choice(user_agents)
310
+ options.add_argument(f'--user-agent={user_agent}')
311
+
312
+ # Experimental options
313
+ options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
314
+ options.add_experimental_option('useAutomationExtension', False)
315
+
316
+ # Create driver
317
+ driver = webdriver.Chrome(options=options)
318
+
319
+ # Additional JavaScript to avoid detection
320
+ driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": user_agent})
321
+
322
+ # Modify navigator properties
323
+ driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
324
+ driver.execute_script("Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']})")
325
+ driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]})")
326
+
327
+ # Set viewport and window size
328
+ driver.execute_cdp_cmd('Emulation.setDeviceMetricsOverride', {
329
+ 'mobile': False,
330
+ 'width': 1920,
331
+ 'height': 1080,
332
+ 'deviceScaleFactor': 1,
333
+ })
334
+
335
+ return driver
336
+
337
+ except Exception as e:
338
+ st.error(f"Failed to initialize Chrome driver: {str(e)}")
339
+ st.info("Please ensure Chrome browser is installed and updated to the latest version")
340
+ return None
341
 
342
+ @staticmethod
343
  def get_userinput():
344
+ """Get job search parameters from user"""
345
+ job_title = st.text_input('Enter Job Titles (comma separated):', 'Data Scientist')
346
+ job_location = st.text_input('Enter Job Location:', 'India')
347
+ job_count = st.number_input('Enter Number of Jobs to Scrape (max 100):', min_value=1, max_value=100, value=2)
348
+ return job_title.split(','), job_location, job_count
 
349
 
350
+ @staticmethod
351
  def build_url(job_title, job_location):
352
+ """Build LinkedIn search URL"""
353
+ formatted_title = '%20'.join(job_title[0].strip().split()) # Use first job title only
354
+ formatted_location = '%20'.join(job_location.split())
355
+ return f"https://www.linkedin.com/jobs/search?keywords={formatted_title}&location={formatted_location}"
356
+
357
+ @staticmethod
358
+ def scroll_page(driver, job_count):
359
+ """Scroll page to load more jobs"""
360
+ try:
361
+ st.info("Scrolling page to load more jobs...")
362
+ # Calculate number of scrolls needed (25 jobs per scroll approximately)
363
+ scrolls = min(job_count // 25 + 1, 4)
364
+
365
+ for i in range(scrolls):
366
+ st.info(f"Scroll attempt {i+1}/{scrolls}")
367
+ # Scroll to bottom
368
+ driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
369
+ time.sleep(4) # Wait for content to load
370
+
371
+ try:
372
+ # Try to click a "Show more" button if present (a CSS selector group covers the known layout variants)
373
+ show_more_buttons = driver.find_elements(by=By.CSS_SELECTOR, value=(
374
+ "button.infinite-scroller__show-more-button, "
375
+ "button.see-more-jobs, "
376
+ "button[data-tracking-control-name='infinite-scroller_show-more']"
377
+ ))
378
+
379
+ for button in show_more_buttons:
380
+ if button.is_displayed():
381
+ driver.execute_script("arguments[0].click();", button)
382
+ time.sleep(3) # Wait for new content
383
+ break
384
+
385
+ except Exception as e:
386
+ st.warning(f"Could not find or click 'Show more' button: {str(e)}")
387
+
388
+ # Additional wait after last scroll
389
+ if i == scrolls - 1:
390
+ time.sleep(5)
391
+
392
+ except Exception as e:
393
+ st.warning(f"Error during page scrolling: {str(e)}")
394
+
395
+ @staticmethod
396
+ def scrape_jobs(driver, job_count):
397
+ """Scrape job listings from LinkedIn with updated selectors"""
398
+ jobs_data = {
399
+ 'company_name': [],
400
+ 'job_title': [],
401
+ 'location': [],
402
+ 'job_url': []
403
+ }
 
 
404
 
 
 
405
  try:
406
+ # Wait for job cards to load with explicit wait
407
+ st.info("Waiting for page to load...")
408
+ time.sleep(8) # Increased initial wait time
409
+
410
+ # Try multiple selectors for job cards
411
+ selectors = [
412
+ "div.job-card-container",
413
+ "li.jobs-search-results__list-item",
414
+ "div.base-card",
415
+ "div.job-search-card",
416
+ "li.jobs-search-results-list__list-item"
417
+ ]
418
+
419
+ job_cards = []
420
+ for selector in selectors:
421
+ try:
422
+ job_cards = driver.find_elements(by=By.CSS_SELECTOR, value=selector)
423
+ if job_cards:
424
+ st.success(f"Found job cards using selector: {selector}")
425
+ break
426
+ except:
427
+ continue
428
+
429
+ if not job_cards:
430
+ st.error("Could not find any job listings. LinkedIn might have updated their page structure.")
431
+ return pd.DataFrame(jobs_data)
432
+
433
+ # Limit to requested number
434
+ job_cards = job_cards[:job_count]
435
+
436
+ st.info(f"Processing {len(job_cards)} job cards...")
437
+
438
+ for card in job_cards:
439
+ try:
440
+ # Company name selectors
441
+ company_selectors = [
442
+ ".job-card-container__company-name",
443
+ ".base-search-card__subtitle",
444
+ ".company-name",
445
+ "span[data-tracking-control-name='public_jobs_company_name']",
446
+ ".job-card-container__primary-description"
447
+ ]
448
+
449
+ # Job title selectors
450
+ title_selectors = [
451
+ ".job-card-container__title",
452
+ ".base-search-card__title",
453
+ ".job-card-list__title",
454
+ "h3.base-search-card__title",
455
+ ".job-search-card__title"
456
+ ]
457
+
458
+ # Location selectors
459
+ location_selectors = [
460
+ ".job-card-container__metadata-item",
461
+ ".base-search-card__metadata",
462
+ ".job-search-card__location",
463
+ "span[data-tracking-control-name='public_jobs_job-location']",
464
+ ".job-card-container__metadata-wrapper"
465
+ ]
466
+
467
+ # Try to find company name
468
+ company = None
469
+ for selector in company_selectors:
470
+ try:
471
+ element = card.find_element(by=By.CSS_SELECTOR, value=selector)
472
+ company = element.text.strip()
473
+ if company:
474
+ break
475
+ except:
476
+ continue
477
+
478
+ # Try to find job title
479
+ title = None
480
+ for selector in title_selectors:
481
+ try:
482
+ element = card.find_element(by=By.CSS_SELECTOR, value=selector)
483
+ title = element.text.strip()
484
+ if title:
485
+ break
486
+ except:
487
+ continue
488
+
489
+ # Try to find location
490
+ location = None
491
+ for selector in location_selectors:
492
+ try:
493
+ element = card.find_element(by=By.CSS_SELECTOR, value=selector)
494
+ location = element.text.strip()
495
+ if location:
496
+ break
497
+ except:
498
+ continue
499
+
500
+ # Try to find URL
501
+ try:
502
+ url = card.find_element(by=By.CSS_SELECTOR, value="a").get_attribute("href")
503
+ except:
504
+ try:
505
+ url = card.find_element(by=By.CSS_SELECTOR, value="a.base-card__full-link").get_attribute("href")
506
+ except:
507
+ url = None
508
+
509
+ if all([company, title, location, url]):
510
+ jobs_data['company_name'].append(company)
511
+ jobs_data['job_title'].append(title)
512
+ jobs_data['location'].append(location)
513
+ jobs_data['job_url'].append(url)
514
+ st.success(f"Successfully scraped job: {title} at {company}")
515
+
516
+ except Exception as e:
517
+ st.warning(f"Failed to scrape a job card: {str(e)}")
518
+ continue
519
+
520
+ if not jobs_data['company_name']:
521
+ st.error("Could not extract any job information. LinkedIn might be blocking automated access.")
522
+
523
  except Exception as e:
524
+ st.error(f"Error during job scraping: {str(e)}")
525
+
526
+ return pd.DataFrame(jobs_data)
527
+
528
+ @staticmethod
529
+ def display_results(df):
530
+ """Display scraped job results"""
531
+ if df.empty:
532
+ st.error("No jobs were found. Please try again with different search parameters.")
533
+ return
534
+
535
+ st.markdown('### 📊 Scraped Job Listings')
536
+
537
+ # Display summary statistics
538
+ st.markdown(f"**Total Jobs Found:** {len(df)}")
539
+ st.markdown(f"**Unique Companies:** {df['company_name'].nunique()}")
540
+ st.markdown(f"**Locations Covered:** {df['location'].nunique()}")
541
+
542
+ # Display the dataframe
543
+ st.dataframe(df)
544
+
545
+ # Add download button
546
+ csv = df.to_csv(index=False).encode('utf-8')
547
+ st.download_button(
548
+ "Download Results as CSV",
549
+ csv,
550
+ "linkedin_jobs.csv",
551
+ "text/csv",
552
+ key='download-csv'
553
+ )
554
+
555
+ def main():
556
+ st.markdown('## 🔍 LinkedIn Job Search')
557
+
558
+ job_titles, job_location, job_count = linkedin_scraper.get_userinput()
559
+
560
+ if st.button('Start Scraping'):
561
+ with st.spinner('Scraping LinkedIn jobs...'):
562
+ try:
563
+ driver = linkedin_scraper.webdriver_setup()
564
+ if driver is None:
565
+ return
566
+
567
+ url = linkedin_scraper.build_url(job_titles, job_location)
568
+ st.info(f"Searching: {url}")
569
+
570
+ driver.get(url)
571
+ time.sleep(5) # Increased initial wait time
572
+
573
+ linkedin_scraper.scroll_page(driver, job_count)
574
+ df = linkedin_scraper.scrape_jobs(driver, job_count)
575
+
576
+ driver.quit()
577
+
578
+ if not df.empty:
579
+ linkedin_scraper.display_results(df)
580
+ else:
581
+ st.error('No jobs found matching your criteria. Try different search terms or location.')
582
+
583
+ except Exception as e:
584
+ st.error(f'An error occurred while scraping: {str(e)}')
585
+ if 'driver' in locals():
586
+ driver.quit()
587
 
588
  class career_chatbot:
589
  def initialize_session_state():
 
611
  try:
612
  pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
613
  summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
614
+ summary = resume_analyzer.local_llm(chunks=pdf_chunks, analyze=summary_prompt)
615
  if summary:
616
  st.session_state.resume_data = summary
617
  st.success("Resume processed successfully! The chatbot now has context from your resume.")
 
646
  return base_prompt
647
 
648
  def process_user_input():
 
649
  # Get user input and clear the input box
650
  user_input = st.chat_input("Ask me about careers, job search, or resume advice...")
651
 
 
660
  # Generate response using the chatbot
661
  try:
662
  with st.spinner("Thinking..."):
663
+ llm = initialize_llm()
664
+ if not llm:
665
+ raise Exception("Failed to initialize LLM")
666
 
667
  # Update conversation memory
668
  st.session_state.conversation_memory.chat_memory.add_user_message(user_input)
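For a quick sanity check of the new local pipeline outside Streamlit, here is a minimal sketch of the same retrieve-then-answer flow that resume_analyzer.local_llm() runs. It assumes the GGUF model downloaded by setup_model.py and the sentence-transformers embedding model are available locally; the chunks and question are illustrative placeholders, not project code.

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import LlamaCpp
from langchain.chains.question_answering import load_qa_chain

# Placeholder resume chunks and analysis question (illustrative only)
chunks = ["First resume text chunk...", "Second resume text chunk..."]
question = "Give a detailed summary of this resume."

# Embed the chunks and retrieve the passages most relevant to the question
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_texts(chunks, embedding=embeddings)
docs = store.similarity_search(query=question, k=3)

# Answer with the local Llama 2 model using the same 'stuff' QA chain as app.py
llm = LlamaCpp(model_path="models/llama-2-7b-chat.Q4_K_M.gguf", n_ctx=2048, temperature=0.7)
chain = load_qa_chain(llm=llm, chain_type="stuff")
print(chain.run(input_documents=docs, question=question))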
config.py DELETED
File without changes
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- numpy
2
- pandas
3
- streamlit
4
- streamlit_option_menu
5
- streamlit_extras
6
- PyPDF2
7
- langchain==0.0.302
8
- openai
9
- tiktoken
10
- faiss-cpu
11
- selenium
 
1
+ streamlit==1.32.0
2
+ streamlit-option-menu==0.3.12
3
+ streamlit-extras==0.3.1
4
+ PyPDF2==3.0.1
5
+ langchain==0.1.12
6
+ faiss-cpu==1.8.0
7
+ sentence-transformers==2.5.1
8
+ llama-cpp-python==0.2.56
9
+ selenium==4.18.1
10
+ pandas==2.2.1
11
+ numpy==1.26.4
setup_model.py ADDED
@@ -0,0 +1,43 @@
 
1
+ import os
2
+ import requests
3
+ from tqdm import tqdm
4
+
5
+ def download_file(url, filename):
6
+ response = requests.get(url, stream=True)
7
+ total_size = int(response.headers.get('content-length', 0))
8
+
9
+ with open(filename, 'wb') as file, tqdm(
10
+ desc=filename,
11
+ total=total_size,
12
+ unit='iB',
13
+ unit_scale=True,
14
+ unit_divisor=1024,
15
+ ) as pbar:
16
+ for data in response.iter_content(chunk_size=1024):
17
+ size = file.write(data)
18
+ pbar.update(size)
19
+
20
+ def main():
21
+ # Create models directory if it doesn't exist
22
+ if not os.path.exists('models'):
23
+ os.makedirs('models')
24
+
25
+ # Model URL (4-bit Q4_K_M quantization of Llama 2 7B Chat, roughly a 4 GB download)
26
+ model_url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
27
+ model_path = "models/llama-2-7b-chat.gguf"
28
+
29
+ print("Starting model download...")
30
+ print("This may take a while depending on your internet connection.")
31
+ print("The model is about 4GB in size.")
32
+
33
+ try:
34
+ download_file(model_url, model_path)
35
+ print("\nModel downloaded successfully!")
36
+ print(f"Model saved to: {model_path}")
37
+ except Exception as e:
38
+ print(f"Error downloading model: {str(e)}")
39
+ print("Please try downloading manually from: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF")
40
+ print("And place the model file in the 'models' directory.")
41
+
42
+ if __name__ == "__main__":
43
+ main()
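Note that setup_model.py imports requests and tqdm, which are not pinned in requirements.txt and may need to be installed separately. After running it, a small check like the sketch below (assuming the same hard-coded path as initialize_llm() in app.py) confirms the model landed where the app expects it:

import os

# Path hard-coded in app.py's initialize_llm(); adjust if the model lives elsewhere
model_path = "models/llama-2-7b-chat.Q4_K_M.gguf"
if os.path.exists(model_path):
    print(f"Model ready: {model_path} ({os.path.getsize(model_path) / 1e9:.1f} GB)")
else:
    print("Model missing - run `python setup_model.py` first")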
temp_fix.py ADDED
@@ -0,0 +1,35 @@
 
1
+ def local_llm(chunks, analyze):
2
+ try:
3
+ # Initialize embeddings with error handling
4
+ st.info("Initializing embeddings...")
5
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
6
+
7
+ # Create vector store with error handling
8
+ st.info("Creating vector store...")
9
+ text_splitter = RecursiveCharacterTextSplitter(
10
+ chunk_size=500,
11
+ chunk_overlap=50,
12
+ length_function=len
13
+ )
14
+ split_chunks = []
15
+ for chunk in chunks:
16
+ split_chunks.extend(text_splitter.split_text(chunk))
17
+
18
+ vectorstores = FAISS.from_texts(split_chunks, embedding=embeddings)
19
+ docs = vectorstores.similarity_search(query=analyze, k=3)
20
+
21
+ # Get LLM instance
22
+ st.info("Getting LLM instance...")
23
+ llm = initialize_llm()
24
+ if not llm:
25
+ st.error("Failed to initialize LLM")
26
+ return None
27
+
28
+ # Create and run the chain
29
+ st.info("Running analysis...")
30
+ chain = load_qa_chain(llm=llm, chain_type='stuff')
31
+ response = chain.run(input_documents=docs, question=analyze)
32
+ return response
33
+ except Exception as e:
34
+ st.error(f"Error in LLM processing: {str(e)}")
35
+ return None