Files changed:

- .gitignore (+1 / -3)
- .streamlit/config.toml (+12 / -3)
- AI_Powered_Resume_Analyzer.ipynb (+0 / -384)
- LinkedIn_scraper_with_Selenium.ipynb (+6 / -190)
- README.md (+110 / -39)
- app.py (+375 / -199)
- config.py (+0 / -0)
- requirements.txt (+11 / -11)
- setup_model.py (+43 / -0)
- temp_fix.py (+35 / -0)
.gitignore CHANGED
@@ -1,3 +1 @@
-.
-.streamlit/secrets.toml
-chat.py
+DOCUMENTATION.md
.streamlit/config.toml CHANGED
@@ -1,7 +1,16 @@
+[server]
+port = 8501
+address = "localhost"
+maxUploadSize = 5
+
+[browser]
+serverAddress = "localhost"
+serverPort = 8501
+
 [theme]
 base="dark"
 primaryColor="#FF4B4B"
-backgroundColor="#…"
-secondaryBackgroundColor="#…"
-textColor="#…"
+backgroundColor="#FFFFFF"
+secondaryBackgroundColor="#F0F2F6"
+textColor="#262730"
 font="sans serif"
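A practical note on the new `[server]` block: `maxUploadSize = 5` caps Streamlit uploads at 5 MB. A minimal sketch of the kind of uploader widget this limit applies to (illustrative only, not the app's exact code):

```python
import streamlit as st

# With maxUploadSize = 5 in .streamlit/config.toml, Streamlit rejects uploads
# larger than 5 MB before they ever reach this widget.
pdf = st.file_uploader(label='Upload Your Resume', type='pdf')
if pdf is not None:
    st.write(f'Received {pdf.name} ({pdf.size / 1024:.0f} KB)')
```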
AI_Powered_Resume_Analyzer.ipynb DELETED
@@ -1,384 +0,0 @@

This commit removes the exploratory notebook (384 lines of JSON, Python 3.11.6 kernel) that prototyped the original OpenAI-based analysis pipeline. Its code cells were:

```python
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
import warnings
warnings.filterwarnings('ignore')
```

```python
pdf = "/content/resume.pdf"
pdf_reader = PdfReader(pdf)
print(pdf_reader)
```

```python
# extract text from each page separately
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()

print(text)
```

```python
# Split the long text into small chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700,
                                               chunk_overlap=200,
                                               length_function=len)

chunks = text_splitter.split_text(text=text)
chunks
```

The saved outputs of these cells showed the sample resume (a data science profile with work experience, education, skills, certification, and project sections) split into nine overlapping chunks. Inspecting `chunks[0]` and `chunks[1]` was followed by a markdown cell noting that the text "linkedin.com/in/gopiashokan … 05/2019 - 12/2022, Namakkal" is common (overlap) to both chunks, and that `chunk_overlap=200` is a maximum length, i.e. the shared text never exceeds 200 characters.

```python
openai_api_key = input('Enter you OpenAI API Key: ')
```

```python
def openai(openai_api_key, chunks, analyze):

    # Use the OpenAI service for embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # FAISS (Facebook AI Similarity Search) converts the text chunks to numerical vectors
    vectorstores = FAISS.from_texts(chunks, embedding=embeddings)

    # Compare the query and chunks, selecting the top 'K' most similar chunks by similarity score
    docs = vectorstores.similarity_search(query=analyze, k=3)

    # Create the chat model object, using the GPT-3.5 Turbo model
    llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)

    # Question-answering (QA) pipeline built with the load_qa_chain function
    chain = load_qa_chain(llm=llm, chain_type='stuff')

    response = chain.run(input_documents=docs, question=analyze)
    return response
```

```python
def resume_summary(query_with_chunks):
    query = f''' need to detailed summarization of below resume and finally conclude them
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    {query_with_chunks}
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    '''
    return query

summary = resume_summary(query_with_chunks=chunks)
summary_result = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=summary)
print(summary_result)
```

```python
def resume_strength(query_with_chunks):
    query = f'''need to detailed analysis and explain of the strength of below resume and finally conclude them
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    {query_with_chunks}
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    '''
    return query

strength = resume_strength(query_with_chunks=summary_result)
strength_result = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=strength)
print(strength_result)
```

```python
def resume_weakness(query_with_chunks):
    query = f'''need to detailed analysis and explain of the weakness of below resume and how to improve make a better resume.
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    {query_with_chunks}
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    '''
    return query

weakness = resume_weakness(query_with_chunks=summary_result)
result_weakness = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=weakness)
print(result_weakness)
```

```python
def job_title_suggestion(query_with_chunks):

    query = f''' what are the job roles i apply to likedin based on below?
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    {query_with_chunks}
    """""""""""""""""""""""""""""""""""""""""""""""""""""""""
    '''
    return query

suggestion = job_title_suggestion(query_with_chunks=summary_result)
result_suggestion = openai(openai_api_key=openai_api_key, chunks=chunks, analyze=suggestion)
print(result_suggestion)
```

The notebook's saved outputs for these four prompts were the generated resume summary, an eight-point list of strengths, a weakness analysis with improvement suggestions, and ten suggested LinkedIn job titles (Data Scientist, Data Analyst, Machine Learning Engineer, Business Analyst, Project Manager, Operations Analyst, Quality Assurance Engineer, Sales Analyst, AI Engineer, Retail Analyst).
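The notebook's point that `chunk_overlap` is a maximum can be checked in isolation. The following standalone sketch (not part of the repository) uses the same splitter class on a toy string:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

# A toy string with no natural separators, so the splitter falls back to
# character-level splitting: chunks of at most 10 characters, re-using at most
# 4 characters between consecutive chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=10,
                                          chunk_overlap=4,
                                          length_function=len)

chunks = splitter.split_text("abcdefghijklmnopqrstuvwxyz0123456789")
print(chunks)

# Every chunk respects the size limit; the overlap is an upper bound, not a guarantee.
assert all(len(chunk) <= 10 for chunk in chunks)
```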
LinkedIn_scraper_with_Selenium.ipynb CHANGED

The change resets three code cells: the saved execution counts and captured outputs are cleared, while the cell sources are unchanged.

@@ -18,17 +18,9 @@
 },
 {
 "cell_type": "code",
-"execution_count": …,
+"execution_count": null,
 "metadata": {},
-"outputs": [
-    {
-        "name": "stdout",
-        "output_type": "stream",
-        "text": [
-            "['data', 'scientist', 'artificial', 'intelligence', 'ai']\n"
-        ]
-    }
-],
+"outputs": [],
 "source": [
 "user_input_job_title = input('Enter Job Titles (with comma separated):').split()\n",
 "print(user_input_job_title)"

@@ -36,17 +28,9 @@
 },
 {
 "cell_type": "code",
-"execution_count": …,
+"execution_count": null,
 "metadata": {},
-"outputs": [
-    {
-        "name": "stdout",
-        "output_type": "stream",
-        "text": [
-            "data%2C%20scientist%2C%20artificial%2C%20intelligence%2C%20ai\n"
-        ]
-    }
-],
+"outputs": [],
 "source": [
 "b = []\n",
 "for i in user_input_job_title:\n",

@@ -958,177 +942,9 @@
 },
 {
 "cell_type": "code",
-"execution_count": …,
+"execution_count": null,
 "metadata": {},
-"outputs": [ … ],
+"outputs": [],
 "source": [
 "df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])\n",
 "df"

The removed output of the last cell (execution_count 134) was the rendered DataFrame of the ten scraped job postings:

| # | Company Name | Job Title | Location | Website URL | Job Description |
|---|--------------|-----------|----------|-------------|-----------------|
| 0 | nasscom | Artificial Intelligence (AI) | Noida, Uttar Pradesh, India | https://in.linkedin.com/jobs/view/artificial-i... | Selected Intern's Day-to-day Responsibilities ... |
| 1 | Deloitte | Data Scientist | Gurugram, Haryana, India | https://in.linkedin.com/jobs/view/data-scienti... | What impact will you make?\nEvery day, your wo... |
| 2 | L&T Technology Services | Data Scientist | Hyderabad, Telangana, India | https://in.linkedin.com/jobs/view/data-scienti... | About the Role\nWe are looking for Data Scient... |
| 3 | Api Logistics | Artificial Intelligence (AI) | Gurgaon, Haryana, India | https://in.linkedin.com/jobs/view/artificial-i... | We will be building an AI bot, which will be a... |
| 4 | E2E Networks Limited | Artificial Intelligence (AI) | Delhi, India | https://in.linkedin.com/jobs/view/artificial-i... | As an AI intern at E2E Networks Limited, you w... |
| 5 | Factspan | Data Scientist | Bengaluru, Karnataka, India | https://in.linkedin.com/jobs/view/data-scienti... | Responsibilities\nSelecting features, building... |
| 6 | MakeMyTrip | Senior/Lead Data Scientist | Bengaluru, Karnataka, India | https://in.linkedin.com/jobs/view/senior-lead-... | Responsibilities:\nTrain and deploy best in cl... |
| 7 | Persistent Systems | Senior Data Scientist | Pune, Maharashtra, India | https://in.linkedin.com/jobs/view/senior-data-... | About Position\n\nWe are looking for a highly ... |
| 8 | CodeRoofs IT Solutions | Generative Artificial Intelligence (AI) | Sahibzada Ajit Singh Nagar, Punjab, India | https://in.linkedin.com/jobs/view/generative-a... | Selected Intern's Day-to-day Responsibilities ... |
| 9 | LENS Corporation | Artificial Intelligence Researcher | Gurugram, Haryana, India | https://in.linkedin.com/jobs/view/artificial-i... | Requirements:\nExcellent knowledge of computer... |
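The second cell's captured output (`data%2C%20scientist%2C…`) shows the entered keywords being percent-encoded for the LinkedIn search URL. A minimal sketch of that step; the search-URL shape is an assumption, since the notebook's URL-building cell is not visible in this hunk:

```python
from urllib.parse import quote

# Join the entered job titles and percent-encode them for the 'keywords'
# query parameter (',' becomes %2C and ' ' becomes %20), matching the output
# captured above.
user_input_job_title = ['data', 'scientist', 'artificial', 'intelligence', 'ai']
keywords = quote(', '.join(user_input_job_title))
print(keywords)  # data%2C%20scientist%2C%20artificial%2C%20intelligence%2C%20ai

# Hypothetical search URL; the exact URL assembled by the notebook is not shown here.
url = f"https://www.linkedin.com/jobs/search?keywords={keywords}&location=India"
print(url)
```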
README.md CHANGED

@@ -9,14 +9,111 @@ app_file: "app.py"
 pinned: false
 ---
 
-…
-…
-…
+# Talent Track AI
+
+## Overview
+"Talent Track AI" leverages the power of LLM as an advanced Streamlit application, specializing in thorough resume analysis. It excels at summarizing the resume, evaluating strengths, identifying weaknesses, and offering personalized improvement suggestions, while also recommending the perfect job titles. Additionally, it seamlessly employs Selenium to extract vital LinkedIn data, encompassing company names, job titles, locations, job URLs, and detailed job descriptions. In essence, Resume Analyzer AI simplifies the job-seeking journey by equipping users with comprehensive insights to elevate their career opportunities.
+
+## Features
+- Resume Analysis
+- LinkedIn Job Scraping
+- Career Guidance
+- Local LLM Processing (No API keys needed)
+
+## System Requirements
+- Python 3.8+
+- 8GB+ RAM
+- 10GB+ free disk space
+- Multi-core CPU recommended
+
+## Setup Instructions
+
+1. Clone the repository:
+```bash
+git clone https://github.com/yourusername/TalentTrackAI.git
+cd TalentTrackAI
+```
+
+2. Install required packages:
+```bash
+pip install -r requirements.txt
+```
+
+3. Download the model:
+```bash
+python setup_model.py
+```
+
+4. Run the application:
+```bash
+streamlit run app.py
+```
+
+## Key Components
+
+### Local LLM Integration
+- Uses Llama 2 7B Chat model (quantized version)
+- Processes all requests locally
+- No API keys or internet required for analysis
+- Complete privacy and data security
+
+### Resume Analysis Pipeline
+- PDF text extraction
+- Chunk-based processing
+- Vector embeddings using HuggingFace
+- FAISS for efficient similarity search
+- Local LLM for analysis generation
+
+### LinkedIn Integration
+- Automated job search
+- Real-time data extraction
+- Customizable search parameters
+- Detailed job information retrieval
+
+## Usage Guide
+
+1. **Resume Analysis**
+   - Upload your resume (PDF format)
+   - Get instant analysis including:
+     - Detailed summary
+     - Key strengths
+     - Areas for improvement
+     - Job title suggestions
+
+2. **Job Search**
+   - Enter desired job title
+   - Specify location (optional)
+   - View matching LinkedIn listings
+   - Export results if needed
+
+## Technical Details
+
+### Model Specifications
+- Model: Llama 2 7B Chat
+- Format: GGUF (quantized)
+- Context Window: 2048 tokens
+- Memory Usage: ~4GB
+- Processing: CPU-based
+
+### Key Libraries
+- Streamlit
+- LangChain
+- FAISS
+- PyPDF2
+- Selenium
+- HuggingFace Transformers
+
+## Performance Notes
+- First run may take longer due to model loading
+- Subsequent analyses are faster
+- Processing time varies based on resume length
+- RAM usage depends on concurrent operations
+
+## Contributing
+Contributions are welcome! Please feel free to submit a Pull Request.
+
+## License
+This project is licensed under the MIT License - see the LICENSE file for details.
 
 <br />
 

@@ -38,7 +135,6 @@ Resume Analyzer AI" leverages the power of LLM and OpenAI as an advanced Streaml…
 - Pandas
 - LangChain
 - LLM
-- OpenAI
 - Selenium
 - Streamlit
 - Hugging Face

@@ -46,31 +142,16 @@ Resume Analyzer AI" leverages the power of LLM and OpenAI as an advanced Streaml…
 
 <br />
 
-**Installation**
-
-To run this project, you need to install the following packages:
-
-```python
-pip install numpy
-pip install pandas
-pip install streamlit
-pip install streamlit_option_menu
-pip install streamlit_extras
-pip install PyPDF2
-pip install langchain
-pip install openai
-pip install tiktoken
-pip install faiss-cpu
-pip install selenium
-```
-
-<br />
-
 **Usage**
 
 To use this project, follow these steps:
 
-…
+1. Clone the repository:
+```bash
+git clone https://github.com/yourusername/TalentTrackAI.git
+cd TalentTrackAI
+```
+
 2. Install the required packages: ```pip install -r requirements.txt```
 3. Run the Streamlit app: ```streamlit run app.py```
 4. Access the app in your browser at ```http://localhost:8501```

@@ -80,20 +161,20 @@ To use this project, follow these steps:
 **Features**
 
 **Easy User Experience:**
-- Resume Analyzer AI makes it easy for users. You can upload your resume…
+- Resume Analyzer AI makes it easy for users. You can upload your resume without any hassle. The application is designed to be user-friendly so that anyone can use its powerful resume analysis features.
 - It also uses the PyPDF2 library to quickly extract text from your uploaded resume, which is the first step in doing a thorough analysis.
 
 **Smart Text Analysis with Langchain:**
 - What makes it special is how it analyzes text. It uses a smart method called the Langchain library to break long sections of text from resumes into smaller chunks, making them more meaningful.
 - This clever technique improves the accuracy of the resume analysis, and it gives users practical advice on how to enhance their job prospects.
 
-**Enhanced …**
-- …
+**Enhanced LLM Integration with FAISS:**
+- The application uses local LLM processing for all analysis tasks, ensuring privacy and eliminating the need for API keys.
 - It uses the FAISS(Facebook AI Similarity Search) library to convert both the text chunks and query text data into numerical vectors, simplifying the analysis process and enabling the retrieval of pertinent information.
 
 **Intelligent Chunk Selection and LLM:**
 - Utilizing similarity search, Resume Analyzer AI compares the query and chunks, enabling the selection of the top 'K' most similar chunks based on their similarity scores.
-- …
+- The application processes all requests using a local LLM model, ensuring complete privacy and data security.
 
 **Robust Question-Answering Pipeline:**
 - This integration establishes a robust question-answering (QA) pipeline, making use of the load_qa_chain function, which encompasses multiple components, including the language model.

@@ -105,20 +186,12 @@ To use this project, follow these steps:
 - **Weakness:** AI conducts thorough analysis to pinpoint weaknesses and offers tailored solutions for transforming them into strengths, empowering job seekers.
 - **Suggestion:** AI provides personalized job title recommendations that align closely with the user's qualifications and resume content, facilitating an optimized job search experience.
 
-<br />
-
-
-
 <br />
 
 **Selenium-Powered LinkedIn Data Scraping:**
 - Utilizing Selenium and a Webdriver automated test tool, this feature enables users to input job titles, automating the data scraping process from LinkedIn. The scraped data includes crucial details such as company names, job titles, locations, URLs, and comprehensive job descriptions.
 - This streamlined process enables users to easily review scraped job details and apply for positions, simplifying their job search and application experience.
 
-<br />
-
-
-
 <br />
 
 **Contributing**

@@ -137,7 +210,5 @@ This project is licensed under the MIT License. Please review the LICENSE file f…
 
 📧 Email: [email protected]
 
-
-
 For any further questions or inquiries, feel free to reach out. We are happy to assist you with any queries.
 
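For orientation, the local pipeline the new README describes (HuggingFace embeddings, FAISS retrieval, a quantized Llama 2 GGUF model) fits together roughly as in the sketch below. This is an assumption-based illustration, not the repository's exact code: the embedding model name and the GGUF file path are placeholders.

```python
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import LlamaCpp
from langchain.chains.question_answering import load_qa_chain

def analyze_locally(chunks, prompt):
    # Embed the resume chunks locally with a HuggingFace sentence-transformer
    # (the model name here is an illustrative choice, not taken from the repository).
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)

    # Retrieve the chunks most relevant to the analysis prompt.
    docs = vectorstore.similarity_search(query=prompt, k=3)

    # Load the quantized Llama 2 7B Chat model; the file name is assumed to be
    # whatever setup_model.py downloads, and n_ctx matches the 2048-token
    # context window listed in the README.
    llm = LlamaCpp(model_path="models/llama-2-7b-chat.Q4_K_M.gguf",
                   n_ctx=2048,
                   temperature=0.3)

    chain = load_qa_chain(llm=llm, chain_type='stuff')
    return chain.run(input_documents=docs, question=prompt)
```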
app.py
CHANGED
@@ -6,9 +6,9 @@ from streamlit_option_menu import option_menu
|
|
6 |
from streamlit_extras.add_vertical_space import add_vertical_space
|
7 |
from PyPDF2 import PdfReader
|
8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
-
from
|
10 |
-
from
|
11 |
-
from
|
12 |
from langchain.chains.question_answering import load_qa_chain
|
13 |
from langchain.memory import ConversationBufferMemory
|
14 |
from langchain.chains import ConversationChain
|
@@ -16,22 +16,37 @@ from selenium import webdriver
|
|
16 |
from selenium.webdriver.common.by import By
|
17 |
from selenium.webdriver.common.keys import Keys
|
18 |
from selenium.common.exceptions import NoSuchElementException
|
|
|
19 |
|
20 |
import warnings
|
21 |
warnings.filterwarnings('ignore')
|
22 |
|
23 |
-
|
24 |
-
|
25 |
try:
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
return None
|
34 |
-
|
35 |
|
36 |
def streamlit_config():
|
37 |
st.set_page_config(page_title='Talent Track By AI', layout="wide")
|
@@ -52,7 +67,7 @@ def process_resume(pdf):
|
|
52 |
with st.spinner('Processing...'):
|
53 |
pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
|
54 |
summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
|
55 |
-
summary = resume_analyzer.
|
56 |
if summary:
|
57 |
st.session_state['resume_data'] = {
|
58 |
'pdf': pdf,
|
@@ -77,19 +92,41 @@ class resume_analyzer:
|
|
77 |
chunks = text_splitter.split_text(text=text)
|
78 |
return chunks
|
79 |
|
80 |
-
def
|
81 |
-
|
82 |
-
|
83 |
-
st.
|
84 |
-
|
85 |
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
def summary_prompt(query_with_chunks):
|
95 |
query = f''' need to detailed summarization of below resume and finally conclude them
|
@@ -150,13 +187,13 @@ class resume_analyzer:
|
|
150 |
if pdf is not None:
|
151 |
if process_resume(pdf):
|
152 |
strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
|
153 |
-
strength = resume_analyzer.
|
154 |
if strength:
|
155 |
st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
|
156 |
st.write(strength)
|
157 |
else:
|
158 |
strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
|
159 |
-
strength = resume_analyzer.
|
160 |
if strength:
|
161 |
st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
|
162 |
st.write(strength)
|
@@ -168,6 +205,7 @@ class resume_analyzer:
|
|
168 |
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
169 |
'''
|
170 |
return query
|
|
|
171 |
def resume_weakness():
|
172 |
with st.form(key='Weakness'):
|
173 |
add_vertical_space(1)
|
@@ -187,13 +225,13 @@ class resume_analyzer:
|
|
187 |
if pdf is not None:
|
188 |
if process_resume(pdf):
|
189 |
weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
|
190 |
-
weakness = resume_analyzer.
|
191 |
if weakness:
|
192 |
st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
|
193 |
st.write(weakness)
|
194 |
else:
|
195 |
weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
|
196 |
-
weakness = resume_analyzer.
|
197 |
if weakness:
|
198 |
st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
|
199 |
st.write(weakness)
|
@@ -225,186 +263,327 @@ class resume_analyzer:
        if pdf is not None:
            if process_resume(pdf):
                job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
-               job_title = resume_analyzer.
                if job_title:
                    st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
                    st.write(job_title)
            else:
                job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
-               job_title = resume_analyzer.
                if job_title:
                    st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
                    st.write(job_title)

class linkedin_scraper:

    def webdriver_setup():

    def get_userinput():
-       job_title_input = st.text_input(label='Job Title')
-       job_title_input = job_title_input.split(',')
-       with col2:
-           job_location = st.text_input(label='Job Location', value='India')
-       with col3:
-           job_count = st.number_input(label='Job Count', min_value=1, value=1, step=1)
-       add_vertical_space(1)
-       submit = st.form_submit_button(label='Submit')
-       add_vertical_space(1)
-       return job_title_input, job_location, job_count, submit

    def build_url(job_title, job_location):
-       company_name = [i.text for i in company]
-       location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
-       company_location = [i.text for i in location]
-       title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
-       job_title = [i.text for i in title]
-       url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
-       website_url = [i.get_attribute('href') for i in url]
-       df = pd.DataFrame(company_name, columns=['Company Name'])
-       df['Job Title'] = pd.DataFrame(job_title)
-       df['Location'] = pd.DataFrame(company_location)
-       df['Website URL'] = pd.DataFrame(website_url)
-       df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scraper.job_title_filter(x, job_title_input))
-       df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan)
-       df = df.dropna()
-       df.reset_index(drop=True, inplace=True)
-       return df
-
-   def scrap_job_description(driver, df, job_count):
-       website_url = df['Website URL'].tolist()
-       job_description = []
-       description_count = 0
-       for i in range(0, len(website_url)):
-           try:
-               linkedin_scraper.open_link(driver, website_url[i])
-               driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
-               driver.implicitly_wait(5)
-               time.sleep(1)
-               description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
-               data = [i.text for i in description][0]
-               if len(data.strip()) > 0 and data not in job_description:
-                   job_description.append(data)
-                   description_count += 1
-               else:
-                   job_description.append('Description Not Available')
-           except:
-               job_description.append('Description Not Available')
-           if description_count == job_count:
-               break
-       df = df.iloc[:len(job_description), :]
-       df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])
-       df['Job Description'] = df['Job Description'].apply(lambda x: np.nan if x=='Description Not Available' else x)
-       df = df.dropna()
-       df.reset_index(drop=True, inplace=True)
-       return df
-
-   def display_data_userinterface(df_final):
-       add_vertical_space(1)
-       if len(df_final) > 0:
-           for i in range(0, len(df_final)):
-               st.markdown(f'<h3 style="color: orange;">Job Posting Details : {i+1}</h3>', unsafe_allow_html=True)
-               st.write(f"Company Name : {df_final.iloc[i,0]}")
-               st.write(f"Job Title : {df_final.iloc[i,1]}")
-               st.write(f"Location : {df_final.iloc[i,2]}")
-               st.write(f"Website URL : {df_final.iloc[i,3]}")
-               with st.expander(label='Job Desription'):
-                   st.write(df_final.iloc[i, 4])
-               add_vertical_space(3)
-       else:
-           st.markdown(f'<h5 style="text-align: center;color: orange;">No Matching Jobs Found</h5>',
-                       unsafe_allow_html=True)

-   def main():
-       driver = None
        try:
        except Exception as e:

class career_chatbot:
    def initialize_session_state():
@@ -432,7 +611,7 @@ class career_chatbot:
        try:
            pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
            summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
-           summary = resume_analyzer.
            if summary:
                st.session_state.resume_data = summary
                st.success("Resume processed successfully! The chatbot now has context from your resume.")
@@ -467,11 +646,6 @@ Your responses should be helpful, specific, and actionable. Use bullet points fo
        return base_prompt

    def process_user_input():
-       openai_api_key = get_openai_api_key()
-       if not openai_api_key:
-           st.error("OpenAI API key not found. Please check your secrets configuration.")
-           return
-
        # Get user input and clear the input box
        user_input = st.chat_input("Ask me about careers, job search, or resume advice...")
@@ -486,7 +660,9 @@ Your responses should be helpful, specific, and actionable. Use bullet points fo
            # Generate response using the chatbot
            try:
                with st.spinner("Thinking..."):
-                   llm =

                    # Update conversation memory
                    st.session_state.conversation_memory.chat_memory.add_user_message(user_input)
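The same hunks rendered against the new file follow. Since the OpenAI-backed helpers are replaced by a local llama.cpp model loaded through langchain_community.llms.LlamaCpp, a quick standalone smoke test along these lines can confirm the model loads outside Streamlit before running the app (a minimal sketch, assuming the GGUF file has already been downloaded to models/ and llama-cpp-python is installed; the prompt is illustrative):

```python
# Minimal smoke test for the local GGUF model (illustrative; not part of the commit).
from langchain_community.llms import LlamaCpp

model_path = "models/llama-2-7b-chat.Q4_K_M.gguf"  # path assumed, matching initialize_llm()

llm = LlamaCpp(
    model_path=model_path,
    n_ctx=2048,       # same context window the app uses
    temperature=0.7,
    max_tokens=256,   # keep the test short
)

print(llm.invoke("Summarize in one sentence: Python is a programming language."))
```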
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.llms import LlamaCpp
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

@@ -16,22 +16,37 @@ from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
+ import os

import warnings
warnings.filterwarnings('ignore')

+ def initialize_llm():
+     """Initialize the local LLM model with optimized parameters for better performance"""
      try:
+         model_path = "models/llama-2-7b-chat.Q4_K_M.gguf"
+         if not os.path.exists(model_path):
+             st.error(f"Model file not found at {model_path}")
+             return None
+
+         st.info("Loading LLM model... This may take a few moments.")
+         llm = LlamaCpp(
+             model_path=model_path,
+             temperature=0.7,
+             max_tokens=2000,
+             top_p=0.9,
+             verbose=True,
+             n_ctx=2048,
+             n_threads=4,
+             n_batch=512,
+             n_gpu_layers=0,
+             f16_kv=True,
+             seed=42
+         )
+         return llm
+     except Exception as e:
+         st.error(f"Error initializing LLM: {str(e)}")
          return None

def streamlit_config():
    st.set_page_config(page_title='Talent Track By AI', layout="wide")

@@ -52,7 +67,7 @@ def process_resume(pdf):
    with st.spinner('Processing...'):
        pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
        summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
+       summary = resume_analyzer.local_llm(chunks=pdf_chunks, analyze=summary_prompt)
        if summary:
            st.session_state['resume_data'] = {
                'pdf': pdf,

@@ -77,19 +92,41 @@ class resume_analyzer:
        chunks = text_splitter.split_text(text=text)
        return chunks

+   def local_llm(chunks, analyze):
+       try:
+           # Initialize embeddings with error handling
+           st.info("Initializing embeddings...")
+           embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

+           # Create vector store with error handling
+           st.info("Creating vector store...")
+           text_splitter = RecursiveCharacterTextSplitter(
+               chunk_size=500,
+               chunk_overlap=50,
+               length_function=len
+           )
+           split_chunks = []
+           for chunk in chunks:
+               split_chunks.extend(text_splitter.split_text(chunk))
+
+           vectorstores = FAISS.from_texts(split_chunks, embedding=embeddings)
+           docs = vectorstores.similarity_search(query=analyze, k=3)
+
+           # Get LLM instance
+           st.info("Getting LLM instance...")
+           llm = initialize_llm()
+           if not llm:
+               st.error("Failed to initialize LLM")
+               return None
+
+           # Create and run the chain
+           st.info("Running analysis...")
+           chain = load_qa_chain(llm=llm, chain_type='stuff')
+           response = chain.run(input_documents=docs, question=analyze)
+           return response
+       except Exception as e:
+           st.error(f"Error in LLM processing: {str(e)}")
+           return None

    def summary_prompt(query_with_chunks):
        query = f''' need to detailed summarization of below resume and finally conclude them

@@ -150,13 +187,13 @@ class resume_analyzer:
        if pdf is not None:
            if process_resume(pdf):
                strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
+               strength = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=strength_prompt)
                if strength:
                    st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
                    st.write(strength)
            else:
                strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
+               strength = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=strength_prompt)
                if strength:
                    st.markdown(f'<h4 style="color: orange;">Strength:</h4>', unsafe_allow_html=True)
                    st.write(strength)

@@ -168,6 +205,7 @@ class resume_analyzer:
                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    '''
        return query
+
    def resume_weakness():
        with st.form(key='Weakness'):
            add_vertical_space(1)

@@ -187,13 +225,13 @@ class resume_analyzer:
        if pdf is not None:
            if process_resume(pdf):
                weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
+               weakness = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=weakness_prompt)
                if weakness:
                    st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
                    st.write(weakness)
            else:
                weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
+               weakness = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=weakness_prompt)
                if weakness:
                    st.markdown(f'<h4 style="color: orange;">Weakness and Suggestions:</h4>', unsafe_allow_html=True)
                    st.write(weakness)

@@ -225,186 +263,327 @@ class resume_analyzer:
        if pdf is not None:
            if process_resume(pdf):
                job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
+               job_title = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=job_title_prompt)
                if job_title:
                    st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
                    st.write(job_title)
            else:
                job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=st.session_state['resume_data']['summary'])
+               job_title = resume_analyzer.local_llm(chunks=st.session_state['resume_data']['chunks'], analyze=job_title_prompt)
                if job_title:
                    st.markdown(f'<h4 style="color: orange;">Job Titles:</h4>', unsafe_allow_html=True)
                    st.write(job_title)

class linkedin_scraper:
+   @staticmethod
    def webdriver_setup():
+       """Set up Chrome webdriver with enhanced anti-detection measures"""
+       try:
+           options = webdriver.ChromeOptions()
+
+           # Basic options
+           options.add_argument('--no-sandbox')
+           options.add_argument('--disable-dev-shm-usage')
+           options.add_argument('--disable-gpu')
+           options.add_argument('--disable-extensions')
+           options.add_argument('--disable-notifications')
+
+           # Window size and display
+           options.add_argument('--window-size=1920,1080')
+           options.add_argument('--start-maximized')
+
+           # Enhanced privacy and security settings
+           options.add_argument('--disable-blink-features=AutomationControlled')
+           options.add_argument('--disable-web-security')
+           options.add_argument('--allow-running-insecure-content')
+           options.add_argument('--ignore-certificate-errors')
+           options.add_argument('--ignore-ssl-errors')
+
+           # Random user agent
+           user_agents = [
+               'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+               'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
+               'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+               'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Edge/120.0.0.0'
+           ]
+           user_agent = np.random.choice(user_agents)
+           options.add_argument(f'--user-agent={user_agent}')
+
+           # Experimental options
+           options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
+           options.add_experimental_option('useAutomationExtension', False)
+
+           # Create driver
+           driver = webdriver.Chrome(options=options)
+
+           # Additional JavaScript to avoid detection
+           driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": user_agent})
+
+           # Modify navigator properties
+           driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
+           driver.execute_script("Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']})")
+           driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]})")
+
+           # Set viewport and window size
+           driver.execute_cdp_cmd('Emulation.setDeviceMetricsOverride', {
+               'mobile': False,
+               'width': 1920,
+               'height': 1080,
+               'deviceScaleFactor': 1,
+           })
+
+           return driver
+
+       except Exception as e:
+           st.error(f"Failed to initialize Chrome driver: {str(e)}")
+           st.info("Please ensure Chrome browser is installed and updated to the latest version")
+           return None

+   @staticmethod
    def get_userinput():
+       """Get job search parameters from user"""
+       job_title = st.text_input('Enter Job Titles (comma separated):', 'Data Scientist')
+       job_location = st.text_input('Enter Job Location:', 'India')
+       job_count = st.number_input('Enter Number of Jobs to Scrape (max 100):', min_value=1, max_value=100, value=2)
+       return job_title.split(','), job_location, job_count

+   @staticmethod
    def build_url(job_title, job_location):
+       """Build LinkedIn search URL"""
+       formatted_title = '%20'.join(job_title[0].strip().split())  # Use first job title only
+       formatted_location = '%20'.join(job_location.split())
+       return f"https://www.linkedin.com/jobs/search?keywords={formatted_title}&location={formatted_location}"
+
+   @staticmethod
+   def scroll_page(driver, job_count):
+       """Scroll page to load more jobs"""
+       try:
+           st.info("Scrolling page to load more jobs...")
+           # Calculate number of scrolls needed (25 jobs per scroll approximately)
+           scrolls = min(job_count // 25 + 1, 4)
+
+           for i in range(scrolls):
+               st.info(f"Scroll attempt {i+1}/{scrolls}")
+               # Scroll to bottom
+               driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+               time.sleep(4)  # Wait for content to load
+
+               try:
+                   # Try to click "Show more" button if present
+                   show_more_buttons = driver.find_elements(by=By.CSS_SELECTOR, value=[
+                       "button.infinite-scroller__show-more-button",
+                       "button.see-more-jobs",
+                       "button[data-tracking-control-name='infinite-scroller_show-more']"
+                   ])
+
+                   for button in show_more_buttons:
+                       if button.is_displayed():
+                           driver.execute_script("arguments[0].click();", button)
+                           time.sleep(3)  # Wait for new content
+                           break
+
+               except Exception as e:
+                   st.warning(f"Could not find or click 'Show more' button: {str(e)}")
+
+               # Additional wait after last scroll
+               if i == scrolls - 1:
+                   time.sleep(5)
+
+       except Exception as e:
+           st.warning(f"Error during page scrolling: {str(e)}")
+
+   @staticmethod
+   def scrape_jobs(driver, job_count):
+       """Scrape job listings from LinkedIn with updated selectors"""
+       jobs_data = {
+           'company_name': [],
+           'job_title': [],
+           'location': [],
+           'job_url': []
+       }

        try:
+           # Wait for job cards to load with explicit wait
+           st.info("Waiting for page to load...")
+           time.sleep(8)  # Increased initial wait time
+
+           # Try multiple selectors for job cards
+           selectors = [
+               "div.job-card-container",
+               "li.jobs-search-results__list-item",
+               "div.base-card",
+               "div.job-search-card",
+               "li.jobs-search-results-list__list-item"
+           ]
+
+           job_cards = []
+           for selector in selectors:
+               try:
+                   job_cards = driver.find_elements(by=By.CSS_SELECTOR, value=selector)
+                   if job_cards:
+                       st.success(f"Found job cards using selector: {selector}")
+                       break
+               except:
+                   continue
+
+           if not job_cards:
+               st.error("Could not find any job listings. LinkedIn might have updated their page structure.")
+               return pd.DataFrame(jobs_data)
+
+           # Limit to requested number
+           job_cards = job_cards[:job_count]
+
+           st.info(f"Processing {len(job_cards)} job cards...")
+
+           for card in job_cards:
+               try:
+                   # Company name selectors
+                   company_selectors = [
+                       ".job-card-container__company-name",
+                       ".base-search-card__subtitle",
+                       ".company-name",
+                       "span[data-tracking-control-name='public_jobs_company_name']",
+                       ".job-card-container__primary-description"
+                   ]
+
+                   # Job title selectors
+                   title_selectors = [
+                       ".job-card-container__title",
+                       ".base-search-card__title",
+                       ".job-card-list__title",
+                       "h3.base-search-card__title",
+                       ".job-search-card__title"
+                   ]
+
+                   # Location selectors
+                   location_selectors = [
+                       ".job-card-container__metadata-item",
+                       ".base-search-card__metadata",
+                       ".job-search-card__location",
+                       "span[data-tracking-control-name='public_jobs_job-location']",
+                       ".job-card-container__metadata-wrapper"
+                   ]
+
+                   # Try to find company name
+                   company = None
+                   for selector in company_selectors:
+                       try:
+                           element = card.find_element(by=By.CSS_SELECTOR, value=selector)
+                           company = element.text.strip()
+                           if company:
+                               break
+                       except:
+                           continue
+
+                   # Try to find job title
+                   title = None
+                   for selector in title_selectors:
+                       try:
+                           element = card.find_element(by=By.CSS_SELECTOR, value=selector)
+                           title = element.text.strip()
+                           if title:
+                               break
+                       except:
+                           continue
+
+                   # Try to find location
+                   location = None
+                   for selector in location_selectors:
+                       try:
+                           element = card.find_element(by=By.CSS_SELECTOR, value=selector)
+                           location = element.text.strip()
+                           if location:
+                               break
+                       except:
+                           continue
+
+                   # Try to find URL
+                   try:
+                       url = card.find_element(by=By.CSS_SELECTOR, value="a").get_attribute("href")
+                   except:
+                       try:
+                           url = card.find_element(by=By.CSS_SELECTOR, value="a.base-card__full-link").get_attribute("href")
+                       except:
+                           url = None
+
+                   if all([company, title, location, url]):
+                       jobs_data['company_name'].append(company)
+                       jobs_data['job_title'].append(title)
+                       jobs_data['location'].append(location)
+                       jobs_data['job_url'].append(url)
+                       st.success(f"Successfully scraped job: {title} at {company}")
+
+               except Exception as e:
+                   st.warning(f"Failed to scrape a job card: {str(e)}")
+                   continue
+
+           if not jobs_data['company_name']:
+               st.error("Could not extract any job information. LinkedIn might be blocking automated access.")
+
        except Exception as e:
+           st.error(f"Error during job scraping: {str(e)}")
+
+       return pd.DataFrame(jobs_data)
+
+   @staticmethod
+   def display_results(df):
+       """Display scraped job results"""
+       if df.empty:
+           st.error("No jobs were found. Please try again with different search parameters.")
+           return
+
+       st.markdown('### 📊 Scraped Job Listings')
+
+       # Display summary statistics
+       st.markdown(f"**Total Jobs Found:** {len(df)}")
+       st.markdown(f"**Unique Companies:** {df['company_name'].nunique()}")
+       st.markdown(f"**Locations Covered:** {df['location'].nunique()}")
+
+       # Display the dataframe
+       st.dataframe(df)
+
+       # Add download button
+       csv = df.to_csv(index=False).encode('utf-8')
+       st.download_button(
+           "Download Results as CSV",
+           csv,
+           "linkedin_jobs.csv",
+           "text/csv",
+           key='download-csv'
+       )
+
+   def main():
+       st.markdown('## 🔍 LinkedIn Job Search')
+
+       job_titles, job_location, job_count = linkedin_scraper.get_userinput()
+
+       if st.button('Start Scraping'):
+           with st.spinner('Scraping LinkedIn jobs...'):
+               try:
+                   driver = linkedin_scraper.webdriver_setup()
+                   if driver is None:
+                       return
+
+                   url = linkedin_scraper.build_url(job_titles, job_location)
+                   st.info(f"Searching: {url}")
+
+                   driver.get(url)
+                   time.sleep(5)  # Increased initial wait time
+
+                   linkedin_scraper.scroll_page(driver, job_count)
+                   df = linkedin_scraper.scrape_jobs(driver, job_count)
+
+                   driver.quit()
+
+                   if not df.empty:
+                       linkedin_scraper.display_results(df)
+                   else:
+                       st.error('No jobs found matching your criteria. Try different search terms or location.')
+
+               except Exception as e:
+                   st.error(f'An error occurred while scraping: {str(e)}')
+                   if 'driver' in locals():
+                       driver.quit()

class career_chatbot:
    def initialize_session_state():

@@ -432,7 +611,7 @@ class career_chatbot:
        try:
            pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
            summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
+           summary = resume_analyzer.local_llm(chunks=pdf_chunks, analyze=summary_prompt)
            if summary:
                st.session_state.resume_data = summary
                st.success("Resume processed successfully! The chatbot now has context from your resume.")

@@ -467,11 +646,6 @@ Your responses should be helpful, specific, and actionable. Use bullet points fo
        return base_prompt

    def process_user_input():
        # Get user input and clear the input box
        user_input = st.chat_input("Ask me about careers, job search, or resume advice...")

@@ -486,7 +660,9 @@ Your responses should be helpful, specific, and actionable. Use bullet points fo
            # Generate response using the chatbot
            try:
                with st.spinner("Thinking..."):
+                   llm = initialize_llm()
+                   if not llm:
+                       raise Exception("Failed to initialize LLM")

                    # Update conversation memory
                    st.session_state.conversation_memory.chat_memory.add_user_message(user_input)
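Note on the scroll_page helper added above: Selenium's find_elements(by=..., value=...) takes a single CSS selector string for value, so passing the three "Show more" selectors as one Python list is likely to raise an invalid-selector error rather than match any button. A hedged sketch of the same lookup done one selector at a time (illustrative only; the selector strings are copied from the diff above):

```python
# Illustrative rework of the "Show more" lookup: one find_elements call per selector,
# since find_elements(by=..., value=...) expects a single selector string per call.
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.common.by import By

SHOW_MORE_SELECTORS = [
    "button.infinite-scroller__show-more-button",
    "button.see-more-jobs",
    "button[data-tracking-control-name='infinite-scroller_show-more']",
]

def find_show_more_buttons(driver: WebDriver):
    """Collect matches for each selector separately and return them as one list."""
    buttons = []
    for css in SHOW_MORE_SELECTORS:
        buttons.extend(driver.find_elements(by=By.CSS_SELECTOR, value=css))
    return buttons
```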
config.py DELETED
File without changes
requirements.txt CHANGED
@@ -1,11 +1,11 @@
- streamlit
+ streamlit==1.32.0
+ streamlit-option-menu==0.3.12
+ streamlit-extras==0.3.1
+ PyPDF2==3.0.1
+ langchain==0.1.12
+ faiss-cpu==1.8.0
+ sentence-transformers==2.5.1
+ llama-cpp-python==0.2.56
+ selenium==4.18.1
+ pandas==2.2.1
+ numpy==1.26.4
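With requirements.txt now fully pinned, a small environment check after installing it can confirm the heavier dependencies (FAISS, sentence-transformers, llama-cpp-python, Selenium) resolved correctly; a sketch, using only the distribution names listed above:

```python
# Quick check that the pinned stack is installed, printing each resolved version.
import importlib.metadata as md

for pkg in ["streamlit", "PyPDF2", "langchain", "faiss-cpu",
            "sentence-transformers", "llama-cpp-python", "selenium",
            "pandas", "numpy"]:
    try:
        print(f"{pkg}: {md.version(pkg)}")
    except md.PackageNotFoundError:
        print(f"{pkg}: NOT INSTALLED")
```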
setup_model.py ADDED
@@ -0,0 +1,43 @@
+ import os
+ import requests
+ from tqdm import tqdm
+
+ def download_file(url, filename):
+     response = requests.get(url, stream=True)
+     total_size = int(response.headers.get('content-length', 0))
+
+     with open(filename, 'wb') as file, tqdm(
+         desc=filename,
+         total=total_size,
+         unit='iB',
+         unit_scale=True,
+         unit_divisor=1024,
+     ) as pbar:
+         for data in response.iter_content(chunk_size=1024):
+             size = file.write(data)
+             pbar.update(size)
+
+ def main():
+     # Create models directory if it doesn't exist
+     if not os.path.exists('models'):
+         os.makedirs('models')
+
+     # Model URL (using a smaller model for faster download)
+     model_url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
+     model_path = "models/llama-2-7b-chat.gguf"
+
+     print("Starting model download...")
+     print("This may take a while depending on your internet connection.")
+     print("The model is about 4GB in size.")
+
+     try:
+         download_file(model_url, model_path)
+         print("\nModel downloaded successfully!")
+         print(f"Model saved to: {model_path}")
+     except Exception as e:
+         print(f"Error downloading model: {str(e)}")
+         print("Please try downloading manually from: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF")
+         print("And place the model file in the 'models' directory.")
+
+ if __name__ == "__main__":
+     main()
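One thing to watch: this script downloads the Q4_K_M build but saves it as models/llama-2-7b-chat.gguf, while initialize_llm() in app.py looks for models/llama-2-7b-chat.Q4_K_M.gguf, so as committed the app will report the model file as missing even after a successful download. A small illustrative helper that accepts either filename (the two names come from the files above; the fallback logic itself is an assumption, not part of the commit):

```python
# Illustrative helper: return whichever of the two expected model filenames exists.
import os
from typing import Optional

def resolve_model_path(models_dir: str = "models") -> Optional[str]:
    for name in ("llama-2-7b-chat.Q4_K_M.gguf", "llama-2-7b-chat.gguf"):
        candidate = os.path.join(models_dir, name)
        if os.path.exists(candidate):
            return candidate
    return None

print(resolve_model_path())  # None means neither file is present yet
```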
temp_fix.py ADDED
@@ -0,0 +1,35 @@
+ def local_llm(chunks, analyze):
+     try:
+         # Initialize embeddings with error handling
+         st.info("Initializing embeddings...")
+         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+         # Create vector store with error handling
+         st.info("Creating vector store...")
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=50,
+             length_function=len
+         )
+         split_chunks = []
+         for chunk in chunks:
+             split_chunks.extend(text_splitter.split_text(chunk))
+
+         vectorstores = FAISS.from_texts(split_chunks, embedding=embeddings)
+         docs = vectorstores.similarity_search(query=analyze, k=3)
+
+         # Get LLM instance
+         st.info("Getting LLM instance...")
+         llm = initialize_llm()
+         if not llm:
+             st.error("Failed to initialize LLM")
+             return None
+
+         # Create and run the chain
+         st.info("Running analysis...")
+         chain = load_qa_chain(llm=llm, chain_type='stuff')
+         response = chain.run(input_documents=docs, question=analyze)
+         return response
+     except Exception as e:
+         st.error(f"Error in LLM processing: {str(e)}")
+         return None
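temp_fix.py duplicates resume_analyzer.local_llm from app.py but ships without any imports, so on its own it would fail at the first name lookup (st, HuggingFaceEmbeddings, and so on). A sketch of the header it would need to run standalone (module layout assumed; importing app may execute Streamlit code at import time):

```python
# Imports temp_fix.py would need to be usable on its own (a sketch; layout assumed).
import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain

from app import initialize_llm  # assumes app.py is importable without side effects
```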