PluginLiveInterns committed on
Commit
7eb1624
·
1 Parent(s): b0074bf

Add application file

Browse files
Files changed (6) hide show
  1. .env +2 -0
  2. app.py +535 -0
  3. main.py +720 -0
  4. main2.py +499 -0
  5. rag.py +99 -0
  6. requirements.txt +12 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ GEMINI_API_KEY=AIzaSyAOK9vRTSRQzd22B2gmbiuIePbZTDyaGYs
2
+ RAPIDAPI_KEY=a9712241damsh9d248dc7bd8afabp171beajsn5d30f6e126b7
app.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import http.client
3
+ import json
4
+ import os
5
+ import PyPDF2
6
+ import io
7
+ import requests
8
+ import time
9
+ from google import genai
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables from .env file
13
+ load_dotenv()
14
+
15
+ # ----------------------------
16
+ # RAG Chatbot Implementation
17
+ # ----------------------------
18
+ from sentence_transformers import SentenceTransformer
19
+ import faiss
20
+ import numpy as np
21
+
22
class SimpleRAG:
    """Minimal retrieval-augmented generation helper over job-search results.

    Job postings are flattened to text, split into word chunks, embedded with a
    SentenceTransformer model and stored in a FAISS ``IndexFlatL2``. A question
    is answered by retrieving the nearest chunks and passing them to Gemini as
    context.
    """

    def __init__(self, api_key):
        """Create the embedder and Gemini client; the index starts empty.

        Args:
            api_key: Gemini API key handed to ``genai.Client``.
        """
        self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.client = genai.Client(api_key=api_key)
        self.model = "gemini-2.0-flash"
        self.index = None
        self.chunks = []
        self.is_initialized = False
        self.processing_status = None

    def chunk_text(self, text, chunk_size=700):
        """Split ``text`` into chunks of at most ``chunk_size`` words.

        Args:
            text: Text to split on whitespace.
            chunk_size: Maximum number of words per chunk.

        Returns:
            A list of space-joined chunks; empty when ``text`` has no words.

        Raises:
            ValueError: If ``chunk_size`` is less than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")
        words = text.split()
        return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    def process_search_data(self, search_data):
        """Index job postings so they can be queried.

        Only the ``job_title`` and ``job_description`` fields of each posting
        are used.

        Args:
            search_data: Iterable of job dicts from the job-search API.

        Returns:
            ``{"status": "success", "message": ...}`` once the index is built,
            otherwise ``{"status": "error", "message": ...}``. On error the
            instance is marked as not initialized.
        """
        try:
            self.processing_status = "Processing job search data..."
            job_docs = []
            for job in search_data:
                # `or ''` also covers keys that are present but null, which
                # would otherwise be indexed as the literal text "None".
                title = job.get('job_title') or ''
                description = job.get('job_description') or ''
                job_docs.append(f"Job Title: {title}. Job Description: {description}.")
            # Join each job document with a delimiter.
            combined_text = " ||| ".join(job_docs)
            if not combined_text.strip():
                raise Exception("No text found in job search results.")
            chunks = self.chunk_text(combined_text)
            if not chunks:
                raise Exception("No content chunks were generated from job data.")
            embeddings = np.asarray(self.embedder.encode(chunks), dtype='float32')
            index = faiss.IndexFlatL2(embeddings.shape[1])
            index.add(embeddings)
            # Publish chunks and index together, only after both were built,
            # so a failed rebuild never leaves them out of sync.
            self.chunks = chunks
            self.index = index
            self.is_initialized = True
            self.processing_status = f"RAG system initialized with {len(self.chunks)} chunks."
            return {"status": "success", "message": self.processing_status}
        except Exception as e:
            self.processing_status = f"Error: {str(e)}"
            self.is_initialized = False
            return {"status": "error", "message": str(e)}

    def get_status(self):
        """Return the initialization flag and the latest status message."""
        return {
            "is_initialized": self.is_initialized,
            "status": self.processing_status
        }

    def get_relevant_chunks(self, query, k=3):
        """Retrieve up to ``k`` chunks closest to ``query``.

        Args:
            query: Question text to embed and search with.
            k: Maximum number of chunks to return.

        Returns:
            The matching chunks, nearest first. Fewer than ``k`` are returned
            when fewer chunks are indexed; an empty list when nothing is
            indexed.
        """
        if self.index is None or not self.chunks:
            return []
        # FAISS pads the result with -1 when asked for more neighbours than it
        # holds; as a Python index that would silently select the last chunk.
        k = min(k, len(self.chunks))
        if k < 1:
            return []
        query_vector = np.asarray(self.embedder.encode([query]), dtype='float32')
        _, chunk_indices = self.index.search(query_vector, k)
        return [self.chunks[i] for i in chunk_indices[0] if 0 <= i < len(self.chunks)]

    def query(self, question):
        """Answer ``question`` from the indexed job data.

        Args:
            question: The user's question.

        Returns:
            ``{"status": "success", "answer": ..., "context": [...]}`` or
            ``{"status": "error", "message": ...}``.

        Raises:
            Exception: If no job data has been processed yet.
        """
        if not self.is_initialized:
            raise Exception("RAG system not initialized. Please process job data first.")
        try:
            context = self.get_relevant_chunks(question)
            prompt = (
                "\nBased on the following context, provide a clear and concise answer.\n"
                "If the context doesn't contain enough relevant information, say "
                "\"I don't have enough information to answer that question.\"\n"
                "\nContext:\n"
                f"{' '.join(context)}\n"
                f"\nQuestion: {question}\n"
            )
            response = self.client.models.generate_content(model=self.model, contents=prompt)
            # The response text may be missing; report that instead of
            # failing on `.strip()` of None.
            answer = (response.text or "").strip()
            if not answer:
                return {
                    "status": "error",
                    "message": "The model returned an empty response."
                }
            return {
                "status": "success",
                "answer": answer,
                "context": context
            }
        except Exception as e:
            return {
                "status": "error",
                "message": str(e)
            }
110
+
111
+ # ----------------------------
112
+ # Main Job Search Engine Code
113
+ # ----------------------------
114
+ # Configure page
115
+ st.set_page_config(page_title="AI Job Finder", page_icon="💼", layout="wide")
116
+
117
+ # Styling
118
+ st.markdown("""
119
+ <style>
120
+ .main-header {
121
+ font-size: 2.5rem;
122
+ color: #4169E1;
123
+ }
124
+ .sub-header {
125
+ font-size: 1.5rem;
126
+ color: #6C757D;
127
+ }
128
+ .success-message {
129
+ background-color: #D4EDDA;
130
+ color: #155724;
131
+ padding: 10px;
132
+ border-radius: 5px;
133
+ margin-bottom: 20px;
134
+ }
135
+ .info-box {
136
+ background-color: #E7F3FE;
137
+ border-left: 6px solid #2196F3;
138
+ padding: 10px;
139
+ margin-bottom: 15px;
140
+ }
141
+ .search-options {
142
+ margin-top: 20px;
143
+ margin-bottom: 20px;
144
+ }
145
+ /* Chatbot styling */
146
+ .chat-box {
147
+ background-color: #F8F9FA;
148
+ border-radius: 10px;
149
+ padding: 20px;
150
+ margin-bottom: 20px;
151
+ }
152
+ .user-message {
153
+ color: #0D6EFD;
154
+ font-weight: bold;
155
+ }
156
+ .bot-message {
157
+ color: #198754;
158
+ font-weight: bold;
159
+ }
160
+ </style>
161
+ """, unsafe_allow_html=True)
162
+
163
+ # Header
164
+ st.markdown('<p class="main-header">AI-Powered Job Finder</p>', unsafe_allow_html=True)
165
+ st.markdown('<p class="sub-header">Upload your resume and find relevant jobs</p>', unsafe_allow_html=True)
166
+
167
+ # Initialize session state variables
168
# Seed the session keys used by the resume and search steps; values already
# present from an earlier rerun are left untouched.
for state_key, initial_value in (
    ('resume_text', ""),
    ('resume_parsed', False),
    ('parsed_data', {}),
    ('job_results', []),
    ('search_completed', False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
178
+
179
+ # Define the JSON schema for resume parsing
180
+ RESUME_SCHEMA = {
181
+ "schema": {
182
+ "basic_info": {
183
+ "name": "string",
184
+ "email": "string",
185
+ "phone": "string",
186
+ "location": "string"
187
+ },
188
+ "professional_summary": "string",
189
+ "skills": ["string"],
190
+ "technical_skills": ["string"],
191
+ "soft_skills": ["string"],
192
+ "experience": [{
193
+ "job_title": "string",
194
+ "company": "string",
195
+ "duration": "string",
196
+ "description": "string"
197
+ }],
198
+ "education": [{
199
+ "degree": "string",
200
+ "institution": "string",
201
+ "year": "string"
202
+ }],
203
+ "certifications": ["string"],
204
+ "years_of_experience": "number"
205
+ }
206
+ }
207
+
208
+ # Function to extract text from PDF
209
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
            uploaded file object.

    Returns:
        The page texts joined in page order with no separator.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # `or ""` guards against a page whose extraction yields nothing usable
    # (presumably image-only pages; verify against the PyPDF2 version in use).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
215
+
216
+ # Function to parse resume with Gemini
217
def parse_resume_with_gemini(resume_text):
    """Ask Gemini to turn raw resume text into the ``RESUME_SCHEMA`` structure.

    Args:
        resume_text: Plain text extracted from the resume.

    Returns:
        A dict following the resume schema. When the request fails or the
        reply cannot be read as a JSON object, an error is shown with
        ``st.error`` and a schema-shaped dict with empty values is returned.
    """
    import re  # local import: not guaranteed by the file's import header

    # Fallback with empty values. Returning the schema template itself would
    # make callers display its "string" placeholders as real resume data.
    empty_result = {
        "basic_info": {"name": "", "email": "", "phone": "", "location": ""},
        "professional_summary": "",
        "skills": [],
        "technical_skills": [],
        "soft_skills": [],
        "experience": [],
        "education": [],
        "certifications": [],
        "years_of_experience": 0,
    }
    try:
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))
        prompt = (
            "\nParse the following resume text and extract information according to this exact JSON schema:\n"
            "\n"
            f"{json.dumps(RESUME_SCHEMA, indent=2)}\n"
            "\nResume text:\n"
            f"{resume_text}\n"
            "\nMake sure to follow the schema exactly. If any information is not available, use empty strings or empty arrays as appropriate.\n"
            "Return ONLY the JSON object with no additional text.\n"
        )
        response = client.models.generate_content(model="gemini-2.0-flash", contents=prompt)
        raw = (response.text or "").strip()
        try:
            parsed_data = json.loads(raw)
        except json.JSONDecodeError:
            # The reply may be wrapped in a Markdown code fence (with or
            # without a language tag) or surrounded by extra text.
            fenced = re.search(r'```(?:json)?\s*(.*?)\s*```', raw, re.DOTALL | re.IGNORECASE)
            if fenced:
                candidate = fenced.group(1)
            else:
                start, end = raw.find('{'), raw.rfind('}')
                candidate = raw[start:end + 1] if 0 <= start < end else ""
            try:
                parsed_data = json.loads(candidate)
            except json.JSONDecodeError:
                st.error("Could not parse the response as JSON")
                return empty_result
        # The prompt shows the schema under a top-level "schema" key, so the
        # model may mirror that wrapper; unwrap it if so.
        if (isinstance(parsed_data, dict) and set(parsed_data) == {"schema"}
                and isinstance(parsed_data["schema"], dict)):
            parsed_data = parsed_data["schema"]
        # Callers use dict access, so reject valid JSON of any other shape.
        if not isinstance(parsed_data, dict):
            st.error("Could not parse the response as JSON")
            return empty_result
        return parsed_data
    except Exception as e:
        st.error(f"Error parsing resume: {str(e)}")
        return empty_result
246
+
247
+ # Function to search for jobs
248
def search_jobs(query, location="", page=1):
    """Search the JSearch RapidAPI endpoint for job postings.

    Args:
        query: Free-text job query, e.g. ``"Python Developer"``.
        location: Optional location, appended to the query as ``"<query> in <location>"``.
        page: Result page to request.

    Returns:
        The decoded JSON response as a dict. On any failure an error is shown
        with ``st.error`` and ``{"data": []}`` is returned.
    """
    from urllib.parse import quote, urlencode  # local import: keeps the block self-contained

    api_key = os.getenv('RAPIDAPI_KEY')
    if not api_key:
        st.error("Error searching for jobs: RAPIDAPI_KEY is not set")
        return {"data": []}

    conn = None
    try:
        search_text = f"{query} in {location}" if location else query
        # Percent-encode the whole value. Replacing only spaces lets '&', '#'
        # or '+' in user input corrupt the query string (e.g. "C# developer").
        params = urlencode(
            {"query": search_text, "page": page, "num_pages": 1},
            quote_via=quote,
        )
        headers = {
            'X-RapidAPI-Key': api_key,
            'X-RapidAPI-Host': "jsearch.p.rapidapi.com"
        }
        # A timeout stops a stalled request from hanging the app indefinitely.
        conn = http.client.HTTPSConnection("jsearch.p.rapidapi.com", timeout=30)
        conn.request("GET", f"/search?{params}", headers=headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if res.status != 200:
            st.error(f"Error searching for jobs: HTTP {res.status} {res.reason}")
            return {"data": []}
        payload = json.loads(body)
        # Callers read the result with `.get('data', [])`, so always return a dict.
        return payload if isinstance(payload, dict) else {"data": []}
    except Exception as e:
        st.error(f"Error searching for jobs: {str(e)}")
        return {"data": []}
    finally:
        if conn is not None:
            conn.close()
265
+
266
# Seed the filter keys read by apply_filters; values already present from an
# earlier rerun are left untouched.
for filter_key, initial_value in (
    ('filter_remote_only', False),
    ('filter_employment_types', []),
    ('filter_date_posted', 0),
    ('min_salary', 0),
    ('max_salary', 1000000),
    ('filter_company_types', []),
):
    if filter_key not in st.session_state:
        st.session_state[filter_key] = initial_value
278
+
279
+ # Function to apply filters to job results
280
def apply_filters(jobs):
    """Return the jobs that pass the filters stored in ``st.session_state``.

    Filters applied, in order: remote-only, employment type, maximum posting
    age in days, minimum salary, maximum salary and company type. A salary
    bound is only enforced when the job reports that salary value.

    Args:
        jobs: Iterable of job dicts from the job-search API.

    Returns:
        A new list with the jobs that pass every active filter.
    """
    state = st.session_state
    seconds_per_day = 60 * 60 * 24
    # Take the reference time once so every job is judged against the same instant.
    current_time = int(time.time())

    filtered_jobs = []
    for job in jobs:
        if state.filter_remote_only and not job.get('job_is_remote', False):
            continue
        if state.filter_employment_types and job.get('job_employment_type') not in state.filter_employment_types:
            continue
        if state.filter_date_posted > 0:
            # A key present with a null value returns None from .get() even
            # with a default; treat it like a missing timestamp (0) rather
            # than raising on the subtraction below.
            posted_time = job.get('job_posted_at_timestamp') or 0
            days_ago = (current_time - posted_time) / seconds_per_day
            if days_ago > state.filter_date_posted:
                continue
        job_min_salary = job.get('job_min_salary')
        if job_min_salary is not None and job_min_salary < state.min_salary:
            continue
        job_max_salary = job.get('job_max_salary')
        if job_max_salary is not None and job_max_salary > state.max_salary:
            continue
        if state.filter_company_types and job.get('employer_company_type') not in state.filter_company_types:
            continue
        filtered_jobs.append(job)
    return filtered_jobs
301
+
302
+ # ----------------------------
303
+ # Step 1: Resume Upload Section
304
+ # ----------------------------
305
import hashlib

st.subheader("Step 1: Upload Your Resume First")
uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=['pdf'])
if uploaded_file is not None:
    # Streamlit reruns this script on every widget interaction. Fingerprint
    # the upload so the PDF is only re-extracted and re-sent to Gemini when
    # its content actually changes.
    file_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    already_parsed = (
        st.session_state.resume_parsed
        and st.session_state.get('resume_file_digest') == file_digest
    )
    if not already_parsed:
        with st.spinner('Processing your resume...'):
            resume_text = extract_text_from_pdf(uploaded_file)
            parsed_result = parse_resume_with_gemini(resume_text)
            st.session_state.resume_text = resume_text
            # The display code below uses dict access; fall back to an empty
            # dict if the parser hands back anything else.
            st.session_state.parsed_data = parsed_result if isinstance(parsed_result, dict) else {}
            st.session_state.resume_parsed = True
            st.session_state.resume_file_digest = file_digest
    parsed_data = st.session_state.parsed_data
    with st.expander("Resume Parsed Information", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("### Basic Information")
            # `or {}` covers a "basic_info" key that is present but null.
            basic_info = parsed_data.get("basic_info") or {}
            st.write(f"**Name:** {basic_info.get('name', 'Not found')}")
            st.write(f"**Email:** {basic_info.get('email', 'Not found')}")
            st.write(f"**Phone:** {basic_info.get('phone', 'Not found')}")
            st.write(f"**Location:** {basic_info.get('location', 'Not found')}")
            st.markdown("### Experience")
            for exp in parsed_data.get("experience") or []:
                st.markdown(f"**{exp.get('job_title', 'Role')} at {exp.get('company', 'Company')}**")
                st.write(f"*{exp.get('duration', 'Duration not specified')}*")
                st.write(exp.get('description', 'No description available'))
                st.write("---")
        with col2:
            st.markdown("### Skills")
            st.write("**Technical Skills:**")
            tech_skills = parsed_data.get("technical_skills") or []
            st.write(", ".join(map(str, tech_skills)) if tech_skills else "No technical skills found")
            st.write("**Soft Skills:**")
            soft_skills = parsed_data.get("soft_skills") or []
            st.write(", ".join(map(str, soft_skills)) if soft_skills else "No soft skills found")
            st.write("**General Skills:**")
            skills = parsed_data.get("skills") or []
            st.write(", ".join(map(str, skills)) if skills else "No general skills found")
            st.markdown("### Education")
            for edu in parsed_data.get("education") or []:
                st.write(f"**{edu.get('degree', 'Degree')}** - {edu.get('institution', 'Institution')}")
                st.write(f"*{edu.get('year', 'Year not specified')}*")
            st.write(f"**Years of Experience:** {parsed_data.get('years_of_experience', 'Not specified')}")
345
+
346
+ st.markdown("---")
347
+ # ----------------------------
348
+ # Step 2: Job Search Section
349
+ # ----------------------------
350
+ st.subheader("Step 2: Search for Jobs")
351
+ search_query = st.text_input("Enter your job search query (e.g., 'Python Developer')")
352
+ location = st.text_input("Location (e.g., 'New York', 'Remote')")
353
+ st.sidebar.markdown("### Filter Options")
354
+ st.sidebar.checkbox("Remote Only", key="filter_remote_only")
355
+ employment_types = ["FULLTIME", "PARTTIME", "CONTRACTOR", "INTERN"]
356
+ st.sidebar.multiselect("Employment Type", employment_types, default=None, key="filter_employment_types")
357
+ date_options = {"Any time": 0, "Past 24 hours": 1, "Past week": 7, "Past month": 30}
358
+ selected_date = st.sidebar.selectbox("Date Posted", options=list(date_options.keys()), index=0)
359
+ st.session_state.filter_date_posted = date_options[selected_date]
360
+ st.sidebar.markdown("### Salary Range")
361
+ col1, col2 = st.sidebar.columns(2)
362
+ with col1:
363
+ st.number_input("Min ($)", value=0, step=10000, key="min_salary")
364
+ with col2:
365
+ st.number_input("Max ($)", value=1000000, step=10000, key="max_salary")
366
+ company_types = ["Public", "Private", "Nonprofit", "Government", "Startup", "Other"]
367
+ st.sidebar.multiselect("Company Type", company_types, default=None, key="filter_company_types")
368
+
369
+ if st.button("Search Jobs"):
370
+ if search_query:
371
+ with st.spinner('Searching for jobs...'):
372
+ final_query = search_query
373
+ job_results = search_jobs(final_query, location)
374
+ st.session_state.job_results = job_results.get('data', [])
375
+ st.session_state.search_completed = True
376
+ else:
377
+ st.warning("Please enter a search query")
378
+
379
+ # Display Job Search Results
380
+ if st.session_state.search_completed:
381
+ st.markdown("---")
382
+ st.subheader("Job Search Results")
383
+ if st.session_state.job_results:
384
+ filtered_jobs = apply_filters(st.session_state.job_results)
385
+ if filtered_jobs:
386
+ st.success(f"Found {len(filtered_jobs)} jobs matching your criteria")
387
+ if st.session_state.resume_parsed:
388
+ tech_skills = set(st.session_state.parsed_data.get("technical_skills", []))
389
+ general_skills = set(st.session_state.parsed_data.get("skills", []))
390
+ soft_skills = set(st.session_state.parsed_data.get("soft_skills", []))
391
+ all_skills = tech_skills.union(general_skills).union(soft_skills)
392
+ for job in filtered_jobs:
393
+ if job.get('job_description'):
394
+ desc = job.get('job_description', '').lower()
395
+ matched_skills = [skill for skill in all_skills if skill.lower() in desc]
396
+ match_percentage = int((len(matched_skills) / max(1, len(all_skills))) * 100)
397
+ job['match_percentage'] = match_percentage
398
+ job['matched_skills'] = matched_skills
399
+ else:
400
+ job['match_percentage'] = 0
401
+ job['matched_skills'] = []
402
+ sort_by_match = st.checkbox("Sort jobs by skill match percentage", value=True)
403
+ if sort_by_match:
404
+ filtered_jobs = sorted(filtered_jobs, key=lambda x: x.get('match_percentage', 0), reverse=True)
405
+ for job_idx, job in enumerate(filtered_jobs):
406
+ if st.session_state.resume_parsed and 'match_percentage' in job:
407
+ job_title = f"{job_idx+1}. {job.get('job_title', 'Job Title Not Available')} - {job.get('employer_name', 'Company Not Available')} [Match: {job.get('match_percentage')}%]"
408
+ else:
409
+ job_title = f"{job_idx+1}. {job.get('job_title', 'Job Title Not Available')} - {job.get('employer_name', 'Company Not Available')}"
410
+ with st.expander(job_title):
411
+ cols = st.columns([2, 1])
412
+ with cols[0]:
413
+ st.write(f"**Company:** {job.get('employer_name', 'Not Available')}")
414
+ st.write(f"**Location:** {job.get('job_city', 'Not Available')}, {job.get('job_country', 'Not Available')}")
415
+ st.write(f"**Employment Type:** {job.get('job_employment_type', 'Not Available')}")
416
+ st.write(f"**Remote:** {'Yes' if job.get('job_is_remote') else 'No'}")
417
+ if job.get('job_posted_at_datetime_utc'):
418
+ st.write(f"**Posted:** {job.get('job_posted_at_datetime_utc', 'Not Available')}")
419
+ if job.get('job_min_salary') and job.get('job_max_salary'):
420
+ st.write(f"**Salary Range:** ${job.get('job_min_salary', 'Not Available')} - ${job.get('job_max_salary', 'Not Available')} {job.get('job_salary_currency', 'USD')}")
421
+ with cols[1]:
422
+ if st.session_state.resume_parsed:
423
+ match_percentage = job.get('match_percentage', 0)
424
+ matched_skills = job.get('matched_skills', [])
425
+ st.markdown("### Skills Match")
426
+ bar_color = "green" if match_percentage > 70 else "orange" if match_percentage > 40 else "red"
427
+ st.progress(match_percentage / 100)
428
+ st.markdown(f"<h4 style='color:{bar_color};margin-top:0'>{match_percentage}% Match</h4>", unsafe_allow_html=True)
429
+ if matched_skills:
430
+ st.markdown("**Matching Skills:**")
431
+ skill_cols = st.columns(2)
432
+ for skill_idx, skill in enumerate(matched_skills[:10]):
433
+ col_idx = skill_idx % 2
434
+ with skill_cols[col_idx]:
435
+ st.markdown(f"✅ {skill}")
436
+ if len(matched_skills) > 10:
437
+ st.markdown(f"*...and {len(matched_skills)-10} more*")
438
+ else:
439
+ st.write("⚠️ No direct skill matches found")
440
+ st.markdown("**Job Description:**")
441
+ full_desc = job.get('job_description', 'No description available')
442
+ if len(full_desc) > 1000:
443
+ st.markdown(full_desc[:1000] + "...")
444
+ if st.button(f"Show Full Description for Job {job_idx+1}", key=f"show_desc_{job_idx}"):
445
+ st.markdown(full_desc)
446
+ else:
447
+ st.markdown(full_desc)
448
+ st.markdown("**Apply Links:**")
449
+ apply_options = job.get('apply_options', [])
450
+ if apply_options:
451
+ for option in apply_options:
452
+ st.markdown(f"[Apply on {option.get('publisher', 'Job Board')}]({option.get('apply_link')})")
453
+ elif job.get('job_apply_link'):
454
+ st.markdown(f"[Apply for this job]({job.get('job_apply_link')})")
455
+ else:
456
+ st.info("No jobs match your filters. Try adjusting your filter criteria.")
457
+ else:
458
+ st.info("No jobs found matching your search criteria. Try adjusting your search terms or location.")
459
+
460
+ st.markdown("---")
461
+ st.markdown("### How to use this app")
462
+ st.markdown("""
463
+ 1. Upload your resume in PDF format to extract your skills and experience
464
+ 2. Enter your job search query and preferred location
465
+ 3. Review job listings and apply directly to positions you're interested in
466
+ """)
467
+
468
+ # Display app statistics
469
+ st.sidebar.markdown("### App Statistics")
470
+ if st.session_state.resume_parsed:
471
+ st.sidebar.success("✅ Resume Parsed")
472
+ skill_count = len(st.session_state.parsed_data.get("skills", [])) + len(st.session_state.parsed_data.get("technical_skills", []))
473
+ st.sidebar.metric("Skills Detected", skill_count)
474
+ else:
475
+ st.sidebar.warning("❌ No Resume Uploaded")
476
+ if st.session_state.search_completed:
477
+ st.sidebar.success("✅ Job Search Completed")
478
+ st.sidebar.metric("Jobs Found", len(st.session_state.job_results))
479
+ else:
480
+ st.sidebar.warning("❌ No Search Performed")
481
+
482
+ # ----------------------------
483
+ # Step 3: RAG Chatbot Interface
484
+ # ----------------------------
485
+ st.markdown("---")
486
+ st.subheader("Chat with Job Data (RAG Chatbot)")
487
+
488
+ # Initialize RAG session state variables
489
# Create the RAG helper and its chat state once per session.
if 'rag_system' not in st.session_state:
    # Read the key from the environment, as parse_resume_with_gemini does.
    # A key hard-coded in source is exposed to anyone who can read the
    # repository and should be treated as compromised and rotated.
    st.session_state.rag_system = SimpleRAG(api_key=os.getenv('GEMINI_API_KEY'))
if 'rag_initialized' not in st.session_state:
    st.session_state.rag_initialized = False
if 'rag_chat_history' not in st.session_state:
    st.session_state.rag_chat_history = []
496
+
497
+ # Button to load job search data into the RAG system
498
+ if st.button("Load Job Data into Chatbot"):
499
+ if st.session_state.job_results:
500
+ with st.spinner("Processing job data for chatbot..."):
501
+ result = st.session_state.rag_system.process_search_data(st.session_state.job_results)
502
+ if result['status'] == 'success':
503
+ st.success(result['message'])
504
+ st.session_state.rag_initialized = True
505
+ else:
506
+ st.error(result['message'])
507
+ else:
508
+ st.warning("No job data available. Please perform a job search first.")
509
+
510
+ # Chat input form
511
import html

# Chat input form
with st.form("rag_chat_form", clear_on_submit=True):
    user_question = st.text_input("Ask a question about the job data")
    submit_chat = st.form_submit_button("Send")

if submit_chat and user_question:
    if st.session_state.rag_initialized:
        st.session_state.rag_chat_history.append({"user": user_question})
        with st.spinner("Querying chatbot..."):
            try:
                result = st.session_state.rag_system.query(user_question)
            except Exception as e:
                # query() raises when the RAG system reports itself as not
                # initialized; show that in the chat instead of crashing.
                result = {"status": "error", "message": str(e)}
        if result["status"] == "success":
            bot_answer = result["answer"]
            st.session_state.rag_chat_history.append({"bot": bot_answer})
        else:
            st.session_state.rag_chat_history.append({"bot": "Error: " + result.get("message", "Unknown error")})
    else:
        st.error("RAG system not initialized. Please load job data into the chatbot first.")

# Display chat history.
# The messages are user input and model output, so escape them before
# embedding them in raw HTML. The whole box is emitted in one st.markdown
# call because an opening <div> in one call is not expected to wrap
# elements produced by later calls.
chat_lines = []
for msg in st.session_state.rag_chat_history:
    if "user" in msg:
        css_class, speaker, text = "user-message", "User", msg["user"]
    elif "bot" in msg:
        css_class, speaker, text = "bot-message", "Bot", msg["bot"]
    else:
        continue
    safe_text = html.escape(str(text)).replace("\n", "<br>")
    chat_lines.append(f"<p class='{css_class}'>{speaker}: {safe_text}</p>")
st.markdown('<div class="chat-box">' + "".join(chat_lines) + '</div>', unsafe_allow_html=True)
main.py ADDED
@@ -0,0 +1,720 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import http.client
3
+ import json
4
+ import os
5
+ import PyPDF2
6
+ import io
7
+ from google import genai
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ import time
11
+
12
+ # Load environment variables from .env file
13
+ load_dotenv()
14
+
15
+ # Configure page
16
+ st.set_page_config(page_title="AI Job Finder", page_icon="💼", layout="wide")
17
+
18
+ # Styling
19
+ st.markdown("""
20
+ <style>
21
+ /* Global typography */
22
+ body {
23
+ font-family: 'Inter', sans-serif;
24
+ }
25
+
26
+ /* Headers */
27
+ .main-header {
28
+ font-size: 2.8rem;
29
+ font-weight: 700;
30
+ color: #1E3A8A;
31
+ margin-bottom: 0.2rem;
32
+ background: linear-gradient(90deg, #1E3A8A, #3B82F6);
33
+ -webkit-background-clip: text;
34
+ -webkit-text-fill-color: transparent;
35
+ }
36
+
37
+ .sub-header {
38
+ font-size: 1.2rem;
39
+ color: #64748B;
40
+ margin-bottom: 2rem;
41
+ font-weight: 400;
42
+ }
43
+
44
+ /* Section headers */
45
+ .section-header {
46
+ font-size: 1.5rem;
47
+ font-weight: 600;
48
+ color: #1E3A8A;
49
+ margin-top: 1.5rem;
50
+ margin-bottom: 1rem;
51
+ padding-bottom: 0.5rem;
52
+ border-bottom: 2px solid #E2E8F0;
53
+ }
54
+
55
+ /* Cards and containers */
56
+ .card {
57
+ background-color: #FFFFFF;
58
+ border-radius: 8px;
59
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
60
+ padding: 1.5rem;
61
+ margin-bottom: 1.5rem;
62
+ border: 1px solid #E2E8F0;
63
+ }
64
+
65
+ .info-box {
66
+ background-color: #EFF6FF;
67
+ border-left: 6px solid #3B82F6;
68
+ padding: 1rem;
69
+ border-radius: 4px;
70
+ margin-bottom: 1.5rem;
71
+ }
72
+
73
+ /* Messages */
74
+ .success-message {
75
+ background-color: #ECFDF5;
76
+ color: #065F46;
77
+ padding: 1rem;
78
+ border-radius: 4px;
79
+ margin-bottom: 1.5rem;
80
+ display: flex;
81
+ align-items: center;
82
+ }
83
+
84
+ .success-message:before {
85
+ content: "✅";
86
+ margin-right: 0.75rem;
87
+ font-size: 1.2rem;
88
+ }
89
+
90
+ .warning-message {
91
+ background-color: #FFFBEB;
92
+ color: #92400E;
93
+ padding: 1rem;
94
+ border-radius: 4px;
95
+ margin-bottom: 1.5rem;
96
+ display: flex;
97
+ align-items: center;
98
+ }
99
+
100
+ .warning-message:before {
101
+ content: "⚠️";
102
+ margin-right: 0.75rem;
103
+ font-size: 1.2rem;
104
+ }
105
+
106
+ /* Buttons */
107
+ .stButton>button {
108
+ background-color: #3B82F6;
109
+ color: white;
110
+ font-weight: 500;
111
+ border-radius: 6px;
112
+ padding: 0.5rem 1.5rem;
113
+ transition: all 0.2s ease;
114
+ }
115
+
116
+ .stButton>button:hover {
117
+ background-color: #1D4ED8;
118
+ box-shadow: 0 4px 6px rgba(29, 78, 216, 0.15);
119
+ }
120
+
121
+ /* Job listings */
122
+ .job-card {
123
+ border-left: 5px solid #3B82F6;
124
+ background-color: #F8FAFC;
125
+ padding: 1rem;
126
+ margin-bottom: 1rem;
127
+ border-radius: 0 4px 4px 0;
128
+ }
129
+
130
+ .job-title {
131
+ font-size: 1.2rem;
132
+ font-weight: 600;
133
+ color: #1E3A8A;
134
+ margin-bottom: 0.5rem;
135
+ }
136
+
137
+ .job-company {
138
+ font-size: 1rem;
139
+ color: #64748B;
140
+ margin-bottom: 0.5rem;
141
+ }
142
+
143
+ .job-detail {
144
+ display: flex;
145
+ align-items: center;
146
+ margin-bottom: 0.3rem;
147
+ font-size: 0.9rem;
148
+ }
149
+
150
+ .job-detail:before {
151
+ content: "•";
152
+ color: #3B82F6;
153
+ margin-right: 0.5rem;
154
+ }
155
+
156
+ /* Match indicators */
157
+ .match-indicator {
158
+ background-color: #F0F9FF;
159
+ border-radius: 6px;
160
+ padding: 1rem;
161
+ }
162
+
163
+ .match-high {
164
+ color: #047857;
165
+ font-weight: 600;
166
+ }
167
+
168
+ .match-medium {
169
+ color: #B45309;
170
+ font-weight: 600;
171
+ }
172
+
173
+ .match-low {
174
+ color: #DC2626;
175
+ font-weight: 600;
176
+ }
177
+
178
+ .skill-tag {
179
+ display: inline-block;
180
+ background-color: #E0F2FE;
181
+ color: #0369A1;
182
+ font-size: 0.8rem;
183
+ padding: 0.25rem 0.75rem;
184
+ border-radius: 9999px;
185
+ margin-right: 0.5rem;
186
+ margin-bottom: 0.5rem;
187
+ }
188
+
189
+ /* Form elements */
190
+ .stTextInput>div>div>input {
191
+ border-radius: 6px;
192
+ border: 1px solid #CBD5E1;
193
+ }
194
+
195
+ .stFileUploader>div>button {
196
+ background-color: #F1F5F9;
197
+ color: #475569;
198
+ border-radius: 6px;
199
+ }
200
+
201
+ /* Sidebar */
202
+ .sidebar .sidebar-content {
203
+ background-color: #F8FAFC;
204
+ }
205
+
206
+ /* Expander */
207
+ .streamlit-expanderHeader {
208
+ font-weight: 600;
209
+ color: #1E3A8A;
210
+ background-color: #F1F5F9;
211
+ border-radius: 4px;
212
+ }
213
+
214
+ /* Dividers */
215
+ hr {
216
+ margin: 2rem 0;
217
+ border: 0;
218
+ height: 1px;
219
+ background: #E2E8F0;
220
+ }
221
+
222
+ /* Progress bar */
223
+ .stProgress > div > div > div > div {
224
+ background-color: #3B82F6;
225
+ }
226
+
227
+ /* Metrics */
228
+ .metric-card {
229
+ background-color: #F1F5F9;
230
+ border-radius: 8px;
231
+ padding: 1rem;
232
+ text-align: center;
233
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
234
+ }
235
+
236
+ .metric-value {
237
+ font-size: 2rem;
238
+ font-weight: 700;
239
+ color: #1E3A8A;
240
+ }
241
+
242
+ .metric-label {
243
+ font-size: 0.9rem;
244
+ color: #64748B;
245
+ }
246
+ </style>
247
+ """, unsafe_allow_html=True)
248
+
249
+ # Header
250
+ st.markdown('<p class="main-header">AI-Powered Job Finder</p>', unsafe_allow_html=True)
251
+ st.markdown('<p class="sub-header">Upload your resume and find relevant jobs tailored to your skills and experience</p>', unsafe_allow_html=True)
252
+
253
+ # Initialize session state variables
254
# Seed the session keys used by the resume and search steps; values already
# present from an earlier rerun are left untouched.
for state_key, initial_value in (
    ('resume_text', ""),
    ('resume_parsed', False),
    ('parsed_data', {}),
    ('job_results', []),
    ('search_completed', False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
264
+
265
+ # Define the JSON schema for resume parsing
266
+ RESUME_SCHEMA = {
267
+ "schema": {
268
+ "basic_info": {
269
+ "name": "string",
270
+ "email": "string",
271
+ "phone": "string",
272
+ "location": "string"
273
+ },
274
+ "professional_summary": "string",
275
+ "skills": ["string"],
276
+ "technical_skills": ["string"],
277
+ "soft_skills": ["string"],
278
+ "experience": [{
279
+ "job_title": "string",
280
+ "company": "string",
281
+ "duration": "string",
282
+ "description": "string"
283
+ }],
284
+ "education": [{
285
+ "degree": "string",
286
+ "institution": "string",
287
+ "year": "string"
288
+ }],
289
+ "certifications": ["string"],
290
+ "years_of_experience": "number"
291
+ }
292
+ }
293
+
294
+ # Function to extract text from PDF
295
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        str: The page texts joined with nothing in between. A page for which
        no text could be extracted contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # ``or ""`` keeps a page with a falsy extraction result (e.g. None) from
    # raising TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
301
+
302
+ # Function to parse resume with Gemini
303
def _blank_from_schema(node):
    """Build an empty result mirroring the shape of a schema template node."""
    if isinstance(node, dict):
        return {key: _blank_from_schema(value) for key, value in node.items()}
    if isinstance(node, list):
        return []
    # Leaf values in the template are type names ("string" / "number").
    return 0 if node == "number" else ""


def _extract_json_object(raw_text):
    """Return the first JSON object found in a model reply, or None."""
    import re  # imported locally, as in the original block

    text = (raw_text or "").strip()
    candidates = [text]

    # Replies are often wrapped in a Markdown code fence, with or without
    # the "json" language tag.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1))

    # Last resort: everything between the outermost braces.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            # The prompt embeds the template under a "schema" key, so the
            # model may echo that wrapper back; unwrap it if so.
            inner = parsed.get("schema")
            if len(parsed) == 1 and isinstance(inner, dict):
                return inner
            return parsed
    return None


def parse_resume_with_gemini(resume_text):
    """Ask Gemini to turn raw resume text into structured data.

    Args:
        resume_text: Plain text extracted from the uploaded resume.

    Returns:
        dict: The JSON object returned by the model. If the request fails or
        the reply contains no JSON object, an error is shown with
        ``st.error`` and an empty result with the same keys as
        ``RESUME_SCHEMA["schema"]`` is returned. Returning blanks rather than
        the template keeps placeholder words such as "string" out of the UI
        and out of skill matching.
    """
    try:
        # Configure the Gemini API
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

        # Construct the prompt with schema
        prompt = f"""
        Parse the following resume text and extract information according to this exact JSON schema:

        {json.dumps(RESUME_SCHEMA, indent=2)}

        Resume text:
        {resume_text}

        Make sure to follow the schema exactly. If any information is not available, use empty strings or empty arrays as appropriate.
        Return ONLY the JSON object with no additional text.
        """

        # Generate the response
        response = client.models.generate_content(model="gemini-2.0-flash", contents=prompt)

        parsed_data = _extract_json_object(response.text)
        if parsed_data is None:
            st.error("Could not parse the response as JSON")
            return _blank_from_schema(RESUME_SCHEMA["schema"])
        return parsed_data
    except Exception as e:
        # UI boundary: report the failure instead of crashing the page.
        st.error(f"Error parsing resume: {str(e)}")
        return _blank_from_schema(RESUME_SCHEMA["schema"])
340
+
341
+ # Function to search for jobs
342
def search_jobs(query, location="", page=1, timeout=15):
    """Query the JSearch API (via RapidAPI) for job listings.

    Args:
        query: Free-text job search, e.g. "Python Developer".
        location: Optional location; when given the search text becomes
            "<query> in <location>".
        page: Result page to request.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        dict: The decoded JSON response. On any failure (missing API key,
        network error, non-200 status, invalid JSON) an error is shown with
        ``st.error`` and ``{"data": []}`` is returned.
    """
    from urllib.parse import quote, urlencode

    api_key = os.getenv('RAPIDAPI_KEY')
    if not api_key:
        st.error("Error searching for jobs: RAPIDAPI_KEY is not set")
        return {"data": []}

    search_text = f"{query} in {location}" if location else query
    # Percent-encode every parameter so characters such as "&", "#", "+" or
    # non-ASCII letters cannot corrupt the query string. quote_via=quote
    # keeps spaces as %20, as before.
    query_string = urlencode(
        {"query": search_text, "page": page, "num_pages": 1},
        quote_via=quote,
    )
    headers = {
        'X-RapidAPI-Key': api_key,
        'X-RapidAPI-Host': "jsearch.p.rapidapi.com"
    }

    conn = None
    try:
        conn = http.client.HTTPSConnection("jsearch.p.rapidapi.com", timeout=timeout)
        conn.request("GET", f"/search?{query_string}", headers=headers)

        res = conn.getresponse()
        body = res.read().decode("utf-8")

        if res.status != 200:
            st.error(f"Error searching for jobs: HTTP {res.status} {res.reason}")
            return {"data": []}

        payload = json.loads(body)
        # Callers use payload.get('data', ...), so only hand back a dict.
        return payload if isinstance(payload, dict) else {"data": []}
    except Exception as e:
        # UI boundary: report the failure instead of crashing the page.
        st.error(f"Error searching for jobs: {str(e)}")
        return {"data": []}
    finally:
        if conn is not None:
            conn.close()
365
+
366
# Seed the filter values read by apply_filters(). A key is only written when
# it is absent, so values survive Streamlit reruns.
_filter_state_defaults = {
    'filter_remote_only': False,
    'filter_employment_types': [],
    'filter_date_posted': 0,
    'min_salary': 0,
    'max_salary': 1000000,
    'filter_company_types': [],
}
for _filter_key, _filter_default in _filter_state_defaults.items():
    if _filter_key not in st.session_state:
        st.session_state[_filter_key] = _filter_default
378
+
379
+ # Function to apply filters to job results
380
def apply_filters(jobs, filters=None):
    """Return the jobs that satisfy every active filter, in input order.

    Args:
        jobs: Iterable of job dicts as returned by the search API.
        filters: Optional mapping with the keys ``filter_remote_only``,
            ``filter_employment_types``, ``filter_date_posted`` (maximum age
            in days, 0 = any), ``min_salary``, ``max_salary`` and
            ``filter_company_types``. When omitted, the values are read from
            ``st.session_state``.

    Returns:
        list: The job dicts (same objects, not copies) that pass all filters.
    """
    if filters is None:
        state = st.session_state
        filters = {
            'filter_remote_only': state.filter_remote_only,
            'filter_employment_types': state.filter_employment_types,
            'filter_date_posted': state.filter_date_posted,
            'min_salary': state.min_salary,
            'max_salary': state.max_salary,
            'filter_company_types': state.filter_company_types,
        }

    remote_only = filters.get('filter_remote_only', False)
    employment_types = filters.get('filter_employment_types') or []
    max_age_days = filters.get('filter_date_posted') or 0
    min_salary = filters.get('min_salary')
    max_salary = filters.get('max_salary')
    company_types = filters.get('filter_company_types') or []

    # Loop-invariant values, computed once.
    current_time = int(time.time())
    seconds_per_day = 60 * 60 * 24

    filtered_jobs = []
    for job in jobs:
        # Check remote filter
        if remote_only and not job.get('job_is_remote', False):
            continue

        # Check employment type filter
        if employment_types and job.get('job_employment_type') not in employment_types:
            continue

        # Check date posted filter (in days)
        if max_age_days > 0:
            posted_time = job.get('job_posted_at_timestamp')
            # A missing or null timestamp cannot satisfy an age limit. The
            # old default of 0 already excluded missing keys; a null value
            # used to raise TypeError.
            if posted_time is None:
                continue
            if (current_time - posted_time) / seconds_per_day > max_age_days:
                continue

        # Check salary filter; jobs without salary data are not excluded
        job_min_salary = job.get('job_min_salary')
        if min_salary is not None and job_min_salary is not None and job_min_salary < min_salary:
            continue

        job_max_salary = job.get('job_max_salary')
        if max_salary is not None and job_max_salary is not None and job_max_salary > max_salary:
            continue

        # Check company type filter
        if company_types and job.get('employer_company_type') not in company_types:
            continue

        # All filters passed, add job to filtered results
        filtered_jobs.append(job)

    return filtered_jobs
415
+
416
+ # Main layout
417
+ col1, col2 = st.columns([3, 1])
418
+
419
+ with col1:
420
+ # Resume Upload Section
421
+ st.markdown('<p class="section-header">📄 Upload Your Resume</p>', unsafe_allow_html=True)
422
+
423
+ st.markdown('<div class="info-box">Upload your resume to enable AI-powered job matching based on your skills and experience</div>', unsafe_allow_html=True)
424
+
425
+ uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=['pdf'])
426
+
427
+ if uploaded_file is not None:
428
+ with st.spinner('Processing your resume...'):
429
+ # Extract text from the PDF
430
+ resume_text = extract_text_from_pdf(uploaded_file)
431
+ st.session_state.resume_text = resume_text
432
+
433
+ # Parse the resume
434
+ parsed_data = parse_resume_with_gemini(resume_text)
435
+ st.session_state.parsed_data = parsed_data
436
+ st.session_state.resume_parsed = True
437
+
438
+ # Display success message
439
+ st.markdown('<div class="success-message">Resume successfully parsed!</div>', unsafe_allow_html=True)
440
+
441
+ # Display the parsed information
442
+ with st.expander("View Parsed Resume Information", expanded=True):
443
+ tab1, tab2, tab3 = st.tabs(["Basic Info", "Experience", "Skills & Education"])
444
+
445
+ with tab1:
446
+ # Basic information card
447
+ basic_info = parsed_data.get("basic_info", {})
448
+ st.markdown('<div class="card">', unsafe_allow_html=True)
449
+
450
+ col1, col2 = st.columns(2)
451
+ with col1:
452
+ st.markdown(f"**Name:** {basic_info.get('name', 'Not found')}")
453
+ st.markdown(f"**Email:** {basic_info.get('email', 'Not found')}")
454
+
455
+ with col2:
456
+ st.markdown(f"**Phone:** {basic_info.get('phone', 'Not found')}")
457
+ st.markdown(f"**Location:** {basic_info.get('location', 'Not found')}")
458
+
459
+ if parsed_data.get("professional_summary"):
460
+ st.markdown("<hr style='margin: 1rem 0'>", unsafe_allow_html=True)
461
+ st.markdown("**Professional Summary:**")
462
+ st.markdown(parsed_data.get("professional_summary", ""))
463
+
464
+ st.markdown('</div>', unsafe_allow_html=True)
465
+
466
+ with tab2:
467
+ if parsed_data.get("experience"):
468
+ for exp in parsed_data.get("experience", []):
469
+ st.markdown('<div class="card">', unsafe_allow_html=True)
470
+ st.markdown(f"<div style='color: #1E3A8A; font-weight: 600; font-size: 1.1rem;'>{exp.get('job_title', 'Role')}</div>", unsafe_allow_html=True)
471
+ st.markdown(f"<div style='color: #64748B; font-weight: 500;'>{exp.get('company', 'Company')}</div>", unsafe_allow_html=True)
472
+ st.markdown(f"<div style='color: #94A3B8; font-size: 0.9rem; margin-bottom: 0.75rem;'>{exp.get('duration', 'Duration not specified')}</div>", unsafe_allow_html=True)
473
+ st.markdown(exp.get('description', 'No description available'))
474
+ st.markdown('</div>', unsafe_allow_html=True)
475
+ else:
476
+ st.info("No experience information found in your resume")
477
+
478
+ with tab3:
479
+ col1, col2 = st.columns(2)
480
+
481
+ with col1:
482
+ st.markdown('<div class="card">', unsafe_allow_html=True)
483
+ st.markdown("<strong>Skills</strong>")
484
+
485
+ # Technical skills
486
+ st.markdown("**Technical Skills:**")
487
+ tech_skills = parsed_data.get("technical_skills", [])
488
+ if tech_skills:
489
+ for skill in tech_skills:
490
+ st.markdown(f"<span class='skill-tag'>{skill}</span>", unsafe_allow_html=True)
491
+ else:
492
+ st.markdown("No technical skills found")
493
+
494
+ # Soft skills
495
+ st.markdown("<div style='margin-top: 1rem;'>**Soft Skills:**</div>", unsafe_allow_html=True)
496
+ soft_skills = parsed_data.get("soft_skills", [])
497
+ if soft_skills:
498
+ for skill in soft_skills:
499
+ st.markdown(f"<span class='skill-tag'>{skill}</span>", unsafe_allow_html=True)
500
+ else:
501
+ st.markdown("No soft skills found")
502
+
503
+ # General skills
504
+ st.markdown("<div style='margin-top: 1rem;'>**General Skills:**</div>", unsafe_allow_html=True)
505
+ skills = parsed_data.get("skills", [])
506
+ if skills:
507
+ for skill in skills:
508
+ st.markdown(f"<span class='skill-tag'>{skill}</span>", unsafe_allow_html=True)
509
+ else:
510
+ st.markdown("No general skills found")
511
+
512
+ st.markdown(f"<div style='margin-top: 1rem;'>**Years of Experience:** {parsed_data.get('years_of_experience', 'Not specified')}</div>", unsafe_allow_html=True)
513
+ st.markdown('</div>', unsafe_allow_html=True)
514
+
515
+ with col2:
516
+ st.markdown('<div class="card">', unsafe_allow_html=True)
517
+ st.markdown("<strong>Education</strong>")
518
+
519
+ for edu in parsed_data.get("education", []):
520
+ st.markdown(f"<div style='margin-bottom: 1rem;'>", unsafe_allow_html=True)
521
+ st.markdown(f"<div style='font-weight: 600;'>{edu.get('degree', 'Degree')}</div>", unsafe_allow_html=True)
522
+ st.markdown(f"<div>{edu.get('institution', 'Institution')}</div>", unsafe_allow_html=True)
523
+ st.markdown(f"<div style='color: #94A3B8; font-size: 0.9rem;'>{edu.get('year', 'Year not specified')}</div>", unsafe_allow_html=True)
524
+ st.markdown("</div>", unsafe_allow_html=True)
525
+
526
+ # Certifications if available
527
+ if parsed_data.get("certifications"):
528
+ st.markdown("<hr style='margin: 1rem 0'>", unsafe_allow_html=True)
529
+ st.markdown("<strong>Certifications</strong>")
530
+ for cert in parsed_data.get("certifications", []):
531
+ st.markdown(f"• {cert}")
532
+
533
+ st.markdown('</div>', unsafe_allow_html=True)
534
+
535
+ # Job Search Section
536
+ st.markdown('<p class="section-header">🔍 Search for Jobs</p>', unsafe_allow_html=True)
537
+
538
+ # Search form with improved styling
539
+ st.markdown('<div class="card">', unsafe_allow_html=True)
540
+
541
+ search_query = st.text_input("Job Title", placeholder="e.g., Python Developer, Product Manager")
542
+
543
+ col1, col2 = st.columns(2)
544
+ with col1:
545
+ location = st.text_input("Location", placeholder="e.g., New York, Remote")
546
+ with col2:
547
+ search_button = st.button("Search Jobs", use_container_width=True)
548
+
549
+ if st.session_state.resume_parsed:
550
+ st.markdown('<div class="success-message">Resume skills will be used for job matching</div>', unsafe_allow_html=True)
551
+
552
+ st.markdown('</div>', unsafe_allow_html=True)
553
+
554
+ if search_button:
555
+ if search_query:
556
+ with st.spinner('Searching for relevant jobs...'):
557
+ # Search for jobs
558
+ job_results = search_jobs(search_query, location)
559
+
560
+ # Store the results in session state
561
+ st.session_state.job_results = job_results.get('data', [])
562
+ st.session_state.search_completed = True
563
+ else:
564
+ st.markdown('<div class="warning-message">Please enter a job title to search</div>', unsafe_allow_html=True)
565
+
566
+ with col2:
567
+ # Filter sidebar
568
+ st.markdown('<p class="section-header">⚙️ Filters</p>', unsafe_allow_html=True)
569
+
570
+ st.markdown('<div class="card">', unsafe_allow_html=True)
571
+
572
+ # Remote work filter
573
+ st.checkbox("Remote Only", key="filter_remote_only")
574
+
575
+ # Employment type filter
576
+ st.markdown("<div style='margin-top: 1rem;'><strong>Employment Type</strong></div>", unsafe_allow_html=True)
577
+ employment_types = ["FULLTIME", "PARTTIME", "CONTRACTOR", "INTERN"]
578
+ st.multiselect(
579
+ "Select types",
580
+ employment_types,
581
+ default=None,
582
+ key="filter_employment_types",
583
+ label_visibility="collapsed"
584
+ )
585
+
586
+ # Date posted filter
587
+ st.markdown("<div style='margin-top: 1rem;'><strong>Date Posted</strong></div>", unsafe_allow_html=True)
588
+ date_options = {
589
+ "Any time": 0,
590
+ "Past 24 hours": 1,
591
+ "Past week": 7,
592
+ "Past month": 30
593
+ }
594
+ selected_date = st.selectbox(
595
+ "Select timeframe",
596
+ options=list(date_options.keys()),
597
+ index=0,
598
+ label_visibility="collapsed"
599
+ )
600
+ st.session_state.filter_date_posted = date_options[selected_date]
601
+
602
+ # Salary range filter
603
+ st.markdown("<div style='margin-top: 1rem;'><strong>Salary Range</strong></div>", unsafe_allow_html=True)
604
+ col1, col2 = st.columns(2)
605
+ with col1:
606
+ st.number_input("Min (₹)", value=0, step=10000, key="min_salary")
607
+ with col2:
608
+ st.number_input("Max (₹)", value=1000000, step=10000, key="max_salary")
609
+
610
+ # Company type filter
611
+ st.markdown("<div style='margin-top: 1rem;'><strong>Company Type</strong></div>", unsafe_allow_html=True)
612
+ company_types = ["Public", "Private", "Nonprofit", "Government", "Startup", "Other"]
613
+ st.multiselect(
614
+ "Select types",
615
+ company_types,
616
+ default=None,
617
+ key="filter_company_types",
618
+ label_visibility="collapsed"
619
+ )
620
+
621
+ st.markdown('</div>', unsafe_allow_html=True)
622
+
623
+ # App metrics
624
+ st.markdown('<p class="section-header">📊 Stats</p>', unsafe_allow_html=True)
625
+
626
+ st.markdown('<div class="card">', unsafe_allow_html=True)
627
+ col1, col2 = st.columns(2)
628
+
629
+ with col1:
630
+ if st.session_state.resume_parsed:
631
+ skill_count = len(st.session_state.parsed_data.get("skills", [])) + len(st.session_state.parsed_data.get("technical_skills", []))
632
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
633
+ st.markdown(f'<div class="metric-value">{skill_count}</div>', unsafe_allow_html=True)
634
+ st.markdown('<div class="metric-label">Skills</div>', unsafe_allow_html=True)
635
+ st.markdown('</div>', unsafe_allow_html=True)
636
+ else:
637
+ st.markdown('<div class="metric-card" style="opacity: 0.5">', unsafe_allow_html=True)
638
+ st.markdown('<div class="metric-value">-</div>', unsafe_allow_html=True)
639
+ st.markdown('<div class="metric-label">Skills</div>', unsafe_allow_html=True)
640
+ st.markdown('</div>', unsafe_allow_html=True)
641
+
642
+ with col2:
643
+ if st.session_state.search_completed:
644
+ job_count = len(st.session_state.job_results)
645
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
646
+ st.markdown(f'<div class="metric-value">{job_count}</div>', unsafe_allow_html=True)
647
+ st.markdown('<div class="metric-label">Jobs</div>', unsafe_allow_html=True)
648
+ st.markdown('</div>', unsafe_allow_html=True)
649
+ else:
650
+ st.markdown('<div class="metric-card" style="opacity: 0.5">', unsafe_allow_html=True)
651
+ st.markdown('<div class="metric-value">-</div>', unsafe_allow_html=True)
652
+ st.markdown('<div class="metric-label">Jobs</div>', unsafe_allow_html=True)
653
+ st.markdown('</div>', unsafe_allow_html=True)
654
+
655
+ st.markdown('</div>', unsafe_allow_html=True)
656
+
657
+ # Display Results
658
+ if st.session_state.search_completed:
659
+ st.markdown('<p class="section-header">🎯 Job Matches</p>', unsafe_allow_html=True)
660
+
661
+ if st.session_state.job_results:
662
+ # Apply filters
663
+ filtered_jobs = apply_filters(st.session_state.job_results)
664
+
665
+ if filtered_jobs:
666
+ st.markdown(f'<div class="success-message">Found {len(filtered_jobs)} jobs matching your criteria</div>', unsafe_allow_html=True)
667
+
668
+ # Calculate skill match percentages if resume is uploaded
669
+ if st.session_state.resume_parsed:
670
+ # Extract all skills from resume
671
+ tech_skills = set(st.session_state.parsed_data.get("technical_skills", []))
672
+ general_skills = set(st.session_state.parsed_data.get("skills", []))
673
+ soft_skills = set(st.session_state.parsed_data.get("soft_skills", []))
674
+ all_skills = tech_skills.union(general_skills).union(soft_skills)
675
+
676
+ # Add match score to each job
677
+ for job in filtered_jobs:
678
+ if job.get('job_description'):
679
+ desc = job.get('job_description', '').lower()
680
+ matched_skills = [skill for skill in all_skills if skill.lower() in desc]
681
+ match_percentage = int((len(matched_skills) / max(1, len(all_skills))) * 100)
682
+ job['match_percentage'] = match_percentage
683
+ job['matched_skills'] = matched_skills
684
+ else:
685
+ job['match_percentage'] = 0
686
+ job['matched_skills'] = []
687
+
688
+ # Option to sort by match percentage
689
+ col1, col2 = st.columns([1, 2])
690
+ with col1:
691
+ sort_by_match = st.checkbox("Sort by match percentage", value=True)
692
+
693
+ if sort_by_match:
694
+ filtered_jobs = sorted(filtered_jobs, key=lambda x: x.get('match_percentage', 0), reverse=True)
695
+
696
+ for job_idx, job in enumerate(filtered_jobs):
697
+ # Create a job card
698
+ st.markdown('<div class="card job-card">', unsafe_allow_html=True)
699
+
700
+ # Job header
701
+ cols = st.columns([3, 1])
702
+
703
+ with cols[0]:
704
+ if st.session_state.resume_parsed and 'match_percentage' in job:
705
+ match_percentage = job.get('match_percentage', 0)
706
+ if match_percentage > 70:
707
+ match_class = "match-high"
708
+ elif match_percentage > 40:
709
+ match_class = "match-medium"
710
+ else:
711
+ match_class = "match-low"
712
+
713
+ st.markdown(f"<div style='display: flex; align-items: center;'>", unsafe_allow_html=True)
714
+ st.markdown(f"<div class='job-title'>{job.get('job_title', 'Job Title Not Available')}</div>", unsafe_allow_html=True)
715
+ st.markdown(f"<div style='margin-left: 0.75rem; padding: 0.2rem 0.5rem; border-radius: 4px; font-size: 0.8rem;' class='{match_class}'>{match_percentage}% Match</div>", unsafe_allow_html=True)
716
+ st.markdown("</div>", unsafe_allow_html=True)
717
+ else:
718
+ st.markdown(f"<div class='job-title'>{job.get('job_title', 'Job Title Not Available')}</div>", unsafe_allow_html=True)
719
+
720
+ st.markdown(f"<div class='job-company'>{job.get('employer_name', 'Company Not Available')}</div>", unsafe_allow_html=True)
main2.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import http.client
3
+ import json
4
+ import os
5
+ import PyPDF2
6
+ import io
7
+ from google import genai
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ import time
11
+
12
+ # Load environment variables from .env file
13
+ load_dotenv()
14
+
15
+ # Configure page
16
+ st.set_page_config(page_title="AI Job Finder", page_icon="💼", layout="wide")
17
+
18
+ # Styling
19
+ st.markdown("""
20
+ <style>
21
+ .main-header {
22
+ font-size: 2.5rem;
23
+ color: #4169E1;
24
+ }
25
+ .sub-header {
26
+ font-size: 1.5rem;
27
+ color: #6C757D;
28
+ }
29
+ .success-message {
30
+ background-color: #D4EDDA;
31
+ color: #155724;
32
+ padding: 10px;
33
+ border-radius: 5px;
34
+ margin-bottom: 20px;
35
+ }
36
+ .info-box {
37
+ background-color: #E7F3FE;
38
+ border-left: 6px solid #2196F3;
39
+ padding: 10px;
40
+ margin-bottom: 15px;
41
+ }
42
+ .search-options {
43
+ margin-top: 20px;
44
+ margin-bottom: 20px;
45
+ }
46
+ </style>
47
+ """, unsafe_allow_html=True)
48
+
49
+ # Header
50
+ st.markdown('<p class="main-header">AI-Powered Job Finder</p>', unsafe_allow_html=True)
51
+ st.markdown('<p class="sub-header">Upload your resume and find relevant jobs</p>', unsafe_allow_html=True)
52
+
53
+ # Initialize session state variables
54
# Seed the per-session values the rest of the script reads. A key is only
# written when it is absent, so values survive Streamlit reruns.
_core_state_defaults = {
    'resume_text': "",
    'resume_parsed': False,
    'parsed_data': {},
    'job_results': [],
    'search_completed': False,
}
for _state_key, _state_default in _core_state_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
64
+
65
+ # Define the JSON schema for resume parsing
66
+ RESUME_SCHEMA = {
67
+ "schema": {
68
+ "basic_info": {
69
+ "name": "string",
70
+ "email": "string",
71
+ "phone": "string",
72
+ "location": "string"
73
+ },
74
+ "professional_summary": "string",
75
+ "skills": ["string"],
76
+ "technical_skills": ["string"],
77
+ "soft_skills": ["string"],
78
+ "experience": [{
79
+ "job_title": "string",
80
+ "company": "string",
81
+ "duration": "string",
82
+ "description": "string"
83
+ }],
84
+ "education": [{
85
+ "degree": "string",
86
+ "institution": "string",
87
+ "year": "string"
88
+ }],
89
+ "certifications": ["string"],
90
+ "years_of_experience": "number"
91
+ }
92
+ }
93
+
94
+ # Function to extract text from PDF
95
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        str: The page texts joined with nothing in between. A page for which
        no text could be extracted contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # ``or ""`` keeps a page with a falsy extraction result (e.g. None) from
    # raising TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
101
+
102
+ # Function to parse resume with Gemini
103
def _blank_from_schema(node):
    """Build an empty result mirroring the shape of a schema template node."""
    if isinstance(node, dict):
        return {key: _blank_from_schema(value) for key, value in node.items()}
    if isinstance(node, list):
        return []
    # Leaf values in the template are type names ("string" / "number").
    return 0 if node == "number" else ""


def _extract_json_object(raw_text):
    """Return the first JSON object found in a model reply, or None."""
    import re  # imported locally, as in the original block

    text = (raw_text or "").strip()
    candidates = [text]

    # Replies are often wrapped in a Markdown code fence, with or without
    # the "json" language tag.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1))

    # Last resort: everything between the outermost braces.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            # The prompt embeds the template under a "schema" key, so the
            # model may echo that wrapper back; unwrap it if so.
            inner = parsed.get("schema")
            if len(parsed) == 1 and isinstance(inner, dict):
                return inner
            return parsed
    return None


def parse_resume_with_gemini(resume_text):
    """Ask Gemini to turn raw resume text into structured data.

    Args:
        resume_text: Plain text extracted from the uploaded resume.

    Returns:
        dict: The JSON object returned by the model. If the request fails or
        the reply contains no JSON object, an error is shown with
        ``st.error`` and an empty result with the same keys as
        ``RESUME_SCHEMA["schema"]`` is returned. Returning blanks rather than
        the template keeps placeholder words such as "string" out of the UI
        and out of skill matching.
    """
    try:
        # Configure the Gemini API
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

        # Construct the prompt with schema
        prompt = f"""
        Parse the following resume text and extract information according to this exact JSON schema:

        {json.dumps(RESUME_SCHEMA, indent=2)}

        Resume text:
        {resume_text}

        Make sure to follow the schema exactly. If any information is not available, use empty strings or empty arrays as appropriate.
        Return ONLY the JSON object with no additional text.
        """

        # Generate the response
        response = client.models.generate_content(model="gemini-2.0-flash", contents=prompt)

        parsed_data = _extract_json_object(response.text)
        if parsed_data is None:
            st.error("Could not parse the response as JSON")
            return _blank_from_schema(RESUME_SCHEMA["schema"])
        return parsed_data
    except Exception as e:
        # UI boundary: report the failure instead of crashing the page.
        st.error(f"Error parsing resume: {str(e)}")
        return _blank_from_schema(RESUME_SCHEMA["schema"])
143
+
144
+ # Function to search for jobs
145
def search_jobs(query, location="", page=1, timeout=15):
    """Query the JSearch API (via RapidAPI) for job listings.

    Args:
        query: Free-text job search, e.g. "Python Developer".
        location: Optional location; when given the search text becomes
            "<query> in <location>".
        page: Result page to request.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        dict: The decoded JSON response. On any failure (missing API key,
        network error, non-200 status, invalid JSON) an error is shown with
        ``st.error`` and ``{"data": []}`` is returned.
    """
    from urllib.parse import quote, urlencode

    api_key = os.getenv('RAPIDAPI_KEY')
    if not api_key:
        st.error("Error searching for jobs: RAPIDAPI_KEY is not set")
        return {"data": []}

    search_text = f"{query} in {location}" if location else query
    # Percent-encode every parameter so characters such as "&", "#", "+" or
    # non-ASCII letters cannot corrupt the query string. quote_via=quote
    # keeps spaces as %20, as before.
    query_string = urlencode(
        {"query": search_text, "page": page, "num_pages": 1},
        quote_via=quote,
    )
    headers = {
        'X-RapidAPI-Key': api_key,
        'X-RapidAPI-Host': "jsearch.p.rapidapi.com"
    }

    conn = None
    try:
        conn = http.client.HTTPSConnection("jsearch.p.rapidapi.com", timeout=timeout)
        conn.request("GET", f"/search?{query_string}", headers=headers)

        res = conn.getresponse()
        body = res.read().decode("utf-8")

        if res.status != 200:
            st.error(f"Error searching for jobs: HTTP {res.status} {res.reason}")
            return {"data": []}

        payload = json.loads(body)
        # Callers use payload.get('data', ...), so only hand back a dict.
        return payload if isinstance(payload, dict) else {"data": []}
    except Exception as e:
        # UI boundary: report the failure instead of crashing the page.
        st.error(f"Error searching for jobs: {str(e)}")
        return {"data": []}
    finally:
        if conn is not None:
            conn.close()
168
+
169
# Seed the filter values read by apply_filters(). A key is only written when
# it is absent, so values survive Streamlit reruns.
_filter_state_defaults = {
    'filter_remote_only': False,
    'filter_employment_types': [],
    'filter_date_posted': 0,
    'min_salary': 0,
    'max_salary': 1000000,
    'filter_company_types': [],
}
for _filter_key, _filter_default in _filter_state_defaults.items():
    if _filter_key not in st.session_state:
        st.session_state[_filter_key] = _filter_default
181
+
182
+ # Function to apply filters to job results
183
def apply_filters(jobs, filters=None):
    """Return the jobs that satisfy every active filter, in input order.

    Args:
        jobs: Iterable of job dicts as returned by the search API.
        filters: Optional mapping with the keys ``filter_remote_only``,
            ``filter_employment_types``, ``filter_date_posted`` (maximum age
            in days, 0 = any), ``min_salary``, ``max_salary`` and
            ``filter_company_types``. When omitted, the values are read from
            ``st.session_state``.

    Returns:
        list: The job dicts (same objects, not copies) that pass all filters.
    """
    if filters is None:
        state = st.session_state
        filters = {
            'filter_remote_only': state.filter_remote_only,
            'filter_employment_types': state.filter_employment_types,
            'filter_date_posted': state.filter_date_posted,
            'min_salary': state.min_salary,
            'max_salary': state.max_salary,
            'filter_company_types': state.filter_company_types,
        }

    remote_only = filters.get('filter_remote_only', False)
    employment_types = filters.get('filter_employment_types') or []
    max_age_days = filters.get('filter_date_posted') or 0
    min_salary = filters.get('min_salary')
    max_salary = filters.get('max_salary')
    company_types = filters.get('filter_company_types') or []

    # Loop-invariant values, computed once.
    current_time = int(time.time())
    seconds_per_day = 60 * 60 * 24

    filtered_jobs = []
    for job in jobs:
        # Check remote filter
        if remote_only and not job.get('job_is_remote', False):
            continue

        # Check employment type filter
        if employment_types and job.get('job_employment_type') not in employment_types:
            continue

        # Check date posted filter (in days)
        if max_age_days > 0:
            posted_time = job.get('job_posted_at_timestamp')
            # A missing or null timestamp cannot satisfy an age limit. The
            # old default of 0 already excluded missing keys; a null value
            # used to raise TypeError.
            if posted_time is None:
                continue
            if (current_time - posted_time) / seconds_per_day > max_age_days:
                continue

        # Check salary filter; jobs without salary data are not excluded
        job_min_salary = job.get('job_min_salary')
        if min_salary is not None and job_min_salary is not None and job_min_salary < min_salary:
            continue

        job_max_salary = job.get('job_max_salary')
        if max_salary is not None and job_max_salary is not None and job_max_salary > max_salary:
            continue

        # Check company type filter
        if company_types and job.get('employer_company_type') not in company_types:
            continue

        # All filters passed, add job to filtered results
        filtered_jobs.append(job)

    return filtered_jobs
218
+
219
+ # Resume Upload Section
220
+ st.subheader("Step 1: Upload Your Resume First")
221
+ uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=['pdf'])
222
+
223
+ if uploaded_file is not None:
224
+ with st.spinner('Processing your resume...'):
225
+ # Extract text from the PDF
226
+ resume_text = extract_text_from_pdf(uploaded_file)
227
+ st.session_state.resume_text = resume_text
228
+
229
+ # Parse the resume
230
+ parsed_data = parse_resume_with_gemini(resume_text)
231
+ st.session_state.parsed_data = parsed_data
232
+ st.session_state.resume_parsed = True
233
+
234
+ # Display the parsed information
235
+ with st.expander("Resume Parsed Information", expanded=True):
236
+ col1, col2 = st.columns(2)
237
+
238
+ with col1:
239
+ st.markdown("### Basic Information")
240
+ basic_info = parsed_data.get("basic_info", {})
241
+ st.write(f"**Name:** {basic_info.get('name', 'Not found')}")
242
+ st.write(f"**Email:** {basic_info.get('email', 'Not found')}")
243
+ st.write(f"**Phone:** {basic_info.get('phone', 'Not found')}")
244
+ st.write(f"**Location:** {basic_info.get('location', 'Not found')}")
245
+
246
+ st.markdown("### Experience")
247
+ for exp in parsed_data.get("experience", []):
248
+ st.markdown(f"**{exp.get('job_title', 'Role')} at {exp.get('company', 'Company')}**")
249
+ st.write(f"*{exp.get('duration', 'Duration not specified')}*")
250
+ st.write(exp.get('description', 'No description available'))
251
+ st.write("---")
252
+
253
+ with col2:
254
+ st.markdown("### Skills")
255
+
256
+ # Technical skills
257
+ st.write("**Technical Skills:**")
258
+ tech_skills = parsed_data.get("technical_skills", [])
259
+ if tech_skills:
260
+ st.write(", ".join(tech_skills))
261
+ else:
262
+ st.write("No technical skills found")
263
+
264
+ # Soft skills
265
+ st.write("**Soft Skills:**")
266
+ soft_skills = parsed_data.get("soft_skills", [])
267
+ if soft_skills:
268
+ st.write(", ".join(soft_skills))
269
+ else:
270
+ st.write("No soft skills found")
271
+
272
+ # General skills
273
+ st.write("**General Skills:**")
274
+ skills = parsed_data.get("skills", [])
275
+ if skills:
276
+ st.write(", ".join(skills))
277
+ else:
278
+ st.write("No general skills found")
279
+
280
+ st.markdown("### Education")
281
+ for edu in parsed_data.get("education", []):
282
+ st.write(f"**{edu.get('degree', 'Degree')}** - {edu.get('institution', 'Institution')}")
283
+ st.write(f"*{edu.get('year', 'Year not specified')}*")
284
+
285
+ st.write(f"**Years of Experience:** {parsed_data.get('years_of_experience', 'Not specified')}")
286
+ st.markdown("---")
287
+ st.subheader("Step 2: Search for Jobs")
288
+
289
+ # Query input
290
+ search_query = st.text_input("Enter your job search query (e.g., 'Python Developer')")
291
+ location = st.text_input("Location (e.g., 'New York', 'Remote')")
292
+
293
+ # Add filter options to sidebar
294
+ st.sidebar.markdown("### Filter Options")
295
+
296
+ # Remote work filter
297
+ st.sidebar.checkbox("Remote Only", key="filter_remote_only")
298
+
299
+ # Employment type filter
300
+ employment_types = ["FULLTIME", "PARTTIME", "CONTRACTOR", "INTERN"]
301
+ st.sidebar.multiselect(
302
+ "Employment Type",
303
+ employment_types,
304
+ default=None,
305
+ key="filter_employment_types"
306
+ )
307
+
308
+ # Date posted filter
309
+ date_options = {
310
+ "Any time": 0,
311
+ "Past 24 hours": 1,
312
+ "Past week": 7,
313
+ "Past month": 30
314
+ }
315
+ selected_date = st.sidebar.selectbox(
316
+ "Date Posted",
317
+ options=list(date_options.keys()),
318
+ index=0
319
+ )
320
+ st.session_state.filter_date_posted = date_options[selected_date]
321
+
322
+ # Salary range filter (only if salary data is available)
323
+ st.sidebar.markdown("### Salary Range")
324
+ col1, col2 = st.sidebar.columns(2)
325
+ with col1:
326
+ st.number_input("Min ($)", value=0, step=10000, key="min_salary")
327
+ with col2:
328
+ st.number_input("Max ($)", value=1000000, step=10000, key="max_salary")
329
+
330
+ # Company type filter
331
+ company_types = ["Public", "Private", "Nonprofit", "Government", "Startup", "Other"]
332
+ st.sidebar.multiselect(
333
+ "Company Type",
334
+ company_types,
335
+ default=None,
336
+ key="filter_company_types"
337
+ )
338
+
339
+ if st.button("Search Jobs"):
340
+ if search_query:
341
+ with st.spinner('Searching for jobs...'):
342
+ final_query = search_query
343
+
344
+ # Search for jobs
345
+ job_results = search_jobs(final_query, location)
346
+
347
+ # Store the results in session state
348
+ st.session_state.job_results = job_results.get('data', [])
349
+ st.session_state.search_completed = True
350
+ else:
351
+ st.warning("Please enter a search query")
352
+
353
+ # Display Results
354
+ if st.session_state.search_completed:
355
+ st.markdown("---")
356
+ st.subheader("Job Search Results")
357
+
358
+ if st.session_state.job_results:
359
+ # Apply filters
360
+ filtered_jobs = apply_filters(st.session_state.job_results)
361
+
362
+ if filtered_jobs:
363
+ st.success(f"Found {len(filtered_jobs)} jobs matching your criteria")
364
+
365
+ # Calculate skill match percentages if resume is uploaded
366
+ if st.session_state.resume_parsed:
367
+ # Extract all skills from resume
368
+ tech_skills = set(st.session_state.parsed_data.get("technical_skills", []))
369
+ general_skills = set(st.session_state.parsed_data.get("skills", []))
370
+ soft_skills = set(st.session_state.parsed_data.get("soft_skills", []))
371
+ all_skills = tech_skills.union(general_skills).union(soft_skills)
372
+
373
+ # Add match score to each job
374
+ for job in filtered_jobs:
375
+ if job.get('job_description'):
376
+ desc = job.get('job_description', '').lower()
377
+ matched_skills = [skill for skill in all_skills if skill.lower() in desc]
378
+ match_percentage = int((len(matched_skills) / max(1, len(all_skills))) * 100)
379
+ job['match_percentage'] = match_percentage
380
+ job['matched_skills'] = matched_skills
381
+ else:
382
+ job['match_percentage'] = 0
383
+ job['matched_skills'] = []
384
+
385
+ # Option to sort by match percentage
386
+ sort_by_match = st.checkbox("Sort jobs by skill match percentage", value=True)
387
+ if sort_by_match:
388
+ filtered_jobs = sorted(filtered_jobs, key=lambda x: x.get('match_percentage', 0), reverse=True)
389
+
390
+ for job_idx, job in enumerate(filtered_jobs):
391
+ # Customize job title based on match percentage if resume uploaded
392
+ if st.session_state.resume_parsed and 'match_percentage' in job:
393
+ job_title = f"{job_idx+1}. {job.get('job_title', 'Job Title Not Available')} - {job.get('employer_name', 'Company Not Available')} "
394
+ job_title += f"[Match: {job.get('match_percentage')}%]"
395
+ else:
396
+ job_title = f"{job_idx+1}. {job.get('job_title', 'Job Title Not Available')} - {job.get('employer_name', 'Company Not Available')}"
397
+
398
+ with st.expander(job_title):
399
+ cols = st.columns([2, 1])
400
+
401
+ with cols[0]:
402
+ # Job details
403
+ st.write(f"**Company:** {job.get('employer_name', 'Not Available')}")
404
+ st.write(f"**Location:** {job.get('job_city', 'Not Available')}, {job.get('job_country', 'Not Available')}")
405
+ st.write(f"**Employment Type:** {job.get('job_employment_type', 'Not Available')}")
406
+
407
+ # Remote information
408
+ st.write(f"**Remote:** {'Yes' if job.get('job_is_remote') else 'No'}")
409
+
410
+ # Date posted and expiration
411
+ if job.get('job_posted_at_datetime_utc'):
412
+ st.write(f"**Posted:** {job.get('job_posted_at_datetime_utc', 'Not Available')}")
413
+
414
+ # Salary information
415
+ if job.get('job_min_salary') and job.get('job_max_salary'):
416
+ st.write(f"**Salary Range:** ${job.get('job_min_salary', 'Not Available')} - ${job.get('job_max_salary', 'Not Available')} {job.get('job_salary_currency', 'USD')}")
417
+
418
+ with cols[1]:
419
+ # Enhanced skills match section
420
+ if st.session_state.resume_parsed:
421
+ match_percentage = job.get('match_percentage', 0)
422
+ matched_skills = job.get('matched_skills', [])
423
+
424
+ # Create a visual progress bar for match percentage
425
+ st.markdown("### Skills Match")
426
+
427
+ # Color coding based on match percentage
428
+ if match_percentage > 70:
429
+ bar_color = "green"
430
+ elif match_percentage > 40:
431
+ bar_color = "orange"
432
+ else:
433
+ bar_color = "red"
434
+
435
+ # Display progress bar
436
+ st.progress(match_percentage / 100)
437
+ st.markdown(f"<h4 style='color:{bar_color};margin-top:0'>{match_percentage}% Match</h4>", unsafe_allow_html=True)
438
+
439
+ if matched_skills:
440
+ st.markdown("**Matching Skills:**")
441
+ skill_cols = st.columns(2)
442
+ for skill_idx, skill in enumerate(matched_skills[:10]): # Changed variable name from i to skill_idx
443
+ col_idx = skill_idx % 2
444
+ with skill_cols[col_idx]:
445
+ st.markdown(f"✅ {skill}")
446
+
447
+ if len(matched_skills) > 10:
448
+ st.markdown(f"*...and {len(matched_skills)-10} more*")
449
+ else:
450
+ st.write("⚠️ No direct skill matches found")
451
+
452
+ # Description
453
+ st.markdown("**Job Description:**")
454
+ full_desc = job.get('job_description', 'No description available')
455
+
456
+ if len(full_desc) > 1000:
457
+ st.markdown(full_desc[:1000] + "...")
458
+ if st.button(f"Show Full Description for Job {job_idx+1}", key=f"show_desc_{job_idx}"):
459
+ st.markdown(full_desc)
460
+ else:
461
+ st.markdown(full_desc)
462
+
463
+ # Display ALL application links
464
+ st.markdown("**Apply Links:**")
465
+ apply_options = job.get('apply_options', [])
466
+ if apply_options:
467
+ for option in apply_options:
468
+ st.markdown(f"[Apply on {option.get('publisher', 'Job Board')}]({option.get('apply_link')})")
469
+ elif job.get('job_apply_link'):
470
+ st.markdown(f"[Apply for this job]({job.get('job_apply_link')})")
471
+ else:
472
+ st.info("No jobs match your filters. Try adjusting your filter criteria.")
473
+ else:
474
+ st.info("No jobs found matching your search criteria. Try adjusting your search terms or location.")
475
+
476
+
477
+ st.markdown("---")
478
+ st.markdown("### How to use this app")
479
+ st.markdown("""
480
+ 1. Upload your resume in PDF format to extract your skills and experience
481
+ 2. Enter your job search query and preferred location
482
+ 3. Review job listings and apply directly to positions you're interested in
483
+ """)
484
+
485
+
486
+ # Display app statistics
487
+ st.sidebar.markdown("### App Statistics")
488
+ if st.session_state.resume_parsed:
489
+ st.sidebar.success("✅ Resume Parsed")
490
+ skill_count = len(st.session_state.parsed_data.get("skills", [])) + len(st.session_state.parsed_data.get("technical_skills", []))
491
+ st.sidebar.metric("Skills Detected", skill_count)
492
+ else:
493
+ st.sidebar.warning("❌ No Resume Uploaded")
494
+
495
+ if st.session_state.search_completed:
496
+ st.sidebar.success("✅ Job Search Completed")
497
+ st.sidebar.metric("Jobs Found", len(st.session_state.job_results))
498
+ else:
499
+ st.sidebar.warning("❌ No Search Performed")
rag.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rag.py
2
+ from sentence_transformers import SentenceTransformer
3
+ import faiss
4
+ import numpy as np
5
+ import google as genai
6
+ import os
7
+
8
class SimpleRAG:
    """Minimal retrieval-augmented generation helper over job-search results.

    Job postings are flattened to text, split into word chunks, embedded with
    a SentenceTransformer model and stored in a FAISS L2 index. Questions are
    answered by retrieving the nearest chunks and passing them as context to
    a Gemini model.
    """

    def __init__(self, api_key):
        """Create the embedder and the Gemini client.

        Args:
            api_key: Gemini API key used to authenticate generation requests.
        """
        # Imported locally under a private alias: the module-level
        # ``import google as genai`` binds the bare ``google`` namespace
        # package, which has neither ``configure`` nor ``GenerativeModel``.
        # requirements.txt installs ``google-genai``, whose entry point is
        # ``genai.Client``.
        from google import genai as google_genai

        self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.client = google_genai.Client(api_key=api_key)
        self.model_name = "gemini-1.5-flash"
        self.index = None
        self.chunks = []
        self.is_initialized = False
        self.processing_status = None

    def chunk_text(self, text, chunk_size=700):
        """Split text into chunks of at most ``chunk_size`` whitespace-separated words.

        Args:
            text: The text to split.
            chunk_size: Maximum number of words per chunk; must be positive.

        Returns:
            A list of chunk strings (empty when ``text`` contains no words).

        Raises:
            ValueError: If ``chunk_size`` is not a positive integer.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        words = text.split()
        return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    def process_search_data(self, search_data):
        """Index a list of job-posting dictionaries for later retrieval.

        The title and description of every posting are concatenated, chunked
        and embedded; the embeddings are stored in a fresh FAISS index.

        Args:
            search_data: Iterable of dicts read via the ``job_title`` and
                ``job_description`` keys.

        Returns:
            ``{"status": "success", "message": ...}`` on success, or
            ``{"status": "error", "message": ...}`` on failure. This method
            never raises.
        """
        try:
            self.processing_status = "Processing search data..."
            # ``or ''`` keeps an explicit null field from being rendered as
            # the literal text "None" inside the indexed corpus.
            combined_text = "".join(
                f"Job Title: {job.get('job_title') or ''}. "
                f"Description: {job.get('job_description') or ''}. "
                for job in search_data
            )

            if not combined_text.strip():
                raise Exception("No text found in search results.")

            chunks = self.chunk_text(combined_text)
            if not chunks:
                raise Exception("No content chunks were generated from search data.")

            # Build everything in locals first so that a failure while
            # embedding or indexing cannot leave ``self.chunks`` and
            # ``self.index`` describing different data.
            embeddings = np.asarray(self.embedder.encode(chunks), dtype='float32')
            index = faiss.IndexFlatL2(embeddings.shape[1])
            index.add(embeddings)

            self.chunks = chunks
            self.index = index
            self.is_initialized = True
            self.processing_status = f"RAG system initialized with {len(self.chunks)} chunks."
            return {"status": "success", "message": self.processing_status}
        except Exception as e:
            self.processing_status = f"Error: {str(e)}"
            self.is_initialized = False
            return {"status": "error", "message": str(e)}

    def get_status(self):
        """Return the initialization flag and the latest status message."""
        return {
            "is_initialized": self.is_initialized,
            "status": self.processing_status
        }

    def get_relevant_chunks(self, query, k=3):
        """Retrieve up to ``k`` chunks closest to ``query`` in embedding space.

        Args:
            query: The question or search text.
            k: Maximum number of chunks to return.

        Returns:
            A list of chunk strings, nearest first. It may hold fewer than
            ``k`` items when the index contains fewer chunks.

        Raises:
            RuntimeError: If no data has been indexed yet.
        """
        if self.index is None or not self.chunks:
            raise RuntimeError("RAG index is empty. Please process search data first.")

        # FAISS pads missing neighbours with -1; without this cap a negative
        # id would silently select the last chunk through Python indexing.
        k = min(k, len(self.chunks))
        if k <= 0:
            return []

        query_vector = np.asarray(self.embedder.encode([query]), dtype='float32')
        _, chunk_indices = self.index.search(query_vector, k)
        total = len(self.chunks)
        return [self.chunks[i] for i in chunk_indices[0] if 0 <= i < total]

    def query(self, question):
        """Answer ``question`` using the indexed job data as context.

        Args:
            question: The user's question.

        Returns:
            ``{"status": "success", "answer": ..., "context": [...]}`` or
            ``{"status": "error", "message": ...}`` if retrieval or
            generation fails.

        Raises:
            Exception: If ``process_search_data`` has not succeeded yet.
        """
        if not self.is_initialized:
            raise Exception("RAG system not initialized. Please process search data first.")
        try:
            context = self.get_relevant_chunks(question)
            prompt = (
                "Based on the following context, provide a clear and concise answer.\n"
                "If the context doesn't contain enough relevant information, say "
                "\"I don't have enough information to answer that question.\"\n"
                "\n"
                "Context:\n"
                f"{' '.join(context)}\n"
                "\n"
                f"Question: {question}\n"
            )
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
            )
            # ``text`` can be None when the model returns no text part.
            answer = (response.text or "").strip()
            return {
                "status": "success",
                "answer": answer,
                "context": context
            }
        except Exception as e:
            return {
                "status": "error",
                "message": str(e)
            }
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ python-dotenv
3
+ PyPDF2
4
+ requests
5
+ sentence-transformers
6
+ faiss-cpu
7
+ numpy
8
+ google-genai
9
+ certifi
10
+ charset-normalizer
11
+ idna
12
+ urllib3