Yassmen committed on
Commit 5a1c96e · verified · 1 parent: 5a89aa5

Create app.py

Files changed (1): app.py (+594, −0)
app.py ADDED (+594 lines):
import streamlit as st
import requests
import numpy as np
import pandas as pd
from streamlit_lottie import st_lottie
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
from bs4 import BeautifulSoup
import bs4
from urllib.request import urlopen
import time
import re
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import plotly
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import iplot
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Settings for using the Chrome driver without a UI (headless)
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome('chromedriver', options=options)

# Wuzzuf scraping function
def Wuzzuf_scrapping(job_type, job_num):
    job1 = job_type.split(" ")[0]
    job2 = job_type.split(" ")[1]
    # build the search URL from both words of the job title
    link1 = 'https://wuzzuf.net/search/jobs/?a=navbl&q=' + job1 + '%20' + job2
    title = []
    location = []
    country = []
    job_description = []
    Job_Requirements = []
    company_name = []
    links = []
    Jop_type = []
    Career_Level = []
    company_logo = []
    Job_Categories = []
    Skills_And_Tools = []
    Experience_Needed = []
    post_time = []
    Title = []
    pages_num = np.ceil(job_num / 15)

    for page in range(int(pages_num)):
        link_new = link1 + '&start=' + str(page)
        data = requests.get(link_new)
        soup = BeautifulSoup(data.content, 'html.parser')
        Title = soup.find_all('h2', {'class': 'css-m604qf'})

        # to get the info about jobs
        for x in range(0, len(Title)):
            t = re.split(r'\(|\-', Title[x].find('a').text)
            title.append(t[0].strip())
            loc = re.split(',', soup.find_all('span', {'class': 'css-5wys0k'})[x].text)
            r = ""
            for part in loc[:-1]:
                r = r + ', ' + part.strip()
            location.append(r.replace(',', '', 1).strip())
            country.append(loc[-1].strip())
            links.append('https://wuzzuf.net' + Title[x].find('a').attrs['href'])
            m = " ".join(re.findall(r"[a-zA-Z\d+]+", (soup.find_all('div', {'class': 'css-d7j1kk'})[x].find('a').text)))
            company_name.append(m)
            c = soup.find_all('div', {'class': 'css-1lh32fc'})[x].find_all('span')
            if len(c) == 1:
                Jop_type.append(c[0].text)
            else:
                n = []
                for span in c:
                    n.append(span.text)
                Jop_type.append(n)
            n = soup.find_all('div', {'class': 'css-y4udm8'})[x].find_all('div')[1].find_all(['a', 'span'])
            Career_Level.append(n[0].text)
            n = soup.find_all('div', {'class': 'css-y4udm8'})[x].find_all('div')[1].find_all(['a', 'span'])

            yy = n[1].text.replace('·', ' ').strip()
            yy = re.findall('[0-9-+]*', yy)
            y1 = ""
            for part in yy:
                if any(part):
                    y1 = y1 + part
            if y1 != "":
                Experience_Needed.append(y1)
            else:
                Experience_Needed.append("Not Specified")
            posted = (soup.find_all('div', {'class': 'css-d7j1kk'}))[x].find('div')
            post_time.append(posted.text)

            # to get the logo of the company
            data1 = requests.get(links[x])
            soup1 = BeautifulSoup(data1.content, 'html.parser')
            company_logo.append(soup1.find_all('meta', {'property': "og:image"})[0]['content'])
            # time.sleep(4)

            # get Job_Categories, Skills_And_Tools, job_description and Job_Requirements from the job page
            driver = webdriver.Chrome('chromedriver', options=options)
            # driver.implicitly_wait(10)
            driver.get(links[x])
            Job_Categories.append(driver.find_element(By.XPATH, '//*[@id="app"]/div/main/section[2]/div[5]').text.split("\n")[1:])
            Skills_And_Tools.append(driver.find_element(By.XPATH, '//*[@id="app"]/div/main/section[2]/div[6]').text.split("\n")[1:])
            job_description.append(driver.find_element(By.XPATH, '//*[@id="app"]/div/main/section[3]').text.split("\n")[1:])
            all_sections = driver.find_elements(By.XPATH, '//*[@id="app"]/div/main/section[4]/div')
            dict_other = {}

            new = all_sections[0].text.split("\n\n")

            if len(new) != 1:
                for block in new:
                    result = []
                    for k in block.split('\n')[1:]:
                        result.append(k.replace("\u202f", " "))
                    dict_other[block.split('\n')[0]] = result
                Job_Requirements.append(dict_other)
            else:
                nn = new[0].replace("\u202f", " ")
                Job_Requirements.append(nn.split('\n'))
            driver.quit()  # close this job's browser session

    # create a data frame to combine all together
    df = pd.DataFrame({'Title': title, 'Location': location, 'country': country, 'URLs': links, 'Company_Name': company_name, 'Career_Level': Career_Level, 'post_time': post_time, 'Experience_Needed': Experience_Needed, 'Company_Logo': company_logo, "Job_Categories": Job_Categories, "Skills_And_Tools": Skills_And_Tools, "job_description": job_description, "Job_Requirements": Job_Requirements})

    df[:job_num].to_excel('WUZZUF_scrapping.xlsx', index=False)
    return df[:job_num]

# LinkedIn scraping function
def LINKEDIN_Scrapping(job_search, num_jobs):
    job1 = job_search.split(" ")[0]
    job2 = job_search.split(" ")[1]

    link1 = 'https://www.linkedin.com/jobs/search?keywords=' + job1 + '%20' + job2 + '&location=&geoId=&trk=public_jobs_jobs-search-bar_search-submit&position=1&pageNum=0'

    # FIRST: get the main information about the jobs
    title = []
    location = []
    country = []
    company_name = []
    post_time = []
    links = []
    # get the specific number of jobs
    l1 = ""
    ll = ""
    driver = webdriver.Chrome('chromedriver', options=options)
    driver.get(link1)
    SCROLL_PAUSE_TIME = 0.5
    while True:
        l1 = driver.find_elements(By.XPATH, '//*[@id="main-content"]/section[2]/ul/li[*]/div')
        ll = driver.find_elements(By.XPATH, '//*[@id="main-content"]/section[2]/ul/li[*]/div/a')

        if len(l1) >= num_jobs:
            break
        time.sleep(3)
        # Get scroll height
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            # Scroll down to the bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Wait for the page to load
            time.sleep(SCROLL_PAUSE_TIME)
            # Calculate the new scroll height and compare it with the last scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        options.add_argument("window-size=1200x600")
        # click the "see more jobs" button to load the next batch
        WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="main-content"]/section[2]/button'))).click()
        print(len(l1))
        time.sleep(2)

    l2 = l1[:num_jobs]

    for info in l2:
        info_tot = info.text.split("\n")
        if len(info_tot) == 5:
            title.append(info_tot[1])
            location.append(info_tot[3])
            company_name.append(info_tot[2])
            post_time.append(info_tot[4])
        else:
            title.append(info_tot[1])
            location.append(info_tot[3])
            company_name.append(info_tot[2])
            post_time.append(info_tot[5])

    # get the links of the jobs
    l3 = ll[:num_jobs]
    for i in l3:
        links.append(i.get_attribute('href'))

    df_ml = pd.DataFrame({'Title': title, 'Location': location, 'URLs': links, 'Company_Name': company_name, 'post_time': post_time})

    # GET DESCRIPTION AND LOGO
    def all_description_LOGO(urls):
        description = []
        LOGO = []
        for link in urls:
            driver = webdriver.Chrome('chromedriver', options=options)
            driver.get(link)
            options.add_argument("window-size=1200x600")
            WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="main-content"]/section[1]/div/div[1]/section[1]/div/div/section/button[1]'))).click()
            K = driver.find_element(By.XPATH, '//*[@id="main-content"]/section[1]/div/section[2]/div/a/img')
            LOGO.append(K.get_attribute('src'))
            time.sleep(3)
            t = driver.find_element(By.XPATH, '//*[@id="main-content"]/section[1]/div/div[1]/section[1]/div/div/section/div')
            t_reverse = t.text[::-1]

            # strip a trailing "show more" label if it is present
            if t_reverse[:9] == "erom wohs":
                l = len(t.text)
                strings = t.text[:l - 9].split("\n")
                strings[:] = [x for x in strings if x]
                description.append(strings)
            else:
                strings = t.text.split("\n")
                strings[:] = [x for x in strings if x]
                description.append(strings)
            driver.quit()  # close this job's browser session
        df_ml = pd.DataFrame({'all_about_job': description, 'company_logo': LOGO})
        return df_ml

    # apply the description and logo function
    E = all_description_LOGO(links)

    # other info function
    def other(urls):
        frames = []
        for url in urls:
            data1 = requests.get(url)
            soup1 = BeautifulSoup(data1.content, 'html.parser')
            j = soup1.find('ul', {'class': 'description__job-criteria-list'})
            time.sleep(4)
            jj = j.find_all('h3')
            dic = {}
            for i in range(len(jj)):
                dic[jj[i].text.replace('\n', ' ').strip()] = j.find_all('span')[i].text.replace('\n', ' ').strip()
            output = pd.DataFrame([dic])
            frames.append(output)
        result = pd.concat(frames)
        return result

    # apply the other-info function
    df = other(links)
    df.fillna('Not_Found', inplace=True)
    df.reset_index(inplace=True, drop=True)

    # combine all together
    result = pd.concat([df_ml, E, df], axis=1)

    return result

##################### map_bubble #####################

#### function to show a map of the job locations
def map_bubble(df):
    import urllib.parse

    g = []
    for i in range(len(df.Location)):
        # keep only the city part of the location (the text before the first comma)
        g.append(df.Location.loc[i].split(",")[0])
    df['new_loc'] = g
    if 'country' in df.columns:
        df["full_location"] = df["new_loc"] + ", " + df["country"]
        dict_cities = dict(df.full_location.value_counts())
    else:
        dict_cities = dict(df.new_loc.value_counts())
    lat = []
    lon = []
    bubble_df = pd.DataFrame()
    add = []
    val = []
    try:
        # geocode each location with the Nominatim API
        for address in dict_cities.keys():
            url = 'https://nominatim.openstreetmap.org/search?q=' + urllib.parse.quote(address) + '&format=json'
            response = requests.get(url).json()
            lat.append(response[0]["lat"])
            lon.append(response[0]["lon"])
            add.append(address)
            val.append(dict_cities[address])
    except:
        pass

    bubble_df['address'] = add
    bubble_df['lat'] = lat
    bubble_df['lon'] = lon
    bubble_df['value'] = val

    # import the library
    import folium

    # Make an empty map
    m = folium.Map(location=[20, 0], tiles="OpenStreetMap", zoom_start=2)
    # add the markers one by one on the map
    for i in range(0, len(bubble_df)):
        folium.Circle(
            location=[bubble_df.iloc[i]['lat'], bubble_df.iloc[i]['lon']],
            popup=bubble_df.iloc[i][['address', 'value']].values,
            radius=float(bubble_df.iloc[i]['value']) * 500,
            color='#69b3a2',
            fill=True,
            fill_color='#69b3a2'
        ).add_to(m)
    # return the map
    return m


##########################

#########################
#### wuzzuf analysis
def wuzzuf_exp(df1):
    top10_job_title = df1['Title'].value_counts()[:10]
    fig1 = px.bar(y=top10_job_title.values,
                  x=top10_job_title.index,
                  color=top10_job_title.index,
                  color_discrete_sequence=px.colors.sequential.deep,
                  text=top10_job_title.values,
                  title='Top 10 Job Titles',
                  template='plotly_dark')
    fig1.update_layout(height=500, width=500,
                       xaxis_title="Job Titles",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    st.plotly_chart(fig1)

    type_grouped = df1['Career_Level'].value_counts()
    e_type = dict(df1['Career_Level'].value_counts()).keys()
    fig2 = px.bar(x=e_type, y=type_grouped.values,
                  color=type_grouped.index,
                  color_discrete_sequence=px.colors.sequential.dense,
                  template='plotly_dark',
                  text=type_grouped.values, title='Career Level Distribution')
    fig2.update_layout(height=500, width=500,
                       xaxis_title="Career Level",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    fig2.update_traces(width=0.5)
    st.plotly_chart(fig2)

    residence = df1['Location'].value_counts()
    top10_employee_location = residence[:10]
    fig3 = px.bar(y=top10_employee_location.values,
                  x=top10_employee_location.index,
                  color=top10_employee_location.index,
                  color_discrete_sequence=px.colors.sequential.deep,
                  text=top10_employee_location.values,
                  title='Top 10 Job Locations',
                  template='plotly_dark')
    fig3.update_layout(height=500, width=500,
                       xaxis_title="Job Location",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    st.plotly_chart(fig3)

    type_grouped = df1['Experience_Needed'].value_counts()
    e_type = dict(df1['Experience_Needed'].value_counts()).keys()
    fig4 = px.bar(x=e_type, y=type_grouped.values,
                  color=type_grouped.index,
                  color_discrete_sequence=px.colors.sequential.dense,
                  template='plotly_dark',
                  text=type_grouped.values, title='Experience Level Distribution')
    fig4.update_layout(height=500, width=500,
                       xaxis_title="Experience Level (years)",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    fig4.update_traces(width=0.5)
    st.plotly_chart(fig4)
    return

#########################
### linkedin analysis
def linkedin_exp(df1):
    top10_job_title = df1['Title'].value_counts()[:10]
    fig1 = px.bar(y=top10_job_title.values,
                  x=top10_job_title.index,
                  color=top10_job_title.index,
                  color_discrete_sequence=px.colors.sequential.deep,
                  text=top10_job_title.values,
                  title='Top 10 Job Titles',
                  template='plotly_dark')
    fig1.update_layout(height=500, width=500,
                       xaxis_title="Job Titles",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    st.plotly_chart(fig1)

    type_grouped = df1['Employment type'].value_counts()
    e_type = dict(df1['Employment type'].value_counts()).keys()
    fig2 = px.bar(x=e_type, y=type_grouped.values,
                  color=type_grouped.index,
                  color_discrete_sequence=px.colors.sequential.dense,
                  template='plotly_dark',
                  text=type_grouped.values, title='Employment Type Distribution')
    fig2.update_layout(height=500, width=500,
                       xaxis_title="Employment type",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    fig2.update_traces(width=0.5)
    st.plotly_chart(fig2)

    residence = df1['Location'].value_counts()
    top10_employee_location = residence[:10]
    fig3 = px.bar(y=top10_employee_location.values,
                  x=top10_employee_location.index,
                  color=top10_employee_location.index,
                  color_discrete_sequence=px.colors.sequential.deep,
                  text=top10_employee_location.values,
                  title='Top 10 Job Locations',
                  template='plotly_dark')
    fig3.update_layout(height=500, width=500,
                       xaxis_title="Job Location",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    st.plotly_chart(fig3)

    type_grouped = df1['Seniority level'].value_counts()
    e_type = dict(df1['Seniority level'].value_counts()).keys()
    fig4 = px.bar(x=e_type, y=type_grouped.values,
                  color=type_grouped.index,
                  color_discrete_sequence=px.colors.sequential.dense,
                  template='plotly_dark',
                  text=type_grouped.values, title='Seniority Level Distribution')
    fig4.update_layout(height=500, width=500,
                       xaxis_title="Seniority level",
                       yaxis_title="count",
                       font=dict(size=17, family="Franklin Gothic"))
    fig4.update_traces(width=0.5)
    st.plotly_chart(fig4)
    return


########################

####################### streamlit app ################################

st.set_page_config(page_title="My Web_Scrap Page", page_icon=":tada:", layout="wide")

# ---- HEADER SECTION ----
with st.container():
    left_column, right_column = st.columns(2)
    with left_column:
        st.subheader("Hi! I am Yassmen :wave:")
        st.title("An Electronics and Communication Engineer")
        st.write(
            "In this app we will scrape jobs from the LinkedIn and Wuzzuf websites, let's get it started :boom:"
        )
        st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
    with right_column:
        pass
        # st_lottie(lottie_coding, height=300, key="coding")

from streamlit_option_menu import option_menu

# with st.sidebar:
#     selected = option_menu("Main Menu", ["select website", 'search job', 'numbers of jobs'], icons=['linkedin', 'search', '123'], menu_icon="cast", default_index=1)

webs = ["Wuzzuf", "Linkedin"]
jobs = ["Machine Learning", "AI Engineer", "Data Analysis", "Software Testing"]
nums = np.arange(1, 1000)

site = st.sidebar.selectbox("select one website", webs)
job = st.sidebar.selectbox("select one job", jobs)
num_jobs = st.sidebar.selectbox("select the number of jobs you want to scrape", nums)

import streamlit.components.v1 as components
import hydralit_components as hc

n2 = pd.DataFrame()

if st.sidebar.button('Start Scraping'):
    if site == "Wuzzuf":
        with st.container():
            st.write("---")
            tab1, tab2, tab3 = st.tabs([" Data", " Bubble Map", "Data Exploration"])
            with tab1:
                with hc.HyLoader('✨Now loading', hc.Loaders.standard_loaders, index=[3, 0, 5]):
                    time.sleep(5)
                    n1 = Wuzzuf_scrapping(job, num_jobs)
                try:
                    tab1.dataframe(n1)
                except:
                    try:
                        tab1.write(n1.astype(str).set_index(n1.index.astype(str)))
                    except:
                        tab1.table(n1)
            with tab2:
                # embed the folium map returned by map_bubble (a bare map object is not rendered by Streamlit)
                components.html(map_bubble(n1)._repr_html_(), height=500)
            with tab3:
                wuzzuf_exp(n1)

    if site == "Linkedin":
        with st.container():
            st.write("---")
            tab1, tab2, tab3 = st.tabs([" Data", " Bubble Map", "Data Exploration"])
            with tab1:
                with hc.HyLoader('✨Now loading', hc.Loaders.standard_loaders, index=[3, 0, 5]):
                    time.sleep(5)
                    n1 = LINKEDIN_Scrapping(job, num_jobs)
                try:
                    tab1.dataframe(n1)
                except:
                    try:
                        tab1.write(n1.astype(str).set_index(n1.index.astype(str)))
                    except:
                        tab1.table(n1)
            with tab2:
                components.html(map_bubble(n1)._repr_html_(), height=500)
            with tab3:
                linkedin_exp(n1)  # WILL CHANGE
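
For quick local testing outside Streamlit, the scraping functions can also be called directly. A minimal sketch (an assumption, not part of the committed app): it presumes chromedriver is installed and on PATH, and that the job title passed in contains two words, as both scrapers expect.

# hypothetical smoke test for the Wuzzuf scraper; run in a plain Python session
if __name__ == "__main__":
    sample = Wuzzuf_scrapping("Machine Learning", 5)   # scrape 5 postings
    print(sample[['Title', 'Company_Name', 'Location']].head())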