Niharmahesh committed on
Commit
e615930
·
verified ·
1 Parent(s): 7099c01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -0
app.py CHANGED
@@ -8,6 +8,7 @@ import time
8
  import pyarrow as pa
9
  import pyarrow.parquet as pq
10
  import math
 
11
  # Set page config for a wider layout and custom theme
12
  st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
13
 
@@ -82,6 +83,22 @@ def load_and_concat_data():
82
  # Drop duplicates and rows with NaT in date_posted
83
  filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  return filtered_df
86
 
87
  @st.cache_data()
 
8
  import pyarrow as pa
9
  import pyarrow.parquet as pq
10
  import math
11
+ import re
12
  # Set page config for a wider layout and custom theme
13
  st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
14
 
 
83
  # Drop duplicates and rows with NaT in date_posted
84
  filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
85
 
86
+ # Convert titles to lowercase
87
+ filtered_df['title'] = filtered_df['title'].str.lower()
88
+
89
+ # Function to clean the location
90
+ def clean_location(location):
91
+ if pd.isna(location):
92
+ return location # Return NaN as is
93
+ # Convert to lowercase
94
+ location = location.lower()
95
+ # Remove ', us' or ', usa' from the end using regex
96
+ location = re.sub(r',\s*(us|usa)$', '', location)
97
+ return location
98
+
99
+ # Clean the location in place
100
+ filtered_df['location'] = filtered_df['location'].apply(clean_location)
101
+
102
  return filtered_df
103
 
104
  @st.cache_data()