Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import time
|
|
8 |
import pyarrow as pa
|
9 |
import pyarrow.parquet as pq
|
10 |
import math
|
|
|
11 |
# Set page config for a wider layout and custom theme
|
12 |
st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
|
13 |
|
@@ -82,6 +83,22 @@ def load_and_concat_data():
|
|
82 |
# Drop duplicates and rows with NaT in date_posted
|
83 |
filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
return filtered_df
|
86 |
|
87 |
@st.cache_data()
|
|
|
8 |
import pyarrow as pa
|
9 |
import pyarrow.parquet as pq
|
10 |
import math
|
11 |
+
import re
|
12 |
# Set page config for a wider layout and custom theme
|
13 |
st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
|
14 |
|
|
|
83 |
# Drop duplicates and rows with NaT in date_posted
|
84 |
filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
|
85 |
|
86 |
+
# Convert titles to lowercase
|
87 |
+
filtered_df['title'] = filtered_df['title'].str.lower()
|
88 |
+
|
89 |
+
# Function to clean the location
|
90 |
+
def clean_location(location):
    """Normalize a single job-location value.

    The value is lower-cased and a trailing ``, us`` / ``, usa`` country
    suffix is removed, so that e.g. ``"Austin, TX"`` and
    ``"Austin, TX, USA"`` end up as the same string.

    Args:
        location: A location string, or a missing value (``None``/``NaN``).

    Returns:
        The normalized lower-case string, or the original missing value
        unchanged when ``pd.isna(location)`` is true.
    """
    if pd.isna(location):
        return location  # Return NaN as is

    # Convert to lowercase
    location = location.lower()

    # Remove ', us' or ', usa' from the end using regex. Trailing whitespace
    # is allowed before the end anchor: without it a value such as
    # "austin, tx, usa " would keep its country suffix.
    location = re.sub(r',\s*(?:us|usa)\s*$', '', location)
    return location
|
98 |
+
|
99 |
+
# Clean the location in place
|
100 |
+
filtered_df['location'] = filtered_df['location'].apply(clean_location)
|
101 |
+
|
102 |
return filtered_df
|
103 |
|
104 |
@st.cache_data()
|