Spaces:

Niharmahesh
/

job_easz

Running

Niharmahesh committed on Oct 31, 2024

Commit

e615930

verified ·

1 Parent(s): 7099c01

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import time
 import pyarrow as pa
 import pyarrow.parquet as pq
 import math
 # Set page config for a wider layout and custom theme
 st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
@@ -82,6 +83,22 @@ def load_and_concat_data():
     # Drop duplicates and rows with NaT in date_posted
     filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
     return filtered_df
 @st.cache_data()

 import pyarrow as pa
 import pyarrow.parquet as pq
 import math
+import re
 # Set page config for a wider layout and custom theme
 st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
     # Drop duplicates and rows with NaT in date_posted
     filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
+    # Convert titles to lowercase
+    filtered_df['title'] = filtered_df['title'].str.lower()
+    # Function to clean the location
+    def clean_location(location):
+        if pd.isna(location):
+            return location  # Return NaN as is
+        # Convert to lowercase
+        location = location.lower()
+        # Remove ', us' or ', usa' from the end using regex
+        location = re.sub(r',\s*(us|usa)$', '', location)
+        return location
+    # Clean the location in place
+    filtered_df['location'] = filtered_df['location'].apply(clean_location)
     return filtered_df
 @st.cache_data()