Spaces:

Niharmahesh
/

job_easz

Running

App Files Files Community

Niharmahesh committed on Oct 26, 2024

Commit

44680e5

verified ·

1 Parent(s): 202d680

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -8

app.py CHANGED Viewed

@@ -93,14 +93,42 @@ def create_time_series(df):
     fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
     return fig
 def display_dashboard(df):
     col1, col2 = st.columns(2)
     with col1:
         st.subheader("Job Postings Overview")
         st.metric("Total Job Postings", len(df))
         st.metric("Unique Companies", df['company'].nunique())
-        st.metric("Unique Locations", df['location'].nunique())
         min_date = df['date_posted'].min().date()
         max_date = df['date_posted'].max().date()
@@ -118,14 +146,13 @@ def display_dashboard(df):
     col3, col4 = st.columns(2)
     with col3:
-        top_locations = df['location'].value_counts().head(10)
         fig = create_chart(top_locations, top_locations.index, top_locations.values, "Top 10 Locations", ['#f28e2b'])
         st.plotly_chart(fig, use_container_width=True)
     with col4:
-        job_types = df['job_type'].value_counts()
-        fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution", color_discrete_sequence=px.colors.qualitative.Pastel)
-        fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
         st.plotly_chart(fig, use_container_width=True)
 @st.cache_data
@@ -134,7 +161,7 @@ def filter_dataframe(df, companies, locations, job_types):
     if companies:
         filtered_df = filtered_df[filtered_df['company'].isin(companies)]
     if locations:
-        filtered_df = filtered_df[filtered_df['location'].isin(locations)]
     if job_types:
         filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
     return filtered_df
@@ -150,7 +177,7 @@ def display_data_explorer(df):
         with col1:
             companies = st.multiselect("Select Companies", options=unique_values['companies'])
         with col2:
-            locations = st.multiselect("Select Locations", options=unique_values['locations'])
         with col3:
             job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
@@ -177,13 +204,15 @@ def main():
         st.error("No data available. Please check your dataset.")
         return
     # Sidebar for navigation
     st.sidebar.title("Navigation")
     page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
     if page == "Dashboard":
         display_dashboard(df)
-    elif page =="Data Explorer":
         display_data_explorer(df)
 if __name__ == "__main__":

     fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
     return fig
+def parse_locations(df):
+    """Add a normalized ``parsed_location`` column to *df* and return it.
+
+    Each value of ``df['location']`` is matched by substring against a fixed
+    list of known location names. The first list entry found in the value
+    wins, unless a more specific entry that contains it (for example
+    "North Las Vegas, NV" versus "Las Vegas, NV") also matches, in which
+    case the more specific one is used. Values that match nothing, or that
+    are not strings (e.g. NaN/None for a missing location), become 'Other'.
+
+    Args:
+        df: DataFrame with a ``location`` column.
+
+    Returns:
+        The same DataFrame object, modified in place with the new
+        ``parsed_location`` column.
+    """
+    valid_locations = [
+        "New York, NY", "San Francisco, CA", "Los Angeles, CA", "Chicago, IL", "Houston, TX",
+        "Phoenix, AZ", "Philadelphia, PA", "San Antonio, TX", "San Diego, CA", "Dallas, TX",
+        "San Jose, CA", "Austin, TX", "Jacksonville, FL", "Fort Worth, TX", "Columbus, OH",
+        "San Francisco Bay Area", "Washington, D.C.", "Boston, MA", "Seattle, WA", "Denver, CO",
+        "Nashville, TN", "Baltimore, MD", "Portland, OR", "Las Vegas, NV", "Milwaukee, WI",
+        "Albuquerque, NM", "Tucson, AZ", "Fresno, CA", "Sacramento, CA", "Long Beach, CA",
+        "Kansas City, MO", "Mesa, AZ", "Atlanta, GA", "Colorado Springs, CO", "Raleigh, NC",
+        "Omaha, NE", "Miami, FL", "Oakland, CA", "Minneapolis, MN", "Tulsa, OK",
+        "Cleveland, OH", "Wichita, KS", "Arlington, TX", "New Orleans, LA", "Bakersfield, CA",
+        "Tampa, FL", "Honolulu, HI", "Aurora, CO", "Anaheim, CA", "Santa Ana, CA",
+        "St. Louis, MO", "Riverside, CA", "Corpus Christi, TX", "Lexington, KY", "Pittsburgh, PA",
+        "Anchorage, AK", "Stockton, CA", "Cincinnati, OH", "St. Paul, MN", "Toledo, OH",
+        "Newark, NJ", "Greensboro, NC", "Plano, TX", "Henderson, NV", "Lincoln, NE",
+        "Buffalo, NY", "Fort Wayne, IN", "Jersey City, NJ", "Chula Vista, CA", "Orlando, FL",
+        "St. Petersburg, FL", "Norfolk, VA", "Chandler, AZ", "Laredo, TX", "Madison, WI",
+        "Durham, NC", "Lubbock, TX", "Winston-Salem, NC", "Garland, TX", "Glendale, AZ",
+        "Hialeah, FL", "Reno, NV", "Baton Rouge, LA", "Irvine, CA", "Chesapeake, VA",
+        "Irving, TX", "Scottsdale, AZ", "North Las Vegas, NV", "Fremont, CA", "Gilbert, AZ",
+        "San Bernardino, CA", "Boise, ID", "Birmingham, AL"
+    ]
+
+    def match_location(value):
+        # Missing locations arrive as NaN (a float) or None; the substring
+        # test below would raise TypeError on them.
+        if not isinstance(value, str):
+            return 'Other'
+        hits = [loc for loc in valid_locations if loc in value]
+        if not hits:
+            return 'Other'
+        first = hits[0]
+        # A shorter name can shadow a longer one that contains it
+        # ("Las Vegas, NV" inside "North Las Vegas, NV"); prefer the
+        # longer, more specific entry when both are present.
+        return next((loc for loc in hits if loc != first and first in loc), first)
+
+    df['parsed_location'] = df['location'].apply(match_location)
+    return df
 def display_dashboard(df):
+    df = parse_locations(df)
     col1, col2 = st.columns(2)
     with col1:
         st.subheader("Job Postings Overview")
         st.metric("Total Job Postings", len(df))
         st.metric("Unique Companies", df['company'].nunique())
+        st.metric("Unique Locations", df['parsed_location'].nunique())
         min_date = df['date_posted'].min().date()
         max_date = df['date_posted'].max().date()
     col3, col4 = st.columns(2)
     with col3:
+        top_locations = df['parsed_location'].value_counts().head(10)
         fig = create_chart(top_locations, top_locations.index, top_locations.values, "Top 10 Locations", ['#f28e2b'])
         st.plotly_chart(fig, use_container_width=True)
     with col4:
+        top_job_titles = df['title'].value_counts().head(20)
+        fig = create_chart(top_job_titles, top_job_titles.index, top_job_titles.values, "Top 20 Job Titles", ['#59a14f'])
         st.plotly_chart(fig, use_container_width=True)
 @st.cache_data
     if companies:
         filtered_df = filtered_df[filtered_df['company'].isin(companies)]
     if locations:
+        filtered_df = filtered_df[filtered_df['parsed_location'].isin(locations)]
     if job_types:
         filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
     return filtered_df
         with col1:
             companies = st.multiselect("Select Companies", options=unique_values['companies'])
         with col2:
+            locations = st.multiselect("Select Locations", options=df['parsed_location'].unique())
         with col3:
             job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
         st.error("No data available. Please check your dataset.")
         return
+    df = parse_locations(df)
     # Sidebar for navigation
     st.sidebar.title("Navigation")
     page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
     if page == "Dashboard":
         display_dashboard(df)
+    elif page == "Data Explorer":
         display_data_explorer(df)
 if __name__ == "__main__":