Niharmahesh committed on
Commit
44680e5
·
verified ·
1 Parent(s): 202d680

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -8
app.py CHANGED
@@ -93,14 +93,42 @@ def create_time_series(df):
93
  fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
94
  return fig
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def display_dashboard(df):
 
 
97
  col1, col2 = st.columns(2)
98
 
99
  with col1:
100
  st.subheader("Job Postings Overview")
101
  st.metric("Total Job Postings", len(df))
102
  st.metric("Unique Companies", df['company'].nunique())
103
- st.metric("Unique Locations", df['location'].nunique())
104
 
105
  min_date = df['date_posted'].min().date()
106
  max_date = df['date_posted'].max().date()
@@ -118,14 +146,13 @@ def display_dashboard(df):
118
  col3, col4 = st.columns(2)
119
 
120
  with col3:
121
- top_locations = df['location'].value_counts().head(10)
122
  fig = create_chart(top_locations, top_locations.index, top_locations.values, "Top 10 Locations", ['#f28e2b'])
123
  st.plotly_chart(fig, use_container_width=True)
124
 
125
  with col4:
126
- job_types = df['job_type'].value_counts()
127
- fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution", color_discrete_sequence=px.colors.qualitative.Pastel)
128
- fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
129
  st.plotly_chart(fig, use_container_width=True)
130
 
131
  @st.cache_data
@@ -134,7 +161,7 @@ def filter_dataframe(df, companies, locations, job_types):
134
  if companies:
135
  filtered_df = filtered_df[filtered_df['company'].isin(companies)]
136
  if locations:
137
- filtered_df = filtered_df[filtered_df['location'].isin(locations)]
138
  if job_types:
139
  filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
140
  return filtered_df
@@ -150,7 +177,7 @@ def display_data_explorer(df):
150
  with col1:
151
  companies = st.multiselect("Select Companies", options=unique_values['companies'])
152
  with col2:
153
- locations = st.multiselect("Select Locations", options=unique_values['locations'])
154
  with col3:
155
  job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
156
 
@@ -177,13 +204,15 @@ def main():
177
  st.error("No data available. Please check your dataset.")
178
  return
179
 
 
 
180
  # Sidebar for navigation
181
  st.sidebar.title("Navigation")
182
  page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
183
 
184
  if page == "Dashboard":
185
  display_dashboard(df)
186
- elif page =="Data Explorer":
187
  display_data_explorer(df)
188
 
189
  if __name__ == "__main__":
 
93
  fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
94
  return fig
95
 
96
def parse_locations(df):
    """Add a ``parsed_location`` column that buckets raw locations into known cities.

    Each value of ``df['location']`` is scanned for the first label in a fixed
    list of US city labels that occurs in it as a substring. Values with no
    match, and values that are not strings (e.g. ``None`` / NaN for a missing
    location), are labelled ``'Other'``.

    Args:
        df: DataFrame with a ``location`` column.

    Returns:
        The same DataFrame object, modified in place with the new
        ``parsed_location`` column.
    """
    # Order matters: the first label found in the raw string wins.
    # "North Las Vegas, NV" must come before "Las Vegas, NV" because the
    # latter is a substring of the former and would otherwise shadow it.
    valid_locations = (
        "New York, NY", "San Francisco, CA", "Los Angeles, CA", "Chicago, IL", "Houston, TX",
        "Phoenix, AZ", "Philadelphia, PA", "San Antonio, TX", "San Diego, CA", "Dallas, TX",
        "San Jose, CA", "Austin, TX", "Jacksonville, FL", "Fort Worth, TX", "Columbus, OH",
        "San Francisco Bay Area", "Washington, D.C.", "Boston, MA", "Seattle, WA", "Denver, CO",
        "Nashville, TN", "Baltimore, MD", "Portland, OR", "North Las Vegas, NV", "Las Vegas, NV",
        "Milwaukee, WI",
        "Albuquerque, NM", "Tucson, AZ", "Fresno, CA", "Sacramento, CA", "Long Beach, CA",
        "Kansas City, MO", "Mesa, AZ", "Atlanta, GA", "Colorado Springs, CO", "Raleigh, NC",
        "Omaha, NE", "Miami, FL", "Oakland, CA", "Minneapolis, MN", "Tulsa, OK",
        "Cleveland, OH", "Wichita, KS", "Arlington, TX", "New Orleans, LA", "Bakersfield, CA",
        "Tampa, FL", "Honolulu, HI", "Aurora, CO", "Anaheim, CA", "Santa Ana, CA",
        "St. Louis, MO", "Riverside, CA", "Corpus Christi, TX", "Lexington, KY", "Pittsburgh, PA",
        "Anchorage, AK", "Stockton, CA", "Cincinnati, OH", "St. Paul, MN", "Toledo, OH",
        "Newark, NJ", "Greensboro, NC", "Plano, TX", "Henderson, NV", "Lincoln, NE",
        "Buffalo, NY", "Fort Wayne, IN", "Jersey City, NJ", "Chula Vista, CA", "Orlando, FL",
        "St. Petersburg, FL", "Norfolk, VA", "Chandler, AZ", "Laredo, TX", "Madison, WI",
        "Durham, NC", "Lubbock, TX", "Winston-Salem, NC", "Garland, TX", "Glendale, AZ",
        "Hialeah, FL", "Reno, NV", "Baton Rouge, LA", "Irvine, CA", "Chesapeake, VA",
        "Irving, TX", "Scottsdale, AZ", "Fremont, CA", "Gilbert, AZ",
        "San Bernardino, CA", "Boise, ID", "Birmingham, AL",
    )

    def match_location(raw):
        # Missing locations arrive as None / float NaN; a substring test on
        # those would raise TypeError, so bucket them as 'Other'.
        if not isinstance(raw, str):
            return 'Other'
        return next((loc for loc in valid_locations if loc in raw), 'Other')

    df['parsed_location'] = df['location'].apply(match_location)
    return df
121
+
122
  def display_dashboard(df):
123
+ df = parse_locations(df)
124
+
125
  col1, col2 = st.columns(2)
126
 
127
  with col1:
128
  st.subheader("Job Postings Overview")
129
  st.metric("Total Job Postings", len(df))
130
  st.metric("Unique Companies", df['company'].nunique())
131
+ st.metric("Unique Locations", df['parsed_location'].nunique())
132
 
133
  min_date = df['date_posted'].min().date()
134
  max_date = df['date_posted'].max().date()
 
146
  col3, col4 = st.columns(2)
147
 
148
  with col3:
149
+ top_locations = df['parsed_location'].value_counts().head(10)
150
  fig = create_chart(top_locations, top_locations.index, top_locations.values, "Top 10 Locations", ['#f28e2b'])
151
  st.plotly_chart(fig, use_container_width=True)
152
 
153
  with col4:
154
+ top_job_titles = df['title'].value_counts().head(20)
155
+ fig = create_chart(top_job_titles, top_job_titles.index, top_job_titles.values, "Top 20 Job Titles", ['#59a14f'])
 
156
  st.plotly_chart(fig, use_container_width=True)
157
 
158
  @st.cache_data
 
161
  if companies:
162
  filtered_df = filtered_df[filtered_df['company'].isin(companies)]
163
  if locations:
164
+ filtered_df = filtered_df[filtered_df['parsed_location'].isin(locations)]
165
  if job_types:
166
  filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
167
  return filtered_df
 
177
  with col1:
178
  companies = st.multiselect("Select Companies", options=unique_values['companies'])
179
  with col2:
180
+ locations = st.multiselect("Select Locations", options=df['parsed_location'].unique())
181
  with col3:
182
  job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
183
 
 
204
  st.error("No data available. Please check your dataset.")
205
  return
206
 
207
+ df = parse_locations(df)
208
+
209
  # Sidebar for navigation
210
  st.sidebar.title("Navigation")
211
  page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
212
 
213
  if page == "Dashboard":
214
  display_dashboard(df)
215
+ elif page == "Data Explorer":
216
  display_data_explorer(df)
217
 
218
  if __name__ == "__main__":