Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -93,14 +93,42 @@ def create_time_series(df):
|
|
93 |
fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
|
94 |
return fig
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
def display_dashboard(df):
|
|
|
|
|
97 |
col1, col2 = st.columns(2)
|
98 |
|
99 |
with col1:
|
100 |
st.subheader("Job Postings Overview")
|
101 |
st.metric("Total Job Postings", len(df))
|
102 |
st.metric("Unique Companies", df['company'].nunique())
|
103 |
-
st.metric("Unique Locations", df['
|
104 |
|
105 |
min_date = df['date_posted'].min().date()
|
106 |
max_date = df['date_posted'].max().date()
|
@@ -118,14 +146,13 @@ def display_dashboard(df):
|
|
118 |
col3, col4 = st.columns(2)
|
119 |
|
120 |
with col3:
|
121 |
-
top_locations = df['
|
122 |
fig = create_chart(top_locations, top_locations.index, top_locations.values, "Top 10 Locations", ['#f28e2b'])
|
123 |
st.plotly_chart(fig, use_container_width=True)
|
124 |
|
125 |
with col4:
|
126 |
-
|
127 |
-
fig =
|
128 |
-
fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
|
129 |
st.plotly_chart(fig, use_container_width=True)
|
130 |
|
131 |
@st.cache_data
|
@@ -134,7 +161,7 @@ def filter_dataframe(df, companies, locations, job_types):
|
|
134 |
if companies:
|
135 |
filtered_df = filtered_df[filtered_df['company'].isin(companies)]
|
136 |
if locations:
|
137 |
-
filtered_df = filtered_df[filtered_df['
|
138 |
if job_types:
|
139 |
filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
|
140 |
return filtered_df
|
@@ -150,7 +177,7 @@ def display_data_explorer(df):
|
|
150 |
with col1:
|
151 |
companies = st.multiselect("Select Companies", options=unique_values['companies'])
|
152 |
with col2:
|
153 |
-
locations = st.multiselect("Select Locations", options=
|
154 |
with col3:
|
155 |
job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
|
156 |
|
@@ -177,13 +204,15 @@ def main():
|
|
177 |
st.error("No data available. Please check your dataset.")
|
178 |
return
|
179 |
|
|
|
|
|
180 |
# Sidebar for navigation
|
181 |
st.sidebar.title("Navigation")
|
182 |
page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
|
183 |
|
184 |
if page == "Dashboard":
|
185 |
display_dashboard(df)
|
186 |
-
elif page =="Data Explorer":
|
187 |
display_data_explorer(df)
|
188 |
|
189 |
if __name__ == "__main__":
|
|
|
93 |
fig.update_layout(plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', font_color='#FFFFFF')
|
94 |
return fig
|
95 |
|
96 |
+
def parse_locations(df):
    """Add a ``parsed_location`` column that normalises ``df['location']``.

    Each value of ``df['location']`` is searched for one of a fixed set of
    known location labels (substring match). The first label found, in the
    priority order of the list below, becomes the parsed location; rows that
    match nothing are labelled ``'Other'``.

    Two edge cases are handled explicitly:

    * Missing or non-string values (``None``, ``NaN``, numbers) are labelled
      ``'Other'`` instead of raising ``TypeError`` on the substring test.
    * When the winning label is itself contained in another known label that
      also occurs in the text (``"Las Vegas, NV"`` inside
      ``"North Las Vegas, NV"``), the more specific label is used.

    Args:
        df: DataFrame with a ``location`` column.

    Returns:
        The same DataFrame object, modified in place, with the new
        ``parsed_location`` column.
    """
    # A tuple of string literals: priority order matters, earlier entries win
    # when a location string names more than one known label.
    valid_locations = (
        "New York, NY", "San Francisco, CA", "Los Angeles, CA", "Chicago, IL", "Houston, TX",
        "Phoenix, AZ", "Philadelphia, PA", "San Antonio, TX", "San Diego, CA", "Dallas, TX",
        "San Jose, CA", "Austin, TX", "Jacksonville, FL", "Fort Worth, TX", "Columbus, OH",
        "San Francisco Bay Area", "Washington, D.C.", "Boston, MA", "Seattle, WA", "Denver, CO",
        "Nashville, TN", "Baltimore, MD", "Portland, OR", "Las Vegas, NV", "Milwaukee, WI",
        "Albuquerque, NM", "Tucson, AZ", "Fresno, CA", "Sacramento, CA", "Long Beach, CA",
        "Kansas City, MO", "Mesa, AZ", "Atlanta, GA", "Colorado Springs, CO", "Raleigh, NC",
        "Omaha, NE", "Miami, FL", "Oakland, CA", "Minneapolis, MN", "Tulsa, OK",
        "Cleveland, OH", "Wichita, KS", "Arlington, TX", "New Orleans, LA", "Bakersfield, CA",
        "Tampa, FL", "Honolulu, HI", "Aurora, CO", "Anaheim, CA", "Santa Ana, CA",
        "St. Louis, MO", "Riverside, CA", "Corpus Christi, TX", "Lexington, KY", "Pittsburgh, PA",
        "Anchorage, AK", "Stockton, CA", "Cincinnati, OH", "St. Paul, MN", "Toledo, OH",
        "Newark, NJ", "Greensboro, NC", "Plano, TX", "Henderson, NV", "Lincoln, NE",
        "Buffalo, NY", "Fort Wayne, IN", "Jersey City, NJ", "Chula Vista, CA", "Orlando, FL",
        "St. Petersburg, FL", "Norfolk, VA", "Chandler, AZ", "Laredo, TX", "Madison, WI",
        "Durham, NC", "Lubbock, TX", "Winston-Salem, NC", "Garland, TX", "Glendale, AZ",
        "Hialeah, FL", "Reno, NV", "Baton Rouge, LA", "Irvine, CA", "Chesapeake, VA",
        "Irving, TX", "Scottsdale, AZ", "North Las Vegas, NV", "Fremont, CA", "Gilbert, AZ",
        "San Bernardino, CA", "Boise, ID", "Birmingham, AL",
    )

    def _match_location(value):
        # Missing cells arrive as None/NaN; `in` on a float would raise.
        if not isinstance(value, str):
            return 'Other'
        matches = [loc for loc in valid_locations if loc in value]
        if not matches:
            return 'Other'
        first = matches[0]
        # Prefer a more specific label that contains the first hit, so that
        # "North Las Vegas, NV" is not swallowed by "Las Vegas, NV".
        return next((loc for loc in matches if loc != first and first in loc), first)

    df['parsed_location'] = df['location'].apply(_match_location)
    return df
|
121 |
+
|
122 |
def display_dashboard(df):
|
123 |
+
df = parse_locations(df)
|
124 |
+
|
125 |
col1, col2 = st.columns(2)
|
126 |
|
127 |
with col1:
|
128 |
st.subheader("Job Postings Overview")
|
129 |
st.metric("Total Job Postings", len(df))
|
130 |
st.metric("Unique Companies", df['company'].nunique())
|
131 |
+
st.metric("Unique Locations", df['parsed_location'].nunique())
|
132 |
|
133 |
min_date = df['date_posted'].min().date()
|
134 |
max_date = df['date_posted'].max().date()
|
|
|
146 |
col3, col4 = st.columns(2)
|
147 |
|
148 |
with col3:
|
149 |
+
top_locations = df['parsed_location'].value_counts().head(10)
|
150 |
fig = create_chart(top_locations, top_locations.index, top_locations.values, "Top 10 Locations", ['#f28e2b'])
|
151 |
st.plotly_chart(fig, use_container_width=True)
|
152 |
|
153 |
with col4:
|
154 |
+
top_job_titles = df['title'].value_counts().head(20)
|
155 |
+
fig = create_chart(top_job_titles, top_job_titles.index, top_job_titles.values, "Top 20 Job Titles", ['#59a14f'])
|
|
|
156 |
st.plotly_chart(fig, use_container_width=True)
|
157 |
|
158 |
@st.cache_data
|
|
|
161 |
if companies:
|
162 |
filtered_df = filtered_df[filtered_df['company'].isin(companies)]
|
163 |
if locations:
|
164 |
+
filtered_df = filtered_df[filtered_df['parsed_location'].isin(locations)]
|
165 |
if job_types:
|
166 |
filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
|
167 |
return filtered_df
|
|
|
177 |
with col1:
|
178 |
companies = st.multiselect("Select Companies", options=unique_values['companies'])
|
179 |
with col2:
|
180 |
+
locations = st.multiselect("Select Locations", options=df['parsed_location'].unique())
|
181 |
with col3:
|
182 |
job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
|
183 |
|
|
|
204 |
st.error("No data available. Please check your dataset.")
|
205 |
return
|
206 |
|
207 |
+
df = parse_locations(df)
|
208 |
+
|
209 |
# Sidebar for navigation
|
210 |
st.sidebar.title("Navigation")
|
211 |
page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
|
212 |
|
213 |
if page == "Dashboard":
|
214 |
display_dashboard(df)
|
215 |
+
elif page == "Data Explorer":
|
216 |
display_data_explorer(df)
|
217 |
|
218 |
if __name__ == "__main__":
|