Update app.py
app.py
CHANGED
@@ -1,9 +1,32 @@
 import streamlit as st
 import pandas as pd
-from huggingface_hub import HfApi
 import plotly.express as px
+from huggingface_hub import HfApi
+import io
 from datetime import datetime, timedelta

+# Set page config for a wider layout and custom theme
+st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
+
+# Custom CSS for better color palette and styling
+st.markdown("""
+<style>
+.stApp {
+    background-color: #f0f2f6;
+}
+.stButton>button {
+    background-color: #4e79a7;
+    color: white;
+}
+.stSelectbox {
+    color: #4e79a7;
+}
+.stDataFrame {
+    background-color: white;
+}
+</style>
+""", unsafe_allow_html=True)
+
 # Hugging Face setup
 HF_TOKEN = st.secrets["HF_TOKEN"]
 HF_USERNAME = st.secrets["HF_USERNAME"]
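The new header pulls both credentials from st.secrets, which a Space populates from its repository secrets and a local run reads from .streamlit/secrets.toml. A minimal sketch of a local fallback through environment variables; the get_secret helper is hypothetical and not part of this commit:

import os
import streamlit as st

def get_secret(name: str) -> str:
    # st.secrets is a dict-like mapping filled from Space secrets or .streamlit/secrets.toml
    if name in st.secrets:
        return st.secrets[name]
    # Hypothetical fallback for running outside Spaces: environment variables of the same name
    value = os.getenv(name)
    if value is None:
        raise KeyError(f"Missing credential: {name}")
    return value

HF_TOKEN = get_secret("HF_TOKEN")
HF_USERNAME = get_secret("HF_USERNAME")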
@@ -21,11 +44,10 @@ def load_and_concat_data():
             file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
             df = pd.read_csv(file_content)
             all_data.append(df)
-        except Exception
-
+        except Exception:
+            pass  # Silently skip files that can't be processed

     if not all_data:
-        st.error("No valid data found in any of the CSV files.")
         return pd.DataFrame()

     concatenated_df = pd.concat(all_data, ignore_index=True)
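The rewritten except clause swallows every failed download or parse with pass, so unreadable CSVs vanish without a trace. A variant sketch that reports skipped files instead, reusing the same api.hf_hub_download call as load_and_concat_data(); the function name and its parameters are illustrative, not part of the commit:

from typing import List
import pandas as pd
import streamlit as st
from huggingface_hub import HfApi

def load_csvs_reporting_failures(api: HfApi, repo_id: str, csv_files: List[str], token: str) -> pd.DataFrame:
    # Same download loop, but failed files are counted and surfaced in the UI
    frames, skipped = [], []
    for file in csv_files:
        try:
            path = api.hf_hub_download(repo_id=repo_id, filename=file, repo_type="dataset", token=token)
            frames.append(pd.read_csv(path))
        except Exception as err:
            skipped.append((file, err))
    if skipped:
        st.warning(f"Skipped {len(skipped)} CSV file(s) that could not be read.")
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()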
@@ -37,65 +59,83 @@ def load_and_concat_data():
     filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
     filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')

+    # Drop duplicates
+    filtered_df = filtered_df.drop_duplicates()
+
     return filtered_df

-def
-    st.title("Job Listings
-
-    st.metric("Total Job Postings", len(df))
-    st.metric("Unique Companies", df['company'].nunique())
-    st.metric("Unique Locations", df['location'].nunique())
-
-    # Date range of job postings
-    min_date = df['date_posted'].min().date()
-    max_date = df['date_posted'].max().date()
-    st.write(f"Job postings from {min_date} to {max_date}")
+def main():
+    st.title("Job Listings Dashboard")

-
-    top_companies = df['company'].value_counts().head(10)
-    fig = px.bar(top_companies, x=top_companies.index, y=top_companies.values, title="Top 10 Companies")
-    st.plotly_chart(fig)
+    df = load_and_concat_data()

-
-
-
-    st.plotly_chart(fig)
+    if df.empty:
+        st.error("No data available. Please check your dataset.")
+        return

-
-    st.title("
+    # Sidebar for navigation
+    st.sidebar.title("Navigation")
+    page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])

-
-
-
-
+    if page == "Dashboard":
+        display_dashboard(df)
+    elif page == "Data Explorer":
+        display_data_explorer(df)

-
-
-    onsite_count = len(df) - remote_count
-    fig = px.pie(names=['Remote', 'On-site'], values=[remote_count, onsite_count], title="Remote vs. On-site Jobs")
-    st.plotly_chart(fig)
+def display_dashboard(df):
+    col1, col2 = st.columns(2)

-
-
+    with col1:
+        st.subheader("Job Postings Overview")
+        st.metric("Total Job Postings", len(df))
+        st.metric("Unique Companies", df['company'].nunique())
+        st.metric("Unique Locations", df['location'].nunique())

-
-
-
-
+        # Date range of job postings
+        min_date = df['date_posted'].min().date()
+        max_date = df['date_posted'].max().date()
+        st.write(f"Job postings from {min_date} to {max_date}")

-
-
-
-
+    with col2:
+        # Top companies
+        top_companies = df['company'].value_counts().head(10)
+        fig = px.bar(top_companies, x=top_companies.index, y=top_companies.values,
+                     title="Top 10 Companies", color_discrete_sequence=['#4e79a7'])
+        st.plotly_chart(fig, use_container_width=True)

-
-
+        # Job postings over time
+        df_by_date = df.groupby('date_posted').size().reset_index(name='count')
+        fig = px.line(df_by_date, x='date_posted', y='count', title="Job Postings Over Time",
+                      color_discrete_sequence=['#4e79a7'])
+        st.plotly_chart(fig, use_container_width=True)
+
+    col3, col4 = st.columns(2)
+
+    with col3:
+        # Top locations
+        top_locations = df['location'].value_counts().head(10)
+        fig = px.bar(top_locations, x=top_locations.index, y=top_locations.values,
+                     title="Top 10 Locations", color_discrete_sequence=['#f28e2b'])
+        st.plotly_chart(fig, use_container_width=True)
+
+    with col4:
+        # Job types distribution
+        job_types = df['job_type'].value_counts()
+        fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution",
+                     color_discrete_sequence=px.colors.qualitative.Pastel)
+        st.plotly_chart(fig, use_container_width=True)
+
+def display_data_explorer(df):
+    st.subheader("Data Explorer")

     # Filters
-    st.
-
-
-
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        companies = st.multiselect("Select Companies", options=df['company'].unique())
+    with col2:
+        locations = st.multiselect("Select Locations", options=df['location'].unique())
+    with col3:
+        job_types = st.multiselect("Select Job Types", options=df['job_type'].unique())

     # Apply filters
     filtered_df = df
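The three multiselect widgets feed the "# Apply filters" block, whose body (new lines 142 to 147) falls outside this hunk. A typical implementation, assuming the company, location and job_type columns used above and treating an empty selection as no filter, would look like this; it is a sketch of the usual pattern, not the commit's actual filter lines:

filtered_df = df
if companies:
    filtered_df = filtered_df[filtered_df['company'].isin(companies)]
if locations:
    filtered_df = filtered_df[filtered_df['location'].isin(locations)]
if job_types:
    filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]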
@@ -108,26 +148,15 @@ def data_explorer(df):

     # Display filtered data
     st.write(f"Showing {len(filtered_df)} job listings")
-
-
-def
-
-    page = st.sidebar.radio("Go to", ["Overview", "Location Analysis", "Job Type Analysis", "Data Explorer"])
+
+    # Convert URLs to clickable links
+    def make_clickable(url):
+        return f'<a href="{url}" target="_blank">Link</a>'

-
+    filtered_df['job_url'] = filtered_df['job_url'].apply(make_clickable)
+    filtered_df['company_url'] = filtered_df['company_url'].apply(make_clickable)

-
-        st.error("No data available. Please check your dataset.")
-        return
-
-    if page == "Overview":
-        overview_page(df)
-    elif page == "Location Analysis":
-        location_analysis(df)
-    elif page == "Job Type Analysis":
-        job_type_analysis(df)
-    elif page == "Data Explorer":
-        data_explorer(df)
+    st.write(filtered_df.to_html(escape=False, index=False), unsafe_allow_html=True)

 if __name__ == "__main__":
     main()
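Rendering the filtered table with to_html(escape=False) and unsafe_allow_html=True makes the links clickable but gives up sorting and scrolling. On recent Streamlit releases (roughly 1.23 and later, an assumption about the Space's environment) the same effect is available through st.dataframe with column_config.LinkColumn; a minimal sketch using the job_url and company_url columns from the commit:

st.dataframe(
    filtered_df,
    column_config={
        "job_url": st.column_config.LinkColumn("Job URL"),
        "company_url": st.column_config.LinkColumn("Company URL"),
    },
    hide_index=True,
    use_container_width=True,
)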