Niharmahesh committed on
Commit
2235792
·
verified ·
1 Parent(s): c87c688

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -68
app.py CHANGED
@@ -1,9 +1,32 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from huggingface_hub import HfApi
4
  import plotly.express as px
 
 
5
  from datetime import datetime, timedelta
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # Hugging Face setup
8
  HF_TOKEN = st.secrets["HF_TOKEN"]
9
  HF_USERNAME = st.secrets["HF_USERNAME"]
@@ -21,11 +44,10 @@ def load_and_concat_data():
21
  file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
22
  df = pd.read_csv(file_content)
23
  all_data.append(df)
24
- except Exception as e:
25
- st.warning(f"Error reading file {file}: {str(e)}")
26
 
27
  if not all_data:
28
- st.error("No valid data found in any of the CSV files.")
29
  return pd.DataFrame()
30
 
31
  concatenated_df = pd.concat(all_data, ignore_index=True)
@@ -37,65 +59,83 @@ def load_and_concat_data():
37
  filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
38
  filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
39
 
 
 
 
40
  return filtered_df
41
 
42
- def overview_page(df):
43
- st.title("Job Listings Overview")
44
-
45
- st.metric("Total Job Postings", len(df))
46
- st.metric("Unique Companies", df['company'].nunique())
47
- st.metric("Unique Locations", df['location'].nunique())
48
-
49
- # Date range of job postings
50
- min_date = df['date_posted'].min().date()
51
- max_date = df['date_posted'].max().date()
52
- st.write(f"Job postings from {min_date} to {max_date}")
53
 
54
- # Top companies
55
- top_companies = df['company'].value_counts().head(10)
56
- fig = px.bar(top_companies, x=top_companies.index, y=top_companies.values, title="Top 10 Companies")
57
- st.plotly_chart(fig)
58
 
59
- # Job postings over time
60
- df_by_date = df.groupby('date_posted').size().reset_index(name='count')
61
- fig = px.line(df_by_date, x='date_posted', y='count', title="Job Postings Over Time")
62
- st.plotly_chart(fig)
63
 
64
- def location_analysis(df):
65
- st.title("Location Analysis")
 
66
 
67
- # Top locations
68
- top_locations = df['location'].value_counts().head(10)
69
- fig = px.bar(top_locations, x=top_locations.index, y=top_locations.values, title="Top 10 Locations")
70
- st.plotly_chart(fig)
71
 
72
- # Remote vs. On-site jobs
73
- remote_count = df['is_remote'].sum()
74
- onsite_count = len(df) - remote_count
75
- fig = px.pie(names=['Remote', 'On-site'], values=[remote_count, onsite_count], title="Remote vs. On-site Jobs")
76
- st.plotly_chart(fig)
77
 
78
- def job_type_analysis(df):
79
- st.title("Job Type Analysis")
 
 
 
80
 
81
- # Job types distribution
82
- job_types = df['job_type'].value_counts()
83
- fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution")
84
- st.plotly_chart(fig)
85
 
86
- # Top job titles
87
- top_titles = df['title'].value_counts().head(10)
88
- fig = px.bar(top_titles, x=top_titles.index, y=top_titles.values, title="Top 10 Job Titles")
89
- st.plotly_chart(fig)
 
 
90
 
91
- def data_explorer(df):
92
- st.title("Data Explorer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  # Filters
95
- st.sidebar.header("Filters")
96
- companies = st.sidebar.multiselect("Select Companies", options=df['company'].unique())
97
- locations = st.sidebar.multiselect("Select Locations", options=df['location'].unique())
98
- job_types = st.sidebar.multiselect("Select Job Types", options=df['job_type'].unique())
 
 
 
99
 
100
  # Apply filters
101
  filtered_df = df
@@ -108,26 +148,15 @@ def data_explorer(df):
108
 
109
  # Display filtered data
110
  st.write(f"Showing {len(filtered_df)} job listings")
111
- st.dataframe(filtered_df)
112
-
113
- def main():
114
- st.sidebar.title("Navigation")
115
- page = st.sidebar.radio("Go to", ["Overview", "Location Analysis", "Job Type Analysis", "Data Explorer"])
116
 
117
- df = load_and_concat_data()
 
118
 
119
- if df.empty:
120
- st.error("No data available. Please check your dataset.")
121
- return
122
-
123
- if page == "Overview":
124
- overview_page(df)
125
- elif page == "Location Analysis":
126
- location_analysis(df)
127
- elif page == "Job Type Analysis":
128
- job_type_analysis(df)
129
- elif page == "Data Explorer":
130
- data_explorer(df)
131
 
132
  if __name__ == "__main__":
133
  main()
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  import plotly.express as px
4
+ from huggingface_hub import HfApi
5
+ import io
6
  from datetime import datetime, timedelta
7
 
8
+ # Set page config for a wider layout and custom theme
9
+ st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
10
+
11
+ # Custom CSS for better color palette and styling
12
+ st.markdown("""
13
+ <style>
14
+ .stApp {
15
+ background-color: #f0f2f6;
16
+ }
17
+ .stButton>button {
18
+ background-color: #4e79a7;
19
+ color: white;
20
+ }
21
+ .stSelectbox {
22
+ color: #4e79a7;
23
+ }
24
+ .stDataFrame {
25
+ background-color: white;
26
+ }
27
+ </style>
28
+ """, unsafe_allow_html=True)
29
+
30
  # Hugging Face setup
31
  HF_TOKEN = st.secrets["HF_TOKEN"]
32
  HF_USERNAME = st.secrets["HF_USERNAME"]
 
44
  file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
45
  df = pd.read_csv(file_content)
46
  all_data.append(df)
47
+ except Exception:
48
+ pass # Silently skip files that can't be processed
49
 
50
  if not all_data:
 
51
  return pd.DataFrame()
52
 
53
  concatenated_df = pd.concat(all_data, ignore_index=True)
 
59
  filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
60
  filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
61
 
62
+ # Drop duplicates
63
+ filtered_df = filtered_df.drop_duplicates()
64
+
65
  return filtered_df
66
 
67
def main():
    """Entry point: draw the title, load the data, and route to a page.

    Shows an error and stops early when the loader returns an empty frame;
    otherwise renders whichever page is selected in the sidebar radio.
    """
    st.title("Job Listings Dashboard")

    listings = load_and_concat_data()
    if listings.empty:
        st.error("No data available. Please check your dataset.")
        return

    # Page label -> renderer. Insertion order is the order shown in the radio.
    renderers = {
        "Dashboard": display_dashboard,
        "Data Explorer": display_data_explorer,
    }

    # Sidebar for navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", list(renderers))

    render = renderers.get(page)
    if render is not None:
        render(listings)
84
 
85
def display_dashboard(df):
    """Render the dashboard page: headline metrics and four summary charts.

    The page is laid out as two rows of two Streamlit columns. The first row
    holds the metrics/date range and the company/time-series charts; the
    second row holds the top-locations bar chart and the job-type pie chart.

    Args:
        df: Job listings frame. This function reads the ``company``,
            ``location``, ``job_type`` and ``date_posted`` columns.
            ``date_posted`` is produced upstream with
            ``pd.to_datetime(..., errors='coerce')``, so it may contain NaT.

    Returns:
        None. All output is written to the Streamlit page.
    """
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Job Postings Overview")
        st.metric("Total Job Postings", len(df))
        st.metric("Unique Companies", df['company'].nunique())
        st.metric("Unique Locations", df['location'].nunique())

        # Date range of job postings. min()/max() skip NaT, but when no row
        # has a parseable date they return NaT themselves; guard that case
        # instead of printing a meaningless "NaT to NaT" range.
        min_date = df['date_posted'].min()
        max_date = df['date_posted'].max()
        if pd.isna(min_date) or pd.isna(max_date):
            st.write("No valid posting dates available")
        else:
            st.write(f"Job postings from {min_date.date()} to {max_date.date()}")

    with col2:
        # Top companies
        top_companies = df['company'].value_counts().head(10)
        fig = px.bar(top_companies, x=top_companies.index, y=top_companies.values,
                     title="Top 10 Companies", color_discrete_sequence=['#4e79a7'])
        st.plotly_chart(fig, use_container_width=True)

        # Job postings over time (one point per distinct date_posted value;
        # groupby drops rows whose date is NaT).
        df_by_date = df.groupby('date_posted').size().reset_index(name='count')
        fig = px.line(df_by_date, x='date_posted', y='count', title="Job Postings Over Time",
                      color_discrete_sequence=['#4e79a7'])
        st.plotly_chart(fig, use_container_width=True)

    col3, col4 = st.columns(2)

    with col3:
        # Top locations
        top_locations = df['location'].value_counts().head(10)
        fig = px.bar(top_locations, x=top_locations.index, y=top_locations.values,
                     title="Top 10 Locations", color_discrete_sequence=['#f28e2b'])
        st.plotly_chart(fig, use_container_width=True)

    with col4:
        # Job types distribution
        job_types = df['job_type'].value_counts()
        fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution",
                     color_discrete_sequence=px.colors.qualitative.Pastel)
        st.plotly_chart(fig, use_container_width=True)
127
+
128
+ def display_data_explorer(df):
129
+ st.subheader("Data Explorer")
130
 
131
  # Filters
132
+ col1, col2, col3 = st.columns(3)
133
+ with col1:
134
+ companies = st.multiselect("Select Companies", options=df['company'].unique())
135
+ with col2:
136
+ locations = st.multiselect("Select Locations", options=df['location'].unique())
137
+ with col3:
138
+ job_types = st.multiselect("Select Job Types", options=df['job_type'].unique())
139
 
140
  # Apply filters
141
  filtered_df = df
 
148
 
149
  # Display filtered data
150
  st.write(f"Showing {len(filtered_df)} job listings")
151
+
152
+ # Convert URLs to clickable links
153
def make_clickable(url):
    """Return an HTML anchor labelled "Link" for *url*, safely escaped.

    The result is rendered with ``unsafe_allow_html=True``, and the URLs come
    from scraped listing data, so the value must not be trusted:

    * missing values (``None`` / NaN) yield an empty cell instead of a link
      pointing at the literal text "nan";
    * only ``http://`` and ``https://`` URLs become links, which keeps
      ``javascript:`` and similar schemes from being clickable; anything
      else is shown as escaped plain text;
    * the URL is HTML-escaped before being placed in the ``href`` attribute
      so a stray quote cannot break out of the attribute.

    Args:
        url: A single cell value from a URL column.

    Returns:
        An HTML fragment (str). For an ordinary http(s) URL this is
        ``<a href="..." target="_blank">Link</a>``.
    """
    import html  # local import keeps this helper self-contained

    if pd.isna(url):
        return ""

    text = str(url).strip()
    if not text.lower().startswith(("http://", "https://")):
        return html.escape(text)

    return f'<a href="{html.escape(text, quote=True)}" target="_blank">Link</a>'
 
155
 
156
+ filtered_df['job_url'] = filtered_df['job_url'].apply(make_clickable)
157
+ filtered_df['company_url'] = filtered_df['company_url'].apply(make_clickable)
158
 
159
+ st.write(filtered_df.to_html(escape=False, index=False), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  if __name__ == "__main__":
162
  main()