import streamlit as st
import pandas as pd
import plotly.express as px
from huggingface_hub import HfApi
import io
from datetime import datetime, timedelta
# Set page config for a wider layout and custom theme
st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
# Custom CSS for a nicer color palette and styling (style rules go between the triple quotes)
st.markdown("""
""", unsafe_allow_html=True)
# Hugging Face setup
HF_TOKEN = st.secrets["HF_TOKEN"]
HF_USERNAME = st.secrets["HF_USERNAME"]
DATASET_NAME = "jobeasz"
@st.cache_data(ttl=3600) # Cache for 1 hour
def load_and_concat_data():
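    """Download every CSV file in the Hugging Face dataset repo, concatenate them,
    keep the relevant listing columns, parse posting dates, and drop duplicate rows.
    Returns an empty DataFrame if no file could be read. Results are cached for 1 hour."""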
    # Authenticate the client once so listing and downloads both work for private datasets
    api = HfApi(token=HF_TOKEN)
    dataset_files = api.list_repo_files(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", repo_type="dataset")
csv_files = [file for file in dataset_files if file.endswith('.csv')]
all_data = []
for file in csv_files:
try:
file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
df = pd.read_csv(file_content)
all_data.append(df)
except Exception:
pass # Silently skip files that can't be processed
if not all_data:
return pd.DataFrame()
concatenated_df = pd.concat(all_data, ignore_index=True)
columns_to_keep = [
'site', 'job_url', 'title', 'company', 'location',
'job_type', 'date_posted', 'is_remote', 'description', 'company_url'
]
filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
# Drop duplicates
filtered_df = filtered_df.drop_duplicates()
return filtered_df
def main():
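    """Entry point: load the cached job data and route between the Dashboard and Data Explorer pages."""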
st.title("Job Listings Dashboard")
df = load_and_concat_data()
if df.empty:
st.error("No data available. Please check your dataset.")
return
# Sidebar for navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
if page == "Dashboard":
display_dashboard(df)
elif page == "Data Explorer":
display_data_explorer(df)
def display_dashboard(df):
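    """Render summary metrics plus charts for top companies, postings over time, top locations, and job types."""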
col1, col2 = st.columns(2)
with col1:
st.subheader("Job Postings Overview")
st.metric("Total Job Postings", len(df))
st.metric("Unique Companies", df['company'].nunique())
st.metric("Unique Locations", df['location'].nunique())
        # Date range of job postings (skip rows whose date failed to parse)
        valid_dates = df['date_posted'].dropna()
        if not valid_dates.empty:
            st.write(f"Job postings from {valid_dates.min().date()} to {valid_dates.max().date()}")
with col2:
# Top companies
top_companies = df['company'].value_counts().head(10)
        fig = px.bar(x=top_companies.index, y=top_companies.values,
                     labels={'x': 'Company', 'y': 'Job postings'},
                     title="Top 10 Companies", color_discrete_sequence=['#4e79a7'])
st.plotly_chart(fig, use_container_width=True)
# Job postings over time
df_by_date = df.groupby('date_posted').size().reset_index(name='count')
fig = px.line(df_by_date, x='date_posted', y='count', title="Job Postings Over Time",
color_discrete_sequence=['#4e79a7'])
st.plotly_chart(fig, use_container_width=True)
col3, col4 = st.columns(2)
with col3:
# Top locations
top_locations = df['location'].value_counts().head(10)
        fig = px.bar(x=top_locations.index, y=top_locations.values,
                     labels={'x': 'Location', 'y': 'Job postings'},
                     title="Top 10 Locations", color_discrete_sequence=['#f28e2b'])
st.plotly_chart(fig, use_container_width=True)
with col4:
# Job types distribution
job_types = df['job_type'].value_counts()
fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution",
color_discrete_sequence=px.colors.qualitative.Pastel)
st.plotly_chart(fig, use_container_width=True)
def display_data_explorer(df):
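    """Render the Data Explorer page: filter listings by company, location, and job type,
    then show the matching rows as a table with clickable job and company URLs."""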
st.subheader("Data Explorer")
# Filters
col1, col2, col3 = st.columns(3)
    with col1:
        companies = st.multiselect("Select Companies", options=sorted(df['company'].dropna().unique()))
    with col2:
        locations = st.multiselect("Select Locations", options=sorted(df['location'].dropna().unique()))
    with col3:
        job_types = st.multiselect("Select Job Types", options=sorted(df['job_type'].dropna().unique()))
# Apply filters
filtered_df = df
if companies:
filtered_df = filtered_df[filtered_df['company'].isin(companies)]
if locations:
filtered_df = filtered_df[filtered_df['location'].isin(locations)]
if job_types:
filtered_df = filtered_df[filtered_df['job_type'].isin(job_types)]
# Display filtered data
st.write(f"Showing {len(filtered_df)} job listings")
# Convert URLs to clickable links
    def make_clickable(url):
        # Render the raw URL as an HTML anchor so it appears as a clickable "Link" in the table
        return f'<a href="{url}" target="_blank">Link</a>' if pd.notna(url) else ""
    # Work on a copy so the cached DataFrame is not modified in place
    filtered_df = filtered_df.copy()
    filtered_df['job_url'] = filtered_df['job_url'].apply(make_clickable)
    filtered_df['company_url'] = filtered_df['company_url'].apply(make_clickable)
st.write(filtered_df.to_html(escape=False, index=False), unsafe_allow_html=True)
if __name__ == "__main__":
main()