Spaces:

Niharmahesh
/

job_easz

Running

App Files Files Community

Niharmahesh committed on Oct 26, 2024

Commit

2235792

verified ·

1 Parent(s): c87c688

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -68

app.py CHANGED Viewed

@@ -1,9 +1,32 @@
 import streamlit as st
 import pandas as pd
-from huggingface_hub import HfApi
 import plotly.express as px
 from datetime import datetime, timedelta
 # Hugging Face setup: the access token and account name are read from
 # Streamlit's secrets store (st.secrets) instead of being hard-coded here.
 HF_TOKEN = st.secrets["HF_TOKEN"]
 HF_USERNAME = st.secrets["HF_USERNAME"]
@@ -21,11 +44,10 @@ def load_and_concat_data():
             file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
             df = pd.read_csv(file_content)
             all_data.append(df)
-        except Exception as e:
-            st.warning(f"Error reading file {file}: {str(e)}")
     if not all_data:
-        st.error("No valid data found in any of the CSV files.")
         return pd.DataFrame()
     concatenated_df = pd.concat(all_data, ignore_index=True)
@@ -37,65 +59,83 @@ def load_and_concat_data():
     filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
     filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
     return filtered_df
-def overview_page(df):
-    """Render the "Job Listings Overview" page.
-
-    Shows three headline metrics (row count, distinct companies, distinct
-    locations), the posting date range, a bar chart of the ten most frequent
-    companies and a line chart of postings per ``date_posted`` value.
-
-    Args:
-        df: Job listings frame; the ``company``, ``location`` and
-            ``date_posted`` columns are read.
-    """
-    st.title("Job Listings Overview")
-    st.metric("Total Job Postings", len(df))
-    st.metric("Unique Companies", df['company'].nunique())
-    st.metric("Unique Locations", df['location'].nunique())
-    # Date range of job postings
-    # NOTE(review): date_posted is parsed with errors='coerce' by the loader,
-    # so min()/max() could be NaT if no value parsed - confirm the output then.
-    min_date = df['date_posted'].min().date()
-    max_date = df['date_posted'].max().date()
-    st.write(f"Job postings from {min_date} to {max_date}")
-    # Top companies
-    top_companies = df['company'].value_counts().head(10)
-    fig = px.bar(top_companies, x=top_companies.index, y=top_companies.values, title="Top 10 Companies")
-    st.plotly_chart(fig)
-    # Job postings over time
-    # One row per distinct date_posted value, with the number of listings in 'count'.
-    df_by_date = df.groupby('date_posted').size().reset_index(name='count')
-    fig = px.line(df_by_date, x='date_posted', y='count', title="Job Postings Over Time")
-    st.plotly_chart(fig)
-def location_analysis(df):
-    """Render the "Location Analysis" page.
-
-    Draws a bar chart of the ten most frequent locations and a pie chart
-    splitting listings into remote and on-site.
-
-    Args:
-        df: Job listings frame; the ``location`` and ``is_remote`` columns
-            are read.
-    """
-    st.title("Location Analysis")
-    # Top locations
-    top_locations = df['location'].value_counts().head(10)
-    fig = px.bar(top_locations, x=top_locations.index, y=top_locations.values, title="Top 10 Locations")
-    st.plotly_chart(fig)
-    # Remote vs. On-site jobs
-    # Presumably is_remote is boolean/0-1 so its sum is the remote count;
-    # every other row (including any with a missing flag) lands in on-site - verify.
-    remote_count = df['is_remote'].sum()
-    onsite_count = len(df) - remote_count
-    fig = px.pie(names=['Remote', 'On-site'], values=[remote_count, onsite_count], title="Remote vs. On-site Jobs")
-    st.plotly_chart(fig)
-def job_type_analysis(df):
-    """Render the "Job Type Analysis" page.
-
-    Draws a pie chart of listing counts per ``job_type`` and a bar chart of
-    the ten most frequent job titles.
-
-    Args:
-        df: Job listings frame; the ``job_type`` and ``title`` columns are read.
-    """
-    st.title("Job Type Analysis")
-    # Job types distribution
-    job_types = df['job_type'].value_counts()
-    fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution")
-    st.plotly_chart(fig)
-    # Top job titles
-    top_titles = df['title'].value_counts().head(10)
-    fig = px.bar(top_titles, x=top_titles.index, y=top_titles.values, title="Top 10 Job Titles")
-    st.plotly_chart(fig)
-def data_explorer(df):
-    """Render the "Data Explorer" page with sidebar filters and a data table.
-
-    Args:
-        df: Job listings frame; the ``company``, ``location`` and ``job_type``
-            columns supply the filter options.
-    """
-    st.title("Data Explorer")
     # Filters
-    # Each multiselect offers the distinct values of one column; the widgets
-    # are placed in the sidebar.
-    st.sidebar.header("Filters")
-    companies = st.sidebar.multiselect("Select Companies", options=df['company'].unique())
-    locations = st.sidebar.multiselect("Select Locations", options=df['location'].unique())
-    job_types = st.sidebar.multiselect("Select Job Types", options=df['job_type'].unique())
     # Apply filters
     filtered_df = df
@@ -108,26 +148,15 @@ def data_explorer(df):
     # Display filtered data
     st.write(f"Showing {len(filtered_df)} job listings")
-    st.dataframe(filtered_df)
-def main():
-    """Entry point: pick a page from the sidebar and render it.
-
-    Loads the data via load_and_concat_data(); if the frame is empty an
-    error is shown and nothing else is rendered. Otherwise the page chosen
-    in the sidebar radio is dispatched to its render function.
-    """
-    st.sidebar.title("Navigation")
-    page = st.sidebar.radio("Go to", ["Overview", "Location Analysis", "Job Type Analysis", "Data Explorer"])
-    df = load_and_concat_data()
-    if df.empty:
-        st.error("No data available. Please check your dataset.")
-        return
-    # Dispatch on the label selected in the radio above.
-    if page == "Overview":
-        overview_page(df)
-    elif page == "Location Analysis":
-        location_analysis(df)
-    elif page == "Job Type Analysis":
-        job_type_analysis(df)
-    elif page == "Data Explorer":
-        data_explorer(df)
 # Run the app only when this file is executed as the main script.
 if __name__ == "__main__":
     main()

 import streamlit as st
 import pandas as pd
 import plotly.express as px
+from huggingface_hub import HfApi
+import io
 from datetime import datetime, timedelta
+# Use the wide page layout and set the page title.
+# NOTE(review): Streamlit has historically required set_page_config to run
+# before any other st.* command - keep it ahead of the st.markdown call below.
+st.set_page_config(layout="wide", page_title="Job Listings Dashboard")
+# Custom CSS injected as raw HTML: app background, button colours,
+# select-box text colour and a white background for data frames.
+st.markdown("""
+<style>
+    .stApp {
+        background-color: #f0f2f6;
+    }
+    .stButton>button {
+        background-color: #4e79a7;
+        color: white;
+    }
+    .stSelectbox {
+        color: #4e79a7;
+    }
+    .stDataFrame {
+        background-color: white;
+    }
+</style>
+""", unsafe_allow_html=True)
 # Hugging Face setup: the access token and account name are read from
 # Streamlit's secrets store (st.secrets) instead of being hard-coded here.
 HF_TOKEN = st.secrets["HF_TOKEN"]
 HF_USERNAME = st.secrets["HF_USERNAME"]
             file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
             df = pd.read_csv(file_content)
             all_data.append(df)
+        except Exception:
+            pass  # Silently skip files that can't be processed
     if not all_data:
         return pd.DataFrame()
     concatenated_df = pd.concat(all_data, ignore_index=True)
     filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
     filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
+    # Drop duplicates
+    filtered_df = filtered_df.drop_duplicates()
     return filtered_df
+def main():
+    """Entry point: load the data, then render the page chosen in the sidebar.
+
+    Loads the data via load_and_concat_data(); if the frame is empty an
+    error is shown and the function returns before any navigation is drawn.
+    Otherwise a sidebar radio selects between the dashboard and the data
+    explorer.
+    """
+    st.title("Job Listings Dashboard")
+    df = load_and_concat_data()
+    if df.empty:
+        st.error("No data available. Please check your dataset.")
+        return
+    # Sidebar for navigation
+    st.sidebar.title("Navigation")
+    page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer"])
+    # Dispatch on the label selected in the radio above.
+    if page == "Dashboard":
+        display_dashboard(df)
+    elif page == "Data Explorer":
+        display_data_explorer(df)
+def display_dashboard(df):
+    """Render the single-page dashboard.
+
+    Layout, top to bottom: a two-column row (headline metrics and date range
+    on the left, top-10 companies bar chart on the right), a full-width line
+    chart of postings per ``date_posted`` value, then a second two-column row
+    (top-10 locations bar chart and job-type pie chart).
+
+    Args:
+        df: Job listings frame; the ``company``, ``location``,
+            ``date_posted`` and ``job_type`` columns are read.
+    """
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("Job Postings Overview")
+        st.metric("Total Job Postings", len(df))
+        st.metric("Unique Companies", df['company'].nunique())
+        st.metric("Unique Locations", df['location'].nunique())
+        # Date range of job postings
+        # NOTE(review): date_posted is parsed with errors='coerce' by the
+        # loader, so min()/max() could be NaT if no value parsed - confirm
+        # what is displayed in that case.
+        min_date = df['date_posted'].min().date()
+        max_date = df['date_posted'].max().date()
+        st.write(f"Job postings from {min_date} to {max_date}")
+    with col2:
+        # Top companies
+        top_companies = df['company'].value_counts().head(10)
+        fig = px.bar(top_companies, x=top_companies.index, y=top_companies.values,
+                     title="Top 10 Companies", color_discrete_sequence=['#4e79a7'])
+        st.plotly_chart(fig, use_container_width=True)
+    # Job postings over time
+    # One row per distinct date_posted value, with the number of listings in 'count'.
+    df_by_date = df.groupby('date_posted').size().reset_index(name='count')
+    fig = px.line(df_by_date, x='date_posted', y='count', title="Job Postings Over Time",
+                  color_discrete_sequence=['#4e79a7'])
+    st.plotly_chart(fig, use_container_width=True)
+    col3, col4 = st.columns(2)
+    with col3:
+        # Top locations
+        top_locations = df['location'].value_counts().head(10)
+        fig = px.bar(top_locations, x=top_locations.index, y=top_locations.values,
+                     title="Top 10 Locations", color_discrete_sequence=['#f28e2b'])
+        st.plotly_chart(fig, use_container_width=True)
+    with col4:
+        # Job types distribution
+        job_types = df['job_type'].value_counts()
+        fig = px.pie(names=job_types.index, values=job_types.values, title="Job Types Distribution",
+                     color_discrete_sequence=px.colors.qualitative.Pastel)
+        st.plotly_chart(fig, use_container_width=True)
+def display_data_explorer(df):
+    """Render the data explorer: three filter widgets and an HTML table.
+
+    The code that applies the selected filters is not shown in this view,
+    so only the visible widget and rendering steps are described here.
+
+    Args:
+        df: Job listings frame; the ``company``, ``location`` and ``job_type``
+            columns supply the filter options, and ``job_url`` /
+            ``company_url`` are rewritten into anchor tags for display.
+    """
+    st.subheader("Data Explorer")
     # Filters
+    # The three multiselects sit side by side in the page body, one per column.
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        companies = st.multiselect("Select Companies", options=df['company'].unique())
+    with col2:
+        locations = st.multiselect("Select Locations", options=df['location'].unique())
+    with col3:
+        job_types = st.multiselect("Select Job Types", options=df['job_type'].unique())
     # Apply filters
     filtered_df = df
     # Display filtered data
     st.write(f"Showing {len(filtered_df)} job listings")
+    # Convert URLs to clickable links
+    # NOTE(review): the url is interpolated into the href without escaping,
+    # and a missing url would presumably still produce an anchor - verify.
+    def make_clickable(url):
+        return f'<a href="{url}" target="_blank">Link</a>'
+    # NOTE(review): filtered_df starts as an alias of df (see "filtered_df = df"
+    # above); if no filter rebinds it, these assignments overwrite the caller's
+    # columns in place. Consider working on a copy - confirm against the
+    # filter code not shown here.
+    filtered_df['job_url'] = filtered_df['job_url'].apply(make_clickable)
+    filtered_df['company_url'] = filtered_df['company_url'].apply(make_clickable)
+    # NOTE(review): escape=False together with unsafe_allow_html=True renders
+    # every cell as raw HTML, not only the two link columns; if the listing
+    # text comes from an untrusted source this allows markup injection.
+    st.write(filtered_df.to_html(escape=False, index=False), unsafe_allow_html=True)
 # Run the app only when this file is executed as the main script.
 if __name__ == "__main__":
     main()