Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import plotly.express as px
|
4 |
+
from huggingface_hub import HfApi
|
5 |
+
import io
|
6 |
+
from datetime import datetime, timedelta
|
7 |
+
|
8 |
+
# Hugging Face setup
# Credentials are read from Streamlit secrets (.streamlit/secrets.toml or the
# Space's Secrets settings) — never hard-coded in the source.
HF_TOKEN = st.secrets["HF_TOKEN"]
HF_USERNAME = st.secrets["HF_USERNAME"]
# Name of the HF dataset repo ({HF_USERNAME}/{DATASET_NAME}) that holds the
# scraped job-listing CSV files loaded by load_and_concat_data().
DATASET_NAME = "jobeasz"
|
12 |
+
|
13 |
+
@st.cache_data(ttl=3600)  # Cache for 1 hour so reruns don't re-download the dataset
def load_and_concat_data():
    """Download every CSV in the configured HF dataset repo and combine them.

    Returns:
        pd.DataFrame: de-duplicated listings restricted to the columns the
        app uses, with 'date_posted' parsed to datetime (unparseable values
        become NaT instead of aborting the load).

    Raises:
        ValueError: if the dataset repo contains no CSV files.
    """
    api = HfApi()
    # Pass the token here as well as in the download call below, so private
    # dataset repos can be listed, not just downloaded from.
    dataset_files = api.list_repo_files(
        repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
        repo_type="dataset",
        token=HF_TOKEN,
    )
    csv_files = [file for file in dataset_files if file.endswith('.csv')]
    if not csv_files:
        # Without this guard, pd.concat([]) raises a confusing
        # "No objects to concatenate" error.
        raise ValueError(f"No CSV files found in {HF_USERNAME}/{DATASET_NAME}")

    all_data = []
    for file in csv_files:
        file_content = api.hf_hub_download(
            repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
            filename=file,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        all_data.append(pd.read_csv(file_content))

    concatenated_df = pd.concat(all_data, ignore_index=True).drop_duplicates()

    # Ensure 'date_posted' is in datetime format; errors='coerce' turns bad
    # rows into NaT rather than raising and losing the entire dataset.
    concatenated_df['date_posted'] = pd.to_datetime(
        concatenated_df['date_posted'], errors='coerce'
    )

    # Keep only the columns the dashboard and data-table pages actually use.
    columns_to_keep = [
        'site', 'job_url', 'title', 'company', 'location',
        'job_type', 'date_posted', 'is_remote', 'description', 'company_url'
    ]
    return concatenated_df[columns_to_keep].reset_index(drop=True)
|
39 |
+
|
40 |
+
def filter_data(df, start_date, end_date, selected_locations, selected_roles):
    """Return the rows of *df* posted within [start_date, end_date] (inclusive),
    optionally restricted to the given locations and job titles.

    An empty selection for locations or roles means "no restriction" on
    that field.
    """
    posted = df['date_posted'].dt.date
    result = df[(posted >= start_date) & (posted <= end_date)]
    if selected_locations:
        result = result[result['location'].isin(selected_locations)]
    if selected_roles:
        result = result[result['title'].isin(selected_roles)]
    return result
|
50 |
+
|
51 |
+
def dashboard():
    """Render the dashboard page: sidebar filters, a headline count metric,
    three Plotly charts (daily postings, top locations, top roles), and the
    five most recent postings."""
    st.title("Job Listings Dashboard")

    df = load_and_concat_data()

    # Sidebar filter widgets; date pickers default to the full span of the data.
    st.sidebar.header("Filters")
    start_date = st.sidebar.date_input("Start Date", df['date_posted'].min().date())
    end_date = st.sidebar.date_input("End Date", df['date_posted'].max().date())
    locations = st.sidebar.multiselect("Locations", options=df['location'].unique())
    roles = st.sidebar.multiselect("Job Roles", options=df['title'].unique())

    view = filter_data(df, start_date, end_date, locations, roles)

    st.metric("Total Job Postings", len(view))

    # Postings per calendar day as a line chart.
    per_day = (
        view.groupby(view['date_posted'].dt.date)
        .size()
        .reset_index(name='count')
    )
    st.plotly_chart(
        px.line(per_day, x='date_posted', y='count', title='Daily Job Postings')
    )

    # Ten most frequent locations.
    top_locations = view['location'].value_counts().head(10)
    st.plotly_chart(
        px.bar(top_locations, x=top_locations.index, y=top_locations.values,
               title='Top 10 Locations')
    )

    # Ten most frequent job titles.
    top_roles = view['title'].value_counts().head(10)
    st.plotly_chart(
        px.bar(top_roles, x=top_roles.index, y=top_roles.values,
               title='Top 10 Job Roles')
    )

    st.subheader("Recent Job Postings")
    newest = view.sort_values('date_posted', ascending=False).head(5)
    for _, job in newest.iterrows():
        st.write(f"**{job['title']}** - {job['company']} - {job['location']} - {job['date_posted'].date()}")
|
82 |
+
|
83 |
+
def data_table():
    """Render the full listings table with clickable job/company links.

    Fix: the original passed ``unsafe_allow_html=True`` to ``st.dataframe``,
    which has no such parameter (it raises TypeError) and does not render
    HTML from a pandas Styler anyway. The styled table is emitted as HTML
    via ``st.markdown``, which does accept ``unsafe_allow_html``.
    """
    st.title("Full Job Listings Data")

    df = load_and_concat_data()

    # Turn the URL columns into anchor tags; missing company URLs render blank.
    styled = df.style.format({
        'job_url': lambda x: f'<a href="{x}" target="_blank">Link</a>',
        'company_url': lambda x: f'<a href="{x}" target="_blank">Link</a>' if pd.notnull(x) else ''
    })
    st.markdown(styled.to_html(), unsafe_allow_html=True)
|
95 |
+
|
96 |
+
def main():
    """Top-level page router: a sidebar radio selects which page renders."""
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Dashboard", "Data Table"])

    # Dispatch table instead of an if/elif chain; the radio widget can only
    # return one of these two labels.
    pages = {"Dashboard": dashboard, "Data Table": data_table}
    handler = pages.get(page)
    if handler is not None:
        handler()
|
104 |
+
|
105 |
+
# Standard script entry point: run the app only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|