Niharmahesh committed on
Commit
f2446d9
·
verified ·
1 Parent(s): bca1e9f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -0
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ from huggingface_hub import HfApi
5
+ import io
6
+ from datetime import datetime, timedelta
7
+
8
# Hugging Face setup
# The access token and the account name are read from Streamlit's secrets
# store; a missing key fails here, at import time.
HF_TOKEN = st.secrets["HF_TOKEN"]
HF_USERNAME = st.secrets["HF_USERNAME"]
# Dataset repository name; combined with HF_USERNAME to form the repo id.
DATASET_NAME = "jobeasz"
12
+
13
@st.cache_data(ttl=3600)  # Cache for 1 hour
def load_and_concat_data():
    """Download every CSV file in the dataset repo and merge them into one frame.

    The files of the dataset repo ``{HF_USERNAME}/{DATASET_NAME}`` are listed,
    each ``.csv`` file is downloaded and parsed, and the resulting frames are
    concatenated, de-duplicated and reduced to the columns the app displays.

    Returns:
        pandas.DataFrame: The merged listings with ``date_posted`` converted to
        datetime and a fresh 0..n-1 index.  When the repo contains no CSV
        files, an empty frame with the same columns is returned.

    Raises:
        KeyError: If the merged data lacks one of the expected columns.
    """
    columns_to_keep = [
        'site', 'job_url', 'title', 'company', 'location',
        'job_type', 'date_posted', 'is_remote', 'description', 'company_url',
    ]
    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"

    api = HfApi()
    # Pass the token when listing as well as when downloading: an
    # unauthenticated listing cannot see a private dataset.
    dataset_files = api.list_repo_files(
        repo_id=repo_id, repo_type="dataset", token=HF_TOKEN
    )
    csv_files = [name for name in dataset_files if name.endswith('.csv')]

    if not csv_files:
        # pd.concat([]) raises ValueError; hand back an empty, correctly
        # typed frame instead.
        empty_df = pd.DataFrame(columns=columns_to_keep)
        empty_df['date_posted'] = pd.to_datetime(empty_df['date_posted'])
        return empty_df

    all_data = []
    for name in csv_files:
        # hf_hub_download returns the local path of the downloaded file.
        local_path = api.hf_hub_download(
            repo_id=repo_id, filename=name, repo_type="dataset", token=HF_TOKEN
        )
        all_data.append(pd.read_csv(local_path))

    concatenated_df = pd.concat(all_data, ignore_index=True).drop_duplicates()

    # Ensure 'date_posted' is in datetime format
    concatenated_df['date_posted'] = pd.to_datetime(concatenated_df['date_posted'])

    # Keep only the displayed columns; reset the index because
    # drop_duplicates leaves gaps in it.
    return concatenated_df[columns_to_keep].reset_index(drop=True)
39
+
40
def filter_data(df, start_date, end_date, selected_locations, selected_roles):
    """Select the rows of ``df`` that match the sidebar filters.

    Args:
        df: Frame with a datetime ``date_posted`` column plus ``location``
            and ``title`` columns.
        start_date: First calendar date to include (inclusive).
        end_date: Last calendar date to include (inclusive).
        selected_locations: Locations to keep; an empty selection keeps all.
        selected_roles: Job titles to keep; an empty selection keeps all.

    Returns:
        A new frame holding the matching rows with their original index
        labels; ``df`` itself is not modified.
    """
    # Compare on the calendar date only, ignoring any time-of-day part.
    posted_on = df['date_posted'].dt.date
    keep = (posted_on >= start_date) & (posted_on <= end_date)

    if selected_locations:
        keep &= df['location'].isin(selected_locations)
    if selected_roles:
        keep &= df['title'].isin(selected_roles)

    return df[keep]
50
+
51
def dashboard():
    """Render the dashboard page.

    Shows sidebar filters (date range, locations, job roles), the number of
    matching postings, a daily-postings line chart, bar charts of the ten most
    frequent locations and titles, and the five most recent postings.
    """
    st.title("Job Listings Dashboard")

    df = load_and_concat_data()

    # With no rows (or no parseable dates) min()/max() are NaT, which cannot
    # serve as a default for the date pickers below.
    first_posted = df['date_posted'].min()
    last_posted = df['date_posted'].max()
    if pd.isna(first_posted) or pd.isna(last_posted):
        st.warning("No job listings are available.")
        return

    st.sidebar.header("Filters")
    start_date = st.sidebar.date_input("Start Date", first_posted.date())
    end_date = st.sidebar.date_input("End Date", last_posted.date())
    locations = st.sidebar.multiselect("Locations", options=df['location'].unique())
    roles = st.sidebar.multiselect("Job Roles", options=df['title'].unique())

    filtered_df = filter_data(df, start_date, end_date, locations, roles)

    st.metric("Total Job Postings", len(filtered_df))

    if filtered_df.empty:
        # Nothing to chart; say so instead of drawing three empty figures.
        st.info("No job postings match the selected filters.")
        return

    # Grouping by the calendar date keeps the 'date_posted' name, so after
    # reset_index the columns are 'date_posted' and 'count'.
    daily_postings = (
        filtered_df.groupby(filtered_df['date_posted'].dt.date)
        .size()
        .reset_index(name='count')
    )
    fig_time_series = px.line(daily_postings, x='date_posted', y='count', title='Daily Job Postings')
    st.plotly_chart(fig_time_series)

    location_counts = filtered_df['location'].value_counts().head(10)
    fig_location = px.bar(location_counts, x=location_counts.index, y=location_counts.values, title='Top 10 Locations')
    st.plotly_chart(fig_location)

    role_counts = filtered_df['title'].value_counts().head(10)
    fig_role = px.bar(role_counts, x=role_counts.index, y=role_counts.values, title='Top 10 Job Roles')
    st.plotly_chart(fig_role)

    st.subheader("Recent Job Postings")
    recent_postings = filtered_df.sort_values('date_posted', ascending=False).head(5)
    for _, job in recent_postings.iterrows():
        st.write(f"**{job['title']}** - {job['company']} - {job['location']} - {job['date_posted'].date()}")
82
+
83
def data_table():
    """Render the full listings table with the URL columns shown as links."""
    st.title("Full Job Listings Data")

    df = load_and_concat_data()

    # st.dataframe has no `unsafe_allow_html` parameter (passing it raises
    # TypeError) and does not render HTML produced by a Styler, so the links
    # are declared through column_config instead.
    # NOTE(review): `display_text` needs a Streamlit release whose LinkColumn
    # supports it -- confirm against the pinned Streamlit version.
    st.dataframe(
        df,
        column_config={
            'job_url': st.column_config.LinkColumn(display_text="Link"),
            'company_url': st.column_config.LinkColumn(display_text="Link"),
        },
    )
95
+
96
def main():
    """Draw the sidebar navigation and render the page the user picked."""
    st.sidebar.title("Navigation")

    # Page label -> render function; insertion order is the radio order.
    pages = {
        "Dashboard": dashboard,
        "Data Table": data_table,
    }
    page = st.sidebar.radio("Go to", list(pages))

    render = pages.get(page)
    if render is not None:
        render()
104
+
105
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()