# Spaces: Running
"""Streamlit dashboard for browsing and visualising an Upwork jobs CSV."""

import matplotlib.pyplot as plt
import nbformat
import pandas as pd
import streamlit as st
from nbconvert import HTMLExporter
from streamlit.components.v1 import html
from wordcloud import WordCloud

# CSV that backs the pages of the app; loaded into a DataFrame up front.
file_path = 'category upwork jobs (1).csv'
jobs_df = pd.read_csv(file_path)

# Names of the CSV columns the pages address.
category_column = 'category'
job_title_column = 'title'
description_column = 'description'
key_column = 'key'
# NOTE(review): the Home page table selects a lowercase 'date' column, which
# disagrees with this value -- confirm the real header before using it.
date_column = 'Date'

# Sidebar navigation: `option` holds the name of the page to render.
st.sidebar.title("Navigation")
option = st.sidebar.radio(
    "Go to",
    ["Home", "Plots", "Notebook", "Download Datasets"],
)
# Home page: table of jobs restricted to one category chosen in the sidebar.
# `option` holds exactly one page name, so a plain `if` is sufficient here.
if option == "Home":
    st.title("Jobs Dashboard")

    st.sidebar.header("Filter Jobs by Category")
    # Missing categories are dropped from the choices: NaN never compares
    # equal to itself, so selecting it would always produce an empty table.
    categories = jobs_df[category_column].dropna().unique()
    selected_category = st.sidebar.selectbox("Choose a category:", categories)

    filtered_jobs = jobs_df[jobs_df[category_column] == selected_category]

    st.write(f"Showing jobs in category: **{selected_category}**")

    # Only request columns that are actually present so that a renamed or
    # missing CSV header yields a warning instead of a KeyError.
    wanted_columns = [
        job_title_column,
        key_column,
        description_column,
        'date',
        'extracted_budget',
    ]
    shown_columns = [c for c in wanted_columns if c in filtered_jobs.columns]
    missing_columns = [c for c in wanted_columns if c not in filtered_jobs.columns]
    if missing_columns:
        st.warning(f"Columns not found in the dataset: {', '.join(missing_columns)}")
    st.dataframe(filtered_jobs[shown_columns])

    st.write(f"Total jobs in this category: {len(filtered_jobs)}")
# Plots page: category distribution (bar and pie), the most frequent job
# titles, and a word cloud built from the job descriptions.
# `option` holds exactly one page name, so a plain `if` is sufficient here.
if option == "Plots":
    st.title("Job Visualization")

    category_counts = jobs_df[category_column].value_counts()
    palette = plt.cm.Paired.colors

    # 1. Bar chart of jobs per category. Bars sit at integer positions; the
    #    legend maps each bar to its category name and count.
    st.subheader("Job Category Distribution")
    fig1, ax1 = plt.subplots(figsize=(10, 6))
    bars = ax1.bar(
        range(len(category_counts)),
        category_counts.values,
        color=palette[:len(category_counts)],
    )
    ax1.set_xlabel("Job Category")
    ax1.set_ylabel("Number of Jobs")
    ax1.set_title("Distribution of Jobs Across Categories")
    # Style the tick labels through the axes rather than pyplot's implicit
    # "current axes" state.
    plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
    legend_labels = [
        f"{category}: {int(count)}" for category, count in category_counts.items()
    ]
    ax1.legend(
        bars,
        legend_labels,
        title="Job Categories",
        loc="upper left",
        bbox_to_anchor=(1, 1),
    )
    # Leave room on the right for the legend placed outside the axes.
    fig1.subplots_adjust(right=0.75)
    st.pyplot(fig1)
    # pyplot keeps every figure it creates until it is closed explicitly.
    plt.close(fig1)

    # 2. Pie chart of the same category counts.
    st.subheader("Job Category Proportions")
    fig2, ax2 = plt.subplots(figsize=(10, 10))
    wedges, texts, autotexts = ax2.pie(
        category_counts,
        autopct='%1.1f%%',
        startangle=140,
        colors=palette,
    )
    ax2.axis('equal')  # Keep the pie circular.
    for text in texts:
        text.set_fontsize(10)
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(10)
    ax2.legend(
        wedges,
        category_counts.index,
        title="Job Categories",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    st.pyplot(fig2)
    plt.close(fig2)

    # 3. Bar chart of the ten most frequent job titles.
    st.subheader("Top Job Titles")
    top_job_titles = jobs_df[job_title_column].value_counts().head(10)
    fig4, ax4 = plt.subplots(figsize=(10, 6))
    bars = ax4.bar(top_job_titles.index, top_job_titles.values, color='lightcoral')
    ax4.set_xlabel("Job Title")
    ax4.set_ylabel("Count")
    ax4.set_title("Top 10 Job Titles")
    plt.setp(ax4.get_xticklabels(), rotation=45, ha="right")
    # Write each count centred just above its bar.
    for bar in bars:
        yval = bar.get_height()
        ax4.text(
            bar.get_x() + bar.get_width() / 2,
            yval,
            int(yval),
            ha='center',
            va='bottom',
            color='black',
        )
    fig4.subplots_adjust(right=0.85)
    st.pyplot(fig4)
    plt.close(fig4)

    # 4. Word cloud over all non-missing job descriptions.
    st.subheader("Word Cloud for Job Descriptions")
    # astype(str) guards against non-string cells, which str.join rejects.
    description_text = ' '.join(jobs_df[description_column].dropna().astype(str))
    try:
        # WordCloud raises ValueError when the text contains no usable words.
        wordcloud = WordCloud(
            width=800, height=400, background_color='white'
        ).generate(description_text)
    except ValueError:
        st.info("No description text available to build a word cloud.")
    else:
        fig5, ax5 = plt.subplots(figsize=(10, 5))
        ax5.imshow(wordcloud, interpolation='bilinear')
        ax5.axis('off')
        st.pyplot(fig5)
        plt.close(fig5)
# Notebook page: convert the project notebook to HTML and embed it.
# `option` holds exactly one page name, so a plain `if` is sufficient here.
if option == "Notebook":
    st.title("Jupyter Notebook")

    notebook_path = 'upwork_dashboard.ipynb'
    # Notebook files are UTF-8 JSON; open them explicitly as UTF-8 instead of
    # relying on the platform's default text encoding.
    with open(notebook_path, encoding="utf-8") as f:
        notebook_content = nbformat.read(f, as_version=4)

    # Prepend a markdown cell linking to the Google Colab copy of the notebook.
    colab_link = "[Open in Google Colab](https://colab.research.google.com/drive/1qoTldQ-Kr6DgePRNYgdlQqqHq5JQax0h?usp=sharing)"
    notebook_content.cells.insert(0, nbformat.v4.new_markdown_cell(colab_link))

    # Export to HTML, keeping the code cells' input visible.
    html_exporter = HTMLExporter()
    html_exporter.exclude_input = False
    notebook_html, _ = html_exporter.from_notebook_node(notebook_content)

    # Embed the rendered notebook in a scrollable frame.
    html(notebook_html, height=800, scrolling=True)
# Download page: offer each dataset as a CSV download followed by a preview.
# `option` holds exactly one page name, so a plain `if` is sufficient here.
if option == "Download Datasets":
    st.title("Download Datasets")

    previous_path = "jobs.csv"
    previous_df = pd.read_csv(previous_path)

    st.markdown("Click the links below to download the datasets:")

    # (button label, file to serve, suggested download name, preview frame).
    # The categorized dataset is served from `file_path` and previewed from
    # `jobs_df`, so the downloaded bytes match the table shown and the CSV is
    # not parsed a second time.
    datasets = [
        (
            "Download Category Upwork Jobs Dataset",
            file_path,
            'category_upwork_jobs.csv',
            jobs_df,
        ),
        (
            "Download previous Dataset",
            previous_path,
            'previous Dataset data.csv',
            previous_df,
        ),
    ]
    for label, path, download_name, frame in datasets:
        with open(path, 'rb') as f:
            st.download_button(
                label=label,
                data=f.read(),
                file_name=download_name,
                mime='text/csv',
            )
        st.dataframe(frame)