"""Streamlit dashboard for browsing and visualising a categorised jobs CSV.

Pages (selected from the sidebar):
  * Home              - table of jobs filtered by category
  * Plots             - category/title distributions and a description word cloud
  * Notebook          - the analysis notebook rendered as HTML
  * Download Datasets - download buttons and previews for the CSV files
"""
import matplotlib.pyplot as plt
import nbformat
import pandas as pd
import streamlit as st
from nbconvert import HTMLExporter
from streamlit.components.v1 import html
from wordcloud import WordCloud

# Load the CSV data
file_path = 'category upwork jobs (1).csv'
jobs_df = pd.read_csv(file_path)

# Adjust column names as per the CSV
category_column = 'category'  # Replace with the actual column name for category
job_title_column = 'title'  # Replace with the actual column name for job title
description_column = 'description'
key_column = 'key'
# NOTE(review): this constant says 'Date', but the Home table has always
# selected lowercase 'date'. The literal is kept in `home_columns` below so
# behaviour is unchanged - confirm which casing the CSV really uses.
date_column = 'Date'

# Columns shown in the Home page table, in display order.
home_columns = [
    job_title_column,
    key_column,
    description_column,
    'date',
    'extracted_budget',
]

# Other files used by the Notebook and Download pages.
notebook_path = 'upwork_dashboard.ipynb'  # Update with the actual path to your notebook
previous_dataset_path = "jobs.csv"


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    Streamlit re-executes the script on every interaction; closing the
    figure keeps pyplot from accumulating open figures across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_category_bar(category_counts):
    """Bar chart of job counts per category, with a legend naming each bar."""
    st.subheader("Job Category Distribution")

    colors = plt.cm.Paired.colors  # You can choose any colormap you prefer
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(
        range(len(category_counts)),
        category_counts.values,
        color=colors[:len(category_counts)],
    )
    ax.set_xlabel("Job Category")
    ax.set_ylabel("Number of Jobs")
    ax.set_title("Distribution of Jobs Across Categories")
    # Rotate the labels on this axes explicitly instead of relying on
    # pyplot's implicit "current axes".
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    # The bars carry no text; the legend maps each bar to "category: count".
    legend_labels = [
        f"{category}: {int(count)}"
        for category, count in zip(category_counts.index, category_counts.values)
    ]
    ax.legend(
        bars,
        legend_labels,
        title="Job Categories",
        loc="upper left",
        bbox_to_anchor=(1, 1),
    )

    # Leave room on the right for the legend placed outside the axes.
    fig.subplots_adjust(right=0.75)
    _show_figure(fig)


def _plot_category_pie(category_counts):
    """Pie chart of each category's share of all jobs."""
    st.subheader("Job Category Proportions")

    fig, ax = plt.subplots(figsize=(10, 10))
    wedges, texts, autotexts = ax.pie(
        category_counts,
        autopct='%1.1f%%',
        startangle=140,
        colors=plt.cm.Paired.colors,
    )
    ax.axis('equal')  # Equal aspect ratio ensures the pie chart is circular.

    for text in texts:
        text.set_fontsize(10)
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(10)

    # Legend to the right of the pie chart
    ax.legend(
        wedges,
        category_counts.index,
        title="Job Categories",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    _show_figure(fig)


def _plot_top_titles():
    """Bar chart of the 10 most frequent job titles, each bar labelled with its count."""
    st.subheader("Top Job Titles")
    top_job_titles = jobs_df[job_title_column].value_counts().head(10)

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(top_job_titles.index, top_job_titles.values, color='lightcoral')
    ax.set_xlabel("Job Title")
    ax.set_ylabel("Count")
    ax.set_title("Top 10 Job Titles")
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    # Write each count centred just above its bar.
    for bar in bars:
        yval = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            yval,
            int(yval),
            ha='center',
            va='bottom',
            color='black',
        )

    fig.subplots_adjust(right=0.85)
    _show_figure(fig)


def _plot_description_wordcloud():
    """Word cloud built from all non-null job descriptions."""
    st.subheader("Word Cloud for Job Descriptions")

    # Cast to str so a stray non-string cell cannot break the join.
    text = ' '.join(jobs_df[description_column].dropna().astype(str))
    if not text.strip():
        # WordCloud.generate raises when there are no words to draw.
        st.info("No job descriptions available to build a word cloud.")
        return

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')  # Turn off the axis
    _show_figure(fig)


def render_home():
    """Home page: table of jobs restricted to the category chosen in the sidebar."""
    st.title("Jobs Dashboard")

    st.sidebar.header("Filter Jobs by Category")
    categories = jobs_df[category_column].unique()
    selected_category = st.sidebar.selectbox("Choose a category:", categories)

    filtered_jobs = jobs_df[jobs_df[category_column] == selected_category]

    st.write(f"Showing jobs in category: **{selected_category}**")
    st.dataframe(filtered_jobs[home_columns])
    st.write(f"Total jobs in this category: {len(filtered_jobs)}")


def render_plots():
    """Plots page: category and title distributions plus a description word cloud."""
    st.title("Job Visualization")
    category_counts = jobs_df[category_column].value_counts()

    # 1. Job Category Distribution Bar Plot
    _plot_category_bar(category_counts)
    # 2. Pie Chart for Category Distribution
    _plot_category_pie(category_counts)
    # 3. Top Job Titles Bar Plot
    _plot_top_titles()
    # 4. Word Cloud for Job Descriptions
    _plot_description_wordcloud()


def render_notebook():
    """Notebook page: show the analysis notebook as HTML with a Colab link on top."""
    st.title("Jupyter Notebook")

    # Notebook files are UTF-8 JSON; do not depend on the platform default encoding.
    with open(notebook_path, encoding="utf-8") as f:
        notebook_content = nbformat.read(f, as_version=4)

    # Prepend a markdown cell linking to the Google Colab copy of the notebook.
    colab_link = "[Open in Google Colab](https://colab.research.google.com/drive/1qoTldQ-Kr6DgePRNYgdlQqqHq5JQax0h?usp=sharing)"
    notebook_content.cells.insert(0, nbformat.v4.new_markdown_cell(colab_link))

    html_exporter = HTMLExporter()
    html_exporter.exclude_input = False  # Include code cells in the notebook display
    notebook_html, _ = html_exporter.from_notebook_node(notebook_content)

    html(notebook_html, height=800, scrolling=True)


def _offer_download(path, label, download_name):
    """Add a download button that serves the bytes of the CSV file at *path*."""
    with open(path, 'rb') as f:
        payload = f.read()
    st.download_button(
        label=label,
        data=payload,
        file_name=download_name,
        mime='text/csv',
    )


def render_downloads():
    """Download page: a download button and a preview table for each dataset."""
    st.title("Download Datasets")
    previous_df = pd.read_csv(previous_dataset_path)

    st.markdown("Click the links below to download the datasets:")

    # Serve the same file that is loaded and previewed (`file_path`), so the
    # download always matches the table shown beneath it.
    _offer_download(
        file_path,
        "Download Category Upwork Jobs Dataset",
        'category_upwork_jobs.csv',
    )
    st.dataframe(jobs_df)

    # Original (previous) dataset
    _offer_download(
        previous_dataset_path,
        "Download previous Dataset",
        'previous Dataset data.csv',
    )
    st.dataframe(previous_df)


# Sidebar menu: page label -> render function (dict order is the menu order).
pages = {
    "Home": render_home,
    "Plots": render_plots,
    "Notebook": render_notebook,
    "Download Datasets": render_downloads,
}

st.sidebar.title("Navigation")
option = st.sidebar.radio("Go to", list(pages))
pages[option]()