Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import numpy as np | |
def rgb_to_hex(r, g, b): | |
return f'#{r:02X}{g:02X}{b:02X}' | |
def interpolate_color(start_color, end_color, factor): | |
r1, g1, b1 = start_color | |
r2, g2, b2 = end_color | |
r = int(r1 + (r2 - r1) * factor) | |
g = int(g1 + (g2 - g1) * factor) | |
b = int(b1 + (b2 - b1) * factor) | |
return rgb_to_hex(r, g, b) | |
def get_warm_to_cold_colors(n): | |
hot = (124, 21, 77) # Hot color RGB | |
medium = (0, 66, 76) # Medium color RGB | |
cold = (197, 246, 235) # Cold color RGB | |
colors = [] | |
for i in range(n): | |
if i < n // 2: | |
factor = i / (n // 2) | |
color = interpolate_color(hot, medium, factor) | |
else: | |
factor = (i - n // 2) / (n // 2) | |
color = interpolate_color(medium, cold, factor) | |
colors.append(color) | |
return colors | |
PREDEFINED_COLORS = { | |
'SC_Q_Origin': '#A7BCC6', #specified random color | |
'SC_Q_H_state':'#A7BCC6', #specified random color | |
'SC_Q_H_scope': '#A7BCC6', #specified random color | |
'H_sector': '#1A636B', | |
'SC_Q_H_sector': '#1A636B', | |
"H_companysize": '#A7BCC6', #specified random color | |
'H_revenue': '#49677B', | |
'H_employee': '#125F51', | |
'SC_Q_H_employee': '#125F51', | |
'I_importance': '#074057', | |
'IB_imp_weighted': '#5B8394', | |
'I_importance_fut': '#01626F', | |
'IB_imp_fut_weighted': '#39808B', | |
'I_invest_share': '#7B8D24', | |
'IB_invest_share_weighted': '#BCCFD6', | |
'I_invest_share_fut': '#16936D', | |
'IB_invest_share_fut_weighted': '#1D6073', | |
'I_eneffincrease_fut': '#007B86', | |
'H_energyuse': '#245B60', | |
'IB_energyuse_fut': '#587081', | |
'IB_energyuse_weighted': '#146153', | |
'IB_energyuse_fut_weighted': '#035263', | |
'H_energyuse_classes': '#66889A', | |
'SC_Q_H_energyuse_classes': '#66889A', | |
'SC_Q_S23_turnover_energycost': '#245B60', #specified random color | |
'H_energyintensity': '#186B77', | |
'Des_Gesamtumsatz': '#4A8A95', | |
'Prod_Erw': '#064B55', | |
'Prod_BDB': '#A7BCC6' | |
} | |
column_name_map = { | |
'H_sector': 'Sector', | |
'SC_Q_H_sector': 'Sector', | |
'SC_Q_Origin': 'Origin', | |
'SC_Q_H_state': 'State', | |
'SC_Q_H_scope': 'SC_Q_H_scope', | |
'H_revenue': 'Revenue', | |
'H_employee': 'Employees', | |
'SC_Q_H_employee': 'Employees', | |
"H_companysize": 'Company Size', | |
'I_importance': 'Importance', | |
'IB_imp_weighted': 'Weighted Importance', | |
'I_importance_fut': 'Future Importance', | |
'IB_imp_fut_weighted': 'Future Weighted Importance', | |
'I_invest_share': 'Investment Share (Past)', | |
'IB_invest_share_weighted': 'Weighted Investment Share (Past)', | |
'I_invest_share_fut': 'Investment Share (Future)', | |
'IB_invest_share_fut_weighted': 'Weighted Investment Share (Future)', | |
'I_eneffincrease_fut': 'Future Energy Efficiency Increase', | |
'H_energyuse': 'Energy Use', | |
'SC_Q_S23_turnover_energycost': 'Turn over Energy Cose', #must be reviewed | |
'IB_energyuse_fut': 'Future Energy Use', | |
'IB_energyuse_weighted': 'Weighted Energy Use', | |
'IB_energyuse_fut_weighted': 'Weighted Future Energy Use', | |
'H_energyuse_classes': 'Energy Use Classes', | |
"SC_Q_H_energyuse_classes": 'Energy Use Classes', | |
'H_energyintensity': 'Energy Intensity', | |
'Des_Gesamtumsatz': 'Total Revenue', | |
'Prod_Erw': 'Product Development', | |
'Prod_BDB': 'Product BDB', | |
} | |
def create_visualizations(df, selected_column): | |
# Line graph | |
fig_line = px.line(df, y=selected_column, title=f"Line Graph for {selected_column}") | |
line_color = PREDEFINED_COLORS.get(selected_column, 'black') | |
fig_line.update_traces(line=dict(color=line_color)) | |
fig_line.update_layout(width=800, height=400) | |
st.plotly_chart(fig_line) | |
# Bar chart | |
fig_bar = px.bar(df, y=selected_column, title=f"Bar Chart for {selected_column}") | |
bar_color = PREDEFINED_COLORS.get(selected_column, 'blue') | |
fig_bar.update_traces(marker_color=bar_color) | |
fig_bar.update_layout(width=800, height=400) | |
st.plotly_chart(fig_bar) | |
# Pie chart (for categorical data or numerical data with few unique values) | |
if df[selected_column].dtype == 'object' or df[selected_column].nunique() < 10: | |
value_counts = df[selected_column].value_counts() | |
fig_pie = px.pie(names=value_counts.index, values=value_counts.values, title=f"Distribution of {selected_column}") | |
# Use predefined colors for the pie chart slices | |
pie_colors = [PREDEFINED_COLORS.get(c, 'grey') for c in value_counts.index] | |
fig_pie.update_traces(marker=dict(colors=pie_colors)) | |
fig_pie.update_layout(width=800, height=400) | |
st.plotly_chart(fig_pie) | |
else: | |
st.write(f"Pie chart not applicable for {selected_column} due to high number of unique values.") | |
# Streamlit app | |
st.title('Fraunhofer Database') | |
# File uploader | |
uploaded_file = st.file_uploader("Choose a CSV file", type="csv") | |
if uploaded_file is not None: | |
# Load the data | |
df = pd.read_csv(uploaded_file) | |
# Get column names | |
columns = df.columns.tolist() | |
df_renamed = df.rename(columns=column_name_map) | |
# Sidebar for user input | |
st.sidebar.header('Options') | |
view_option = st.sidebar.radio('Select View Option', | |
['View All Data', 'Select Columns', 'Filter Data', 'Natural Language Query', 'Visualize Data', 'General Visualization']) | |
if view_option == 'View All Data': | |
st.write(df) | |
elif view_option == 'Select Columns': | |
selected_columns = st.sidebar.multiselect('Select Columns', columns) | |
if selected_columns: | |
st.write(df[selected_columns]) | |
else: | |
st.write('Please select at least one column.') | |
elif view_option == 'Filter Data': | |
filter_column = st.sidebar.selectbox('Select Column to Filter', columns) | |
filter_operator = st.sidebar.selectbox('Select Operator', ['==', '>', '<', '>=', '<=', '!=']) | |
filter_value = st.sidebar.text_input('Enter Filter Value') | |
if st.sidebar.button('Apply Filter'): | |
if filter_value: | |
column_type = df[filter_column].dtype | |
if column_type == 'int64': | |
filter_value = int(filter_value) | |
elif column_type == 'float64': | |
filter_value = float(filter_value) | |
# Apply filter | |
filtered_df = df.query(f"`{filter_column}` {filter_operator} @filter_value") | |
st.write(filtered_df) | |
else: | |
st.write('Please enter a filter value.') | |
elif view_option == 'Natural Language Query': | |
query = st.text_input('Enter your query (e.g., "Show me rows where age is greater than 30")') | |
if query: | |
column, operator, value = parse_query(query, df) | |
if column and operator and value is not None: | |
filtered_df = df.query(f"`{column}` {operator} @value") | |
st.write(filtered_df) | |
else: | |
st.write("Couldn't understand the query. Please try rephrasing.") | |
elif view_option == 'Visualize Data': | |
st.sidebar.subheader('Visualization Options') | |
chart_type = st.sidebar.selectbox('Select Chart Type', ['Bar Chart', 'Stacked Bar Chart', 'Line Graph', 'Pie Chart']) | |
# Sliders for figure size | |
width = st.sidebar.slider('Select Figure Width', 400, 1200, 800) | |
height = st.sidebar.slider('Select Figure Height', 300, 800, 600) | |
# Bar Chart Example | |
if chart_type == 'Bar Chart': | |
x_axis = st.sidebar.selectbox('Select X-axis', columns) | |
y_axis = st.sidebar.selectbox('Select Y-axis', columns) | |
if x_axis and y_axis: | |
fig = px.bar(df, x=x_axis, y=y_axis, title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} by {x_axis}') | |
# Apply predefined colors | |
if y_axis in PREDEFINED_COLORS: | |
fig.update_traces(marker_color=PREDEFINED_COLORS[y_axis]) | |
fig.update_layout( | |
title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} by {column_name_map.get(x_axis, "Undefined X-axis")}', | |
xaxis_title=column_name_map.get(x_axis, "Undefined X-axis"), | |
yaxis_title=column_name_map.get(y_axis, "Undefined Y-axis"), | |
width=width, | |
height=height | |
) | |
st.plotly_chart(fig) | |
elif chart_type == 'Stacked Bar Chart': | |
x_axis = st.sidebar.selectbox('Select X-axis', columns) | |
y_axis = st.sidebar.selectbox('Select Y-axis', columns) | |
secondary_y_axis = st.sidebar.selectbox('Select Secondary Y-axis (for stacking)', columns) | |
if x_axis and y_axis and secondary_y_axis: | |
fig = go.Figure(data=[ | |
go.Bar(name=f'{column_name_map.get(y_axis, "Undefined Y-axis")}', x=df[x_axis], y=df[y_axis], marker_color=PREDEFINED_COLORS.get(y_axis, 'blue')), | |
go.Bar(name=f'{column_name_map.get(secondary_y_axis, "Undefined Secondary Y-axis")}', x=df[x_axis], y=df[secondary_y_axis], marker_color=PREDEFINED_COLORS.get(secondary_y_axis, 'green')) | |
]) | |
fig.update_layout(barmode='stack', title=f'{column_name_map.get(y_axis, "Undefined Y-axis")} and {column_name_map.get(secondary_y_axis, "Undefined Secondary Y-axis")} by {column_name_map.get(x_axis, "Undefined X-axis")}', width=width, height=height) | |
st.plotly_chart(fig) | |
elif chart_type == 'Line Graph': | |
x_axis = st.sidebar.selectbox('Select X-axis for Line Graph', columns) | |
y_axes = st.sidebar.multiselect('Select Y-axes for Line Graph', columns) | |
if x_axis and y_axes: | |
fig = go.Figure() | |
for y_axis in y_axes: | |
fig.add_trace(go.Scatter( | |
x=df[x_axis], | |
y=df[y_axis], | |
mode='lines', | |
name=column_name_map.get(y_axis, y_axis), # Use mapped name for the legend | |
line=dict(color=PREDEFINED_COLORS.get(y_axis, 'black')) | |
)) | |
# Update layout with custom titles | |
fig.update_layout( | |
title=f'{", ".join(column_name_map.get(y_axis, y_axis) for y_axis in y_axes)} over {column_name_map.get(x_axis, x_axis)}', | |
xaxis_title=column_name_map.get(x_axis, "Undefined X-axis"), | |
yaxis_title="Values", # You can customize this or use a different mapping | |
width=width, | |
height=height | |
) | |
st.plotly_chart(fig) | |
# Pie Chart | |
elif chart_type == 'Pie Chart': | |
category_column = st.sidebar.selectbox('Select Category for Pie Chart', columns) | |
values_column = st.sidebar.selectbox('Select Values for Pie Chart', columns) | |
if category_column and values_column: | |
st.subheader(f'Pie Chart: {column_name_map.get(values_column, values_column)} by {column_name_map.get(category_column, category_column)}') | |
# Get the value counts | |
value_counts = df.groupby(category_column)[values_column].sum().sort_values(ascending=False) | |
# Generate warm to cold colors based on the number of slices | |
n_slices = len(value_counts) | |
color_map = get_warm_to_cold_colors(n_slices) | |
# Create the pie chart using the generated color map | |
fig = px.pie(df, names=value_counts.index, values=value_counts.values, | |
title=f'{column_name_map.get(values_column, values_column)} distribution by {column_name_map.get(category_column, category_column)}', | |
color_discrete_sequence=color_map) | |
fig.update_layout(width=width, height=height) | |
st.plotly_chart(fig) | |
elif view_option == 'General Visualization': | |
st.subheader('General Visualization') | |
# Function to get columns starting with a specific prefix | |
def get_columns_with_prefix(df, prefix): | |
return [col for col in df.columns if col.startswith(prefix)] | |
# Get columns for each category | |
h_columns = get_columns_with_prefix(df, 'H_') | |
i_columns = get_columns_with_prefix(df, 'I_') | |
ib_columns = get_columns_with_prefix(df, 'IB_') | |
# Create visualizations for a selected column | |
def create_visualizations(df, selected_column): | |
# Line graph | |
fig_line = px.line(df, y=selected_column, | |
title=f"Line Graph for {column_name_map.get(selected_column, selected_column)}") | |
line_color = PREDEFINED_COLORS.get(selected_column, 'black') | |
fig_line.update_traces(line=dict(color=line_color)) | |
fig_line.update_layout( | |
xaxis_title=column_name_map.get('x_axis', 'Index'), # Optional: update X-axis title if relevant | |
yaxis_title=column_name_map.get(selected_column, 'Y-axis'), # Map Y-axis title | |
width=800, height=400 | |
) | |
st.plotly_chart(fig_line) | |
# Bar chart | |
fig_bar = px.bar(df, y=selected_column, | |
title=f"Bar Chart for {column_name_map.get(selected_column, selected_column)}") | |
bar_color = PREDEFINED_COLORS.get(selected_column, 'blue') | |
fig_bar.update_traces(marker_color=bar_color) | |
fig_bar.update_layout( | |
xaxis_title=column_name_map.get('x_axis', 'Index'), # Optional: update X-axis title if relevant | |
yaxis_title=column_name_map.get(selected_column, 'Y-axis'), # Map Y-axis title | |
width=800, height=400 | |
) | |
st.plotly_chart(fig_bar) | |
# Pie chart (for categorical data or numerical data with few unique values) | |
if df[selected_column].dtype == 'object' or df[selected_column].nunique() < 10: | |
value_counts = df[selected_column].value_counts() | |
fig_pie = px.pie(names=value_counts.index, values=value_counts.values, | |
title=f"Distribution of {column_name_map.get(selected_column, selected_column)}") | |
# Use predefined colors for the pie chart slices | |
pie_colors = [PREDEFINED_COLORS.get(c, 'grey') for c in value_counts.index] | |
fig_pie.update_traces(marker=dict(colors=pie_colors)) | |
fig_pie.update_layout(width=800, height=400) | |
st.plotly_chart(fig_pie) | |
else: | |
st.write(f"Pie chart not applicable for {column_name_map.get(selected_column, selected_column)} due to high number of unique values.") | |
# Create tabs for different visualizations | |
tabs = st.tabs(["H Columns", "I Columns", "IB Columns"]) | |
with tabs[0]: | |
st.subheader("H Columns Visualization") | |
h_selected = st.selectbox("Select an H column to visualize", h_columns) | |
if h_selected: | |
create_visualizations(df, h_selected) | |
with tabs[1]: | |
st.subheader("I Columns Visualization") | |
i_selected = st.selectbox("Select an I column to visualize", i_columns) | |
if i_selected: | |
create_visualizations(df, i_selected) | |
with tabs[2]: | |
st.subheader("IB Columns Visualization") | |
ib_selected = st.selectbox("Select an IB column to visualize", ib_columns) | |
if ib_selected: | |
create_visualizations(df, ib_selected) | |
# Summary statistics | |
st.subheader("Summary Statistics") | |
all_selected = h_columns + i_columns + ib_columns | |
if all_selected: | |
# Create a new DataFrame for summary statistics with mapped column names | |
summary_df = df[all_selected].describe().T # Transpose for better readability | |
summary_df.index = [column_name_map.get(col, col) for col in summary_df.index] # Update index names | |
st.write(summary_df) | |
else: | |
st.write('Please upload your data.') | |
st.sidebar.info('Designed by Taha Rasouli at Fraunhofer') | |