# statistiques.py
import streamlit as st
import pandas as pd
import plotly.express as px
from data_manager import get_data
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

def display_companies_by_sector(df):
    sector_counts = df['libelle_section_naf'].value_counts().reset_index()
    sector_counts.columns = ['Secteur', 'Nombre']
    fig = px.bar(sector_counts, x='Secteur', y='Nombre',
                 color='Nombre', labels={'Nombre': ''}, template='plotly_white')
    fig.update_layout(xaxis_tickangle=-45, showlegend=False)
    # Assurez-vous que la légende est désactivée
    fig.update_traces(showlegend=False)
    st.plotly_chart(fig)


def display_company_sizes(df):
    fig = px.histogram(df, x='tranche_effectif_entreprise',
                       labels={'tranche_effectif_entreprise':'Taille de l\'entreprise', 'count':'Nombre'}, template='plotly_white')
    fig.update_traces(marker_color='green')
    fig.update_layout(yaxis_title="Nombre")
    st.plotly_chart(fig)

def display_companies_by_commune(df):
    commune_counts = df['commune'].value_counts(normalize=True).reset_index()
    commune_counts.columns = ['Commune', 'Pourcentage']
    fig = px.pie(commune_counts, values='Pourcentage', names='Commune',
                 template='plotly_white', hole=.3)
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig)

def display_rse_actions_wordcloud(df):
    st.header("Nuage de mots Actions RSE")
    
    custom_stopwords = set(["l", "d", "d ", "des", "qui", "ainsi", "toute", "hors", "plus", "cette", "afin", "via", "d'", "sa", "dans", "ont", "avec", "aux", "ce", "chez", "ont", "cela", "la", "un", "avons", "par", "c'est", "s'est", "aussi", "leurs", "d'un", "nos", "les", "sur", "ses", "tous", "nous", "du", "notre", "de", "et", "est", "pour", "le", "une", "se", "en", "au", "à", "que", "sont", "leur", "son"])
    stopwords = STOPWORDS.union(custom_stopwords)
    
    text = " ".join(action for action in df['action_rse'].dropna())
    
    wordcloud = WordCloud(stopwords=stopwords, background_color="white", width=800, height=400).generate(text)
    
    fig, ax = plt.subplots()
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)

def main():
    st.title("Statistiques sur les entreprises engagées RSE")
    data, _ = get_data()
    df = pd.DataFrame(data)
    
    if not df.empty:
        st.header("Répartition des entreprises par secteur d'activité")
        display_companies_by_sector(df)
        st.header("Distribution des tailles d'entreprises")
        display_company_sizes(df)
        st.header("Pourcentage d'entreprises par Commune")
        display_companies_by_commune(df)
        display_rse_actions_wordcloud(df)

if __name__ == "__main__":
    main()