Spaces:

gestiodinamica
/

giz_visualizacion

Runtime error

App Files Files Community

jcmachicao committed on Mar 12, 2023

Commit

943aa96

•

1 Parent(s): 803b4f5

Upload app.py

Browse files

Files changed (1) hide show

app.py +108 -53

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import streamlit as st
 import pandas as pd
 c1, c2, = st.columns([7,7])
 with c2:
@@ -8,60 +10,113 @@ with c1:
     st.image('figures/giz_logo.png', width=200, caption='Peru')
 st.title('Estudio de Encuesta Integridad')
-option0 = st.selectbox('Formato de Analisis: ', ('Textos', 'Gráficos'))
-option3 = st.selectbox('Documento: ', ('Todos', 'Encuesta', 'Indicadores'))
-# Load dataframe
-file = st.file_uploader('Seleccione un archivo Excel: ')
-if file is not None:
-    if option0 == 'Textos':
-        # Load the Excel file into a Pandas dataframe
         df00 = pd.read_excel(file, engine='openpyxl')
-        # Show the dataframe in Streamlit
-        # Sidebar option
-        maprange = {'0 a 10%': 10, '10 a 20%': 20, '20 a 30%': 30, '30 a 40%': 40, '40 a 50%': 50}
-        option2 = st.selectbox('Franja de Discrepancia [0, 50]', ('0 a 10%', '10 a 20%', '20 a 30%', '30 a 40%', '40 a 50%'))
-        mapn = maprange[option2]
-        st.write('La franja de discrepancia mostrada es entre ', mapn-10, ' y ', mapn, '%')
-        option1 = st.selectbox('Clasificar resultados por:', ('Pregunta', 'Categoria'))
-        df = df00[(df00['diff'] > mapn-10) & (df00['diff'] < mapn)]
-        df = df[df['documento']==option3]
-        #st.write(df)
-        # Group dataframe by chosen option
-        if option1 == 'Pregunta':
-            grouped_df = df.groupby('pregunta')
-        elif option1 == 'Categoria':
-            grouped_df = df.groupby('cat_sup')
-        # Generate text with bullets
-        text = ''
-        for name, group in grouped_df:
-            cad01 = '\n\n > **'+ option1+ ': '+ name + '**'
-            st.write(cad01)
-            st.write('Cantidad de afirmaciones: ', len(group))
-            texto_tot = ''
-            for index, row in group.iterrows():
-                val0 = str(row['diff'])
-                text = '* En el rubro sobre ' + row.pregunta + ' la categoría ' + row.cat_sup + ' responde ' + \
-                row.respuesta + ' un **' + val0 + '%** más que la categoría ' + row.cat_inf + '\n'
-                texto_tot = texto_tot + '\n' + text
-            st.write(texto_tot)
-        # Download link for text file
-        st.download_button(label='Descargar Reporte', data=texto_tot, file_name='reporte.txt', mime='text/plain')
-    else:
-        option21 = st.selectbox('Tipo de Consulta: ', ('Categoría', 'Pregunta', 'Total'))
-        if option21 == 'Total':
-            st.image('figures/discrepancias_heatmap.png', width=1200)
         else:
-            st.write('En construcción...')

 import streamlit as st
 import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
 c1, c2, = st.columns([7,7])
 with c2:
     st.image('figures/giz_logo.png', width=200, caption='Peru')
 st.title('Estudio de Encuesta Integridad')
+option0 = st.selectbox('Formato de Analisis: ', (None, 'Textos', 'Gráficos'))
+option3 = st.selectbox('Documento: ', (None, 'Todos', 'Encuesta', 'Indicadores'))
+maprange = {'0 a 50%': 10, '10 a 50%': 20, '20 a 50%': 30, '30 a 50%': 40, '40 a 50%': 50}
+option2 = st.selectbox('Franja de Discrepancia siendo la máxima [0, 50]', ('0 a 50%', '10 a 50%', '20 a 50%', '30 a 50%', '40 a 50%'))
+mapn = maprange[option2]
+st.write('La franja de discrepancia mostrada es entre ', mapn-10, ' y 50%')
+if option0 and option2 and option3 is not None:
+    # Load dataframe
+    file = st.file_uploader('Seleccione un archivo Excel: ')
+    if file is not None:
         df00 = pd.read_excel(file, engine='openpyxl')
+        st.write(df00.shape)
+        if option0 == 'Textos':
+            option1 = st.selectbox('Clasificar resultados por:', ('Pregunta', 'Categoria'))
+            #df = df00[(df00['diff'] > mapn-10) & (df00['diff'] < mapn)]
+            df = df00[(df00['diff'] > mapn-10)]
+            if option3 != 'Todos':
+                df = df[df['documento']==option3]
+            else:
+                df = df
+            # Group dataframe by chosen option
+            if option1 == 'Pregunta':
+                grouped_df = df.groupby('pregunta')
+            elif option1 == 'Categoria':
+                grouped_df = df.groupby('cat_sup')
+            # Generate text with bullets
+            text = ''
+            for name, group in grouped_df:
+                cad01 = '\n\n > **'+ option1+ ': '+ name + '**'
+                st.write(cad01)
+                st.write('Cantidad de afirmaciones: ', len(group))
+                texto_tot = ''
+                for index, row in group.iterrows():
+                    val0 = str(row['diff'])
+                    text = '* En el rubro sobre ' + row.pregunta + ' la categoría ' + row.cat_sup + ' responde ' + \
+                    row.respuesta + ' un **' + val0 + '%** más que la categoría ' + row.cat_inf + '\n'
+                    texto_tot = texto_tot + '\n' + text
+                st.write(texto_tot)
+            # Download link for text file
+            st.download_button(label='Descargar Reporte', data=texto_tot, file_name='reporte.txt', mime='text/plain')
         else:
+            option21 = st.selectbox('Tipo de Gráfico ', ('Total', 'Parcial') )
+            if option21 == 'Total':
+                st.write('Este diagrama muestra las discrepancias promedio de categorías y preguntas, con discrepancias mayores a 15%.')
+                st.write('Permite un panorama general de las discrepancias.')
+                st.image('figures/discrepancias_heatmap.png', width=1200)
+            else:
+                df2 = df00[(df00['diff'] > mapn-10)]
+                if option3 != 'Todos':
+                    df2 = df2[df2['documento']==option3]
+                else:
+                    pass
+                df_pv = pd.pivot_table(df2, values='diff', index='pid', columns='cat_sup', aggfunc='mean').fillna(0)
+                st.write(df_pv.shape)
+                data_pv_s = df_pv[df_pv > 10]
+                row_to_drop = list(data_pv_s.index[data_pv_s.sum(axis=1) == 0])
+                data_pv_s = data_pv_s.drop(row_to_drop, axis=0)
+                col_to_drop = list(data_pv_s.columns[data_pv_s.sum(axis=0) == 0])
+                data_pv_s = data_pv_s.drop(col_to_drop, axis=1)
+                fig, ax = plt.subplots(figsize=(40, 20))
+                heatmap = ax.imshow(data_pv_s.T, cmap='inferno')
+                ax.set_yticks(range(len(data_pv_s.columns)))
+                ax.set_yticklabels(data_pv_s.columns, fontsize=16)
+                ax.set_xticks(range(len(data_pv_s.index)))
+                ax.set_xticklabels(data_pv_s.index, rotation=90, fontsize=16)
+                for i in range(len(data_pv_s.index)):
+                    for j in range(len(data_pv_s.columns)):
+                        value = data_pv_s.iloc[i, j]
+                        if not np.isnan(value):
+                            ax.annotate(int(value), xy=(i, j), horizontalalignment='center',
+                                        verticalalignment='center', fontsize=18, color='darkgray',
+                                        fontweight='bold'
+                                       )
+                ax.set_title('Distribución de Discrepancias Máximas en Categorías y Preguntas', fontsize=20)
+                ax.set_ylabel('Categorías')
+                ax.set_xlabel('Preguntas')
+                plt.colorbar(heatmap, ax=ax)
+                plt.grid(True)
+                st.pyplot(fig, width=1200)
+                st.write('Para grabar la imagen solo presione botón derecho y guardela como imagen en su servidor.')