Spaces:

nazneen
/

error-analysis

Runtime error

App Files Community

nazneen committed on May 17, 2022

Commit

f760ec3

1 Parent(s): 295dbc0

added instr

Browse files

Files changed (4) hide show

app.py +17 -26
error_analysis/utils/__pycache__/__init__.cpython-39.pyc +0 -0
error_analysis/utils/__pycache__/style_hacks.cpython-39.pyc +0 -0
error_analysis/utils/style_hacks.py +7 -10

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-    ### LIBRARIES ###
-# # Data
 import numpy as np
 import pandas as pd
 import torch
@@ -62,16 +62,8 @@ def down_samp(embedding):
 def data_comparison(df):
-    # set up a dropdown select bindinf
-    # input_dropdown = alt.binding_select(options=['Negative Sentiment','Positive Sentiment'])
-        #data_kmeans['distance_from_centroid'] = data_kmeans.apply(distance_from_centroid, axis=1)
     selection = alt.selection_multi(fields=['cluster','label'])
-    color = alt.condition(alt.datum.slice == 'high-loss', alt.Color('cluster:N', scale = alt.Scale(domain=df.cluster.tolist())), alt.value("lightgray"))
-    # color = alt.condition(selection,
-    #                        alt.Color('cluster:Q', legend=None),
-    #                         # scale = alt.Scale(domain = pop_domain,range=color_range)),
-    #                         alt.value('lightgray'))
     opacity = alt.condition(selection, alt.value(0.7), alt.value(0.25))
     # basic chart
@@ -97,7 +89,7 @@ def data_comparison(df):
         selection
     )
-    layered = legend | scatter
     layered = layered.configure_axis(
         grid=False
@@ -112,14 +104,12 @@ def quant_panel(embedding_df):
     """ Quantitative Panel Layout"""
     all_metrics = {}
-    # st.warning("**Data Comparison**")
-    # with st.expander("how to read this chart:"):
-    #     st.markdown("* each **point** is a single sentence")
-    #     st.markdown("* the **position** of each dot is determined mathematically based upon an analysis of the words in a sentence. The **closer** two points on the visualization the **more similar** the sentences are. The **further apart ** two points on the visualization the **more different** the sentences are")
-    #     st.markdown(
-    #             " * the **shape** of each point reflects whether it a positive (diamond) or negative sentiment (circle)")
-    #     st.markdown("* the **color** of each point is the ")
     st.altair_chart(data_comparison(down_samp(embedding_df)))
 def frequent_tokens(data, tokenizer, loss_quantile=0.95, top_k=200, smoothing=0.005):
@@ -246,7 +236,7 @@ if __name__ == "__main__":
     )
     loss_quantile = st.sidebar.slider(
-        "Loss Quantile", min_value=0.0, max_value=1.0,step=0.1,value=0.95
     )
     run_kmeans = st.sidebar.radio("Cluster error slice?", ('True', 'False'), index=0)
@@ -280,15 +270,16 @@ if __name__ == "__main__":
         table_html = dataframe.to_html(
             columns=['content', 'label', 'pred', 'loss', 'cluster'], max_rows=50)
         # table_html = table_html.replace("<th>", '<th align="left">')  # left-align the headers
-        st.write(dataframe)
-    # st_aggrid.AgGrid(dataframe)
-    # table_html = dataframe.to_html(columns=['content', 'label', 'pred', 'loss'], max_rows=100)
-    # table_html = table_html.replace("<th>", '<th align="left">')  # left-align the headers
-    # st.write(table_html)
     with rcol:
         st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
         commontokens = frequent_tokens(merged, tokenizer, loss_quantile=loss_quantile)
         st.write(commontokens)
     quant_panel(merged)

+## LIBRARIES ###
+## Data
 import numpy as np
 import pandas as pd
 import torch
 def data_comparison(df):
     selection = alt.selection_multi(fields=['cluster','label'])
+    color = alt.condition(alt.datum.slice == 'high-loss', alt.Color('cluster:N', scale = alt.Scale(domain=df.cluster.unique().tolist())), alt.value("lightgray"))
     opacity = alt.condition(selection, alt.value(0.7), alt.value(0.25))
     # basic chart
         selection
     )
+    layered =  scatter | legend
     layered = layered.configure_axis(
         grid=False
     """ Quantitative Panel Layout"""
     all_metrics = {}
+    st.warning("**Error slice visualization**")
+    with st.expander("How to read this chart:"):
+        st.markdown("* Each **point** is an input example.")
+        st.markdown("* Gray points have low-loss and the colored have high-loss. High-loss instances are clustered using **kmeans** and each color represents a cluster.")
+        st.markdown("* The **shape** of each point reflects the label category --  positive (diamond) or negative sentiment (circle).")
     st.altair_chart(data_comparison(down_samp(embedding_df)))
 def frequent_tokens(data, tokenizer, loss_quantile=0.95, top_k=200, smoothing=0.005):
     )
     loss_quantile = st.sidebar.slider(
+        "Loss Quantile", min_value=0.0, max_value=1.0,step=0.01,value=0.95
     )
     run_kmeans = st.sidebar.radio("Cluster error slice?", ('True', 'False'), index=0)
         table_html = dataframe.to_html(
             columns=['content', 'label', 'pred', 'loss', 'cluster'], max_rows=50)
         # table_html = table_html.replace("<th>", '<th align="left">')  # left-align the headers
+        with st.expander("How to read the table:"):
+            st.markdown("* The table displays model error slices on the test set, sorted by loss.")
+            st.markdown("* Each row is an input example that includes the label, model pred, loss, and error cluster.")
+        st.write(dataframe,width=900, height=300)
     with rcol:
         st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
         commontokens = frequent_tokens(merged, tokenizer, loss_quantile=loss_quantile)
+        with st.expander("How to read the table:"):
+            st.markdown("* The table displays the most frequent tokens in error slices, relative to their frequencies in the val set.")
         st.write(commontokens)
     quant_panel(merged)

error_analysis/utils/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (204 Bytes). View file

error_analysis/utils/__pycache__/style_hacks.cpython-39.pyc ADDED Viewed

Binary file (2.16 kB). View file

error_analysis/utils/style_hacks.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
- streamlit style hacks
 """
 import streamlit as st
@@ -10,12 +10,13 @@ def init_style():
     <style>
     /* Side Bar */
     [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
-         width: 225px;
-         margin-left: -500px;
        }
     [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
-        width: 225px;
-        margin-left: -500px;
     }
     .css-1outpf7 {
         background-color:rgb(254 244 219);
@@ -23,11 +24,7 @@ def init_style():
         padding:10px 10px 10px 10px;
     }
-    /* Main Panel*/
-    [data-testid="stVerticalBlock"]{
-        margin-left: -200px;
-        padding:10px 10px 10px -200px;
-    }
     .css-18e3th9 {
         padding:10px 10px 10px -200px;
     }

 """
+ placeholder for all streamlit style hacks
 """
 import streamlit as st
     <style>
     /* Side Bar */
     [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+         width: 300px;
        }
     [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+        width: 300px;
+    }
+    [data-testid="stSidebar"]{
+        flex-basis: unset;
     }
     .css-1outpf7 {
         background-color:rgb(254 244 219);
         padding:10px 10px 10px 10px;
     }
+    /* Main Panel*/
     .css-18e3th9 {
         padding:10px 10px 10px -200px;
     }