nazneen committed
Commit bbb450c
1 Parent(s): 49418b5

added offline files

Files changed (1): app.py +4 -9
app.py CHANGED
```diff
@@ -140,16 +140,15 @@ def frequent_tokens(data, tokenizer, loss_quantile=0.95, top_k=200, smoothing=0.
 
 
 @st.cache(ttl=600)
-def get_data(spotlight, emb):
-    preds = spotlight.outputs.numpy()
-    losses = spotlight.losses.numpy()
+def get_data(inference, emb):
+    preds = inference.outputs.numpy()
+    losses = inference.losses.numpy()
     embeddings = pd.DataFrame(emb, columns=['x', 'y'])
     num_examples = len(losses)
     # dataset_labels = [dataset[i]['label'] for i in range(num_examples)]
     return pd.concat([pd.DataFrame(np.transpose(np.vstack([dataset[:num_examples]['content'],
         dataset[:num_examples]['label'], preds, losses])), columns=['content', 'label', 'pred', 'loss']), embeddings], axis=1)
 
-@st.cache(ttl=600)
 def clustering(data,num_clusters):
     X = np.array(data['embedding'].tolist())
     kclusterer = KMeansClusterer(
@@ -158,11 +157,8 @@ def clustering(data,num_clusters):
     assigned_clusters = kclusterer.cluster(X, assign_clusters=True)
     data['cluster'] = pd.Series(assigned_clusters, index=data.index).astype('int')
     data['centroid'] = data['cluster'].apply(lambda x: kclusterer.means()[x])
-
-
     return data, assigned_clusters
 
-@st.cache(ttl=600)
 def kmeans(df, num_clusters=3):
     data_hl = df.loc[df['slice'] == 'high-loss']
     data_kmeans,clusters = clustering(data_hl,num_clusters)
@@ -171,7 +167,6 @@ def kmeans(df, num_clusters=3):
     merged['cluster'] = merged['cluster'].fillna(num_clusters).astype('int')
     return merged
 
-@st.cache(ttl=600)
 def distance_from_centroid(row):
     return sdist.norm(row['embedding'] - row['centroid'].tolist())
 
@@ -249,7 +244,7 @@ if __name__ == "__main__":
     high_loss = losses.quantile(loss_quantile)
     data_df['slice'] = 'high-loss'
     data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
-
+
     with rcol:
         with st.spinner(text='loading...'):
             st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
```
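Two notes on these hunks. First, `get_data`'s parameter is renamed from `spotlight` to `inference` (with matching `.outputs`/`.losses` accesses), and `@st.cache(ttl=600)` is dropped from `clustering`, `kmeans`, and `distance_from_centroid`; under the legacy `st.cache` API, functions that mutate and return the same DataFrame, as `clustering` does, commonly trigger `CachedObjectMutationWarning`, which is a plausible reason for removing the decorator there. Second, the `kclusterer = KMeansClusterer(` call is cut off at the hunk boundary, so here is a minimal runnable sketch of the clustering step, assuming the app uses nltk's `KMeansClusterer` with cosine distance (the `distance`, `repeats`, and `avoid_empty_clusters` arguments below are illustrative assumptions, not values from this diff):

```python
import numpy as np
import pandas as pd
from nltk.cluster import KMeansClusterer
from nltk.cluster.util import cosine_distance

def clustering(data, num_clusters):
    # Stack per-row embedding lists into an (n_examples, dim) matrix.
    X = np.array(data['embedding'].tolist())
    # Assumed arguments: the diff truncates this call, so distance/repeats/
    # avoid_empty_clusters are illustrative, not the app's real settings.
    kclusterer = KMeansClusterer(num_clusters, distance=cosine_distance,
                                 repeats=5, avoid_empty_clusters=True)
    assigned_clusters = kclusterer.cluster(X, assign_clusters=True)
    data['cluster'] = pd.Series(assigned_clusters, index=data.index).astype('int')
    # Attach each row's centroid so distance_from_centroid can use it later.
    data['centroid'] = data['cluster'].apply(lambda c: kclusterer.means()[c])
    return data, assigned_clusters

# Smoke test on synthetic 2-D embeddings.
df = pd.DataFrame({'embedding': [[1.0, 0.0], [0.9, 0.1], [0.0, 1.0], [0.1, 0.9]]})
clustered, labels = clustering(df, num_clusters=2)
print(labels)  # e.g. [0, 0, 1, 1]
```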
 
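One more note on `get_data`: stacking `content`, `label`, `preds`, and `losses` with `np.vstack` and transposing coerces everything to a single dtype, so `pred` and `loss` arrive as strings until recast. A small sketch with made-up stand-ins for the app's `dataset` and `inference` outputs, showing the effect and a column-wise alternative that preserves dtypes:

```python
import numpy as np
import pandas as pd

# Made-up stand-ins for the app's dataset contents and inference outputs.
contents = ['good movie', 'boring plot']
labels = [1, 0]
preds = np.array([1, 1])
losses = np.array([0.12, 2.31])

# The approach in get_data: vstack + transpose forces one common dtype,
# so every column comes back as strings/objects.
stacked = pd.DataFrame(np.transpose(np.vstack([contents, labels, preds, losses])),
                       columns=['content', 'label', 'pred', 'loss'])
print(stacked.dtypes)  # all object; 'loss' needs astype(float) before quantile()

# Column-wise construction keeps numeric columns numeric.
columnwise = pd.DataFrame({'content': contents, 'label': labels,
                           'pred': preds, 'loss': losses})
print(columnwise.dtypes)  # label/pred int64, loss float64
```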
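The final hunk appears to touch only whitespace on the blank line, but the surrounding slicing logic is easy to misread: `Series.where(cond, other)` keeps the existing value where the condition holds and substitutes `other` elsewhere, so every row starts as 'high-loss' and is demoted to 'low-loss' unless its loss exceeds the quantile threshold. A self-contained sketch with synthetic losses (the 0.95 quantile mirrors the `loss_quantile` default in `frequent_tokens`; the data is made up):

```python
import pandas as pd

data_df = pd.DataFrame({'loss': [0.1, 0.2, 0.4, 0.5, 3.0]})
loss_quantile = 0.95  # mirrors the default in frequent_tokens above
high_loss = data_df['loss'].quantile(loss_quantile)  # 2.5 for this data

# Label every row 'high-loss', then keep that label only where the
# condition holds; all other rows become 'low-loss'.
data_df['slice'] = 'high-loss'
data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
print(data_df)
#    loss      slice
# 0   0.1   low-loss
# 1   0.2   low-loss
# 2   0.4   low-loss
# 3   0.5   low-loss
# 4   3.0  high-loss
```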