nazneen committed on
Commit
ddf0f2a
1 Parent(s): 8a7fbed

added offline files

Browse files
Files changed (1) hide show
  1. app.py +22 -19
app.py CHANGED
@@ -202,6 +202,25 @@ def topic_distribution(weights, smoothing=0.01):
202
  # for category in categories_sorted:
203
  # return(topic_frequencies[category], topic_frequencies_spotlight[category], topic_ratios[category], category)
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  if __name__ == "__main__":
207
  ### STREAMLIT APP CONGFIG ###
@@ -224,26 +243,10 @@ if __name__ == "__main__":
224
  ["distilbert-base-uncased-finetuned-sst-2-english",
225
  "albert-base-v2-yelp-polarity"],
226
  )
227
- ### LOAD DATA AND SESSION VARIABLES ###
228
- data_df = pd.read_parquet('./assets/data/'+dataset+ '_'+ model+'.parquet')
229
- if model == 'albert-base-v2-yelp-polarity':
230
- tokenizer = AutoTokenizer.from_pretrained('textattack/'+model)
231
- else:
232
- tokenizer = AutoTokenizer.from_pretrained(model)
233
- if "user_data" not in st.session_state:
234
- st.session_state["user_data"] = data_df
235
- if "selected_slice" not in st.session_state:
236
- st.session_state["selected_slice"] = None
237
-
238
- loss_quantile = st.sidebar.slider(
239
- "Loss Quantile", min_value=0.5, max_value=1.0,step=0.01,value=0.95
240
- )
241
- data_df['loss'] = data_df['loss'].astype(float)
242
- losses = data_df['loss']
243
- high_loss = losses.quantile(loss_quantile)
244
- data_df['slice'] = 'high-loss'
245
- data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
246
 
 
 
 
247
  with rcol:
248
  with st.spinner(text='loading...'):
249
  st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
 
202
  # for category in categories_sorted:
203
  # return(topic_frequencies[category], topic_frequencies_spotlight[category], topic_ratios[category], category)
204
 
205
+ def populate_session(dataset,model):
206
+ data_df = pd.read_parquet('./assets/data/'+dataset+ '_'+ model+'.parquet')
207
+ if model == 'albert-base-v2-yelp-polarity':
208
+ tokenizer = AutoTokenizer.from_pretrained('textattack/'+model)
209
+ else:
210
+ tokenizer = AutoTokenizer.from_pretrained(model)
211
+ if "user_data" not in st.session_state:
212
+ st.session_state["user_data"] = data_df
213
+ if "selected_slice" not in st.session_state:
214
+ st.session_state["selected_slice"] = None
215
+
216
+ loss_quantile = st.sidebar.slider(
217
+ "Loss Quantile", min_value=0.5, max_value=1.0,step=0.01,value=0.95
218
+ )
219
+ data_df['loss'] = data_df['loss'].astype(float)
220
+ losses = data_df['loss']
221
+ high_loss = losses.quantile(loss_quantile)
222
+ data_df['slice'] = 'high-loss'
223
+ data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
224
 
225
  if __name__ == "__main__":
226
  ### STREAMLIT APP CONGFIG ###
 
243
  ["distilbert-base-uncased-finetuned-sst-2-english",
244
  "albert-base-v2-yelp-polarity"],
245
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
+ ### LOAD DATA AND SESSION VARIABLES ###
248
+ ##uncomment the next next line to run dynamically and not from file
249
+ #populate_session(dataset, model)
250
  with rcol:
251
  with st.spinner(text='loading...'):
252
  st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)