Spaces
Brendan King committed · Commit ce78cc4
1 Parent(s): 07aa55e

Initial run: example explorer in huggingface

Files changed:
- app.py +127 -0
- output/bart-100ish-examples.jsonl +0 -0
- output/dialogpt-100ish-examples.jsonl +0 -0
- output/pptod-small-10-percent.jsonl +0 -0
- output/t5-small-10-percent.jsonl +0 -0
app.py
ADDED
@@ -0,0 +1,127 @@
import json
from typing import List, Dict, Tuple, Union, Any

import streamlit as st
from annotated_text import annotated_text

sidebar = st.sidebar


def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
    # Color each generated unigram by whether it also appears in the gold target or the model input.
    gold_set = set(belief_state_example['gold'].split(' '))
    input_set = set(" ".join(belief_state_example['input']).split(' '))
    generated = belief_state_example['generated']
    result = []
    for word in generated.split(' '):
        if word in gold_set:
            result.append((word, 'gold', '#dfd'))  # gold overlap => green label
        elif word in input_set:
            result.append((word, 'in', '#eea'))  # input overlap => yellow label
        else:
            result.append(word + ' ')  # no overlap => no label (re-insert the space)
    return result


# load in data
pptod_examples: List[Dict] = []

models: Dict[str, Dict[str, Any]] = {
    'pptod-small': {
        'name': 'pptod-small',
        'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** As such, '
                       'it is familiar with the meaning of these special separator tokens. However, it does not have '
                       'MultiWoZ training experience, so while it has adapted to the belief state grammar generally, it '
                       'is unaware of the particular slot name conventions of MultiWoZ.',
        'output_file': './output/pptod-small-10-percent.jsonl'
    },
    't5-small': {
        'name': 't5-small',
        'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude special tokens '
                       'that the model could not be familiar with.',
        'output_file': './output/t5-small-10-percent.jsonl'
    },
    'bart': {
        'name': 'bart',
        'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude special tokens '
                       'that the model could not be familiar with.',
        'output_file': './output/bart-100ish-examples.jsonl'
    },
    'dialogpt': {
        'name': 'dialogpt',
        'description': 'DialoGPT is a (fine-tuned GPT-2) dialogue response generation model for multi-turn conversations, '
                       'trained on 147M Reddit conversation chains',
        'output_file': './output/dialogpt-100ish-examples.jsonl'
    }
}

# read each model's dumped predictions (one JSON object per line)
for model_def in models.values():
    model_def['examples'] = []
    with open(model_def['output_file'], 'r') as f:
        for line in f.readlines():
            model_def['examples'].append(json.loads(line.strip()))


model_names = list(models.keys())

model_name = sidebar.selectbox('Model', model_names)
active_model = models[model_name]

st.write(f"""
#### Inputs

**Selected Model:** `{active_model['name']}`

{active_model['description']}

""")

# Bare string literals below are rendered as markdown by Streamlit's "magic".
"""
### Belief State Prediction

Below is the predicted belief state as a sequence.

- `input` denotes the input, which has been transformed into a list for
  human readability but is presented to the model as a sequence.
- `gold` is the target belief state in sequence form (slot-name slot-value pairs)
- `generated` is the model-generated belief state sequence
"""
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})" for i, e in enumerate(active_model['examples'])]
title = sidebar.selectbox("Development Example", titles)
# parse the example index back out of the "<i>: <domain> (Turn <n>)" title
active_example = active_model['examples'][int(title.split(':')[0])][0]

# re-split the flattened input on separator tokens so each "<...>" span is its own list item
active_belief_spans = active_example['bspn_input'].split("> <")
active_example_bs = {
    'input': [('<' if i > 0 else '') +
              string +
              ('>' if string[-1] != '>' and len(active_belief_spans) > 1 else '')
              for i, string in enumerate(active_belief_spans)],
    'generated': active_example['bspn_gen'],
    'gold': active_example['bspn']
}

st.write(active_example_bs)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))

"""
---

### Response Generation

Below is the predicted response as a sequence.

- `input` denotes the input, which has been transformed into a list for
  human readability but is presented to the model as a sequence.
- `gold` is the target response sequence
- `generated` is the model-generated response
"""
# title = st.selectbox("Development Example", titles)

active_example_resp = {
    'input': [('<' if i > 0 else '') +
              string +
              ('>' if string[-1] != '>' else '')
              for i, string in enumerate(active_example['resp_input'].split("> <"))],
    'generated': active_example['resp_gen'],
    'gold': active_example['resp']
}

st.write(active_example_resp)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_resp))
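For readers unfamiliar with the dumped format: app.py assumes each line of the output/*.jsonl files parses to a one-element list wrapping a per-turn record with the keys read above (turn_domain, turn_num, bspn_input, bspn, bspn_gen, resp_input, resp, resp_gen). The sketch below invents values for such a record purely to illustrate that assumed shape and what bs_unigram_match_annotated_text (defined above) returns for it; the real records live in the JSONL files added by this commit and are not rendered here.

# Hypothetical per-turn record: the values and separator-token names are invented
# for illustration only; just the keys are taken from what app.py reads.
fake_line = [{
    'turn_domain': ['[hotel]'],
    'turn_num': 0,
    'bspn_input': '<sos_u> i need a cheap hotel <eos_u> <sos_b>',
    'bspn': '[hotel] pricerange cheap',                  # gold belief state sequence
    'bspn_gen': '[hotel] pricerange cheap area north',   # model-generated belief state
    'resp_input': '<sos_u> i need a cheap hotel <eos_u> <sos_r>',
    'resp': 'there are several cheap hotels , do you have an area preference ?',
    'resp_gen': 'there are many cheap hotels in town .',
}]

record = fake_line[0]
example_bs = {
    'input': record['bspn_input'].split("> <"),  # simplified; app.py also restores the angle brackets
    'generated': record['bspn_gen'],
    'gold': record['bspn'],
}
# Unigrams shared with the gold sequence come back as ('word', 'gold', '#dfd'),
# unigrams seen only in the input as ('word', 'in', '#eea'), the rest as plain strings.
print(bs_unigram_match_annotated_text(example_bs))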
output/bart-100ish-examples.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.

output/dialogpt-100ish-examples.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.

output/pptod-small-10-percent.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.

output/t5-small-10-percent.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.
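For anyone who wants to poke at this explorer outside of Spaces: a minimal local setup, assuming the only dependencies are the two libraries imported in app.py (streamlit and the st-annotated-text package that provides the annotated_text import) and that the output/*.jsonl dumps from this commit sit next to app.py, is `pip install streamlit st-annotated-text` followed by `streamlit run app.py`.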