Spaces
Brendan King committed · Commit ce78cc4
1 Parent(s): 07aa55e

Initial run: example explorer in huggingface

Files changed:
- app.py +127 -0
- output/bart-100ish-examples.jsonl +0 -0
- output/dialogpt-100ish-examples.jsonl +0 -0
- output/pptod-small-10-percent.jsonl +0 -0
- output/t5-small-10-percent.jsonl +0 -0
app.py
ADDED
@@ -0,0 +1,127 @@
import json
from typing import List, Dict, Tuple, Union, Any

import streamlit as st
from annotated_text import annotated_text

sidebar = st.sidebar


def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
    # Color each generated unigram by whether it also appears in the gold target or the model input.
    gold_set = set(belief_state_example['gold'].split(' '))
    input_set = set(" ".join(belief_state_example['input']).split(' '))
    generated = belief_state_example['generated']
    result = []
    for word in generated.split(' '):
        if word in gold_set:
            result.append((word, 'gold', '#dfd'))  # gold overlap => green label
        elif word in input_set:
            result.append((word, 'in', '#eea'))  # input overlap => yellow label
        else:
            result.append(word + ' ')  # no overlap => no label (re-insert the space)
    return result


# load in data
pptod_examples: List[Dict] = []

models: Dict[str, Dict[str, Any]] = {
    'pptod-small': {
        'name': 'pptod-small',
        'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** As such, '
                       'it is familiar with the meaning of these special separator tokens. However, it does not have '
                       'MultiWoZ training experience, so while it has adapted to the belief state grammar generally, it '
                       'is unaware of the particular slot name conventions of MultiWoZ.',
        'output_file': './output/pptod-small-10-percent.jsonl'
    },
    't5-small': {
        'name': 't5-small',
        'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude special tokens '
                       'that the model could not be familiar with.',
        'output_file': './output/t5-small-10-percent.jsonl'
    },
    'bart': {
        'name': 'bart',
        'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude special tokens '
                       'that the model could not be familiar with.',
        'output_file': './output/bart-100ish-examples.jsonl'
    },
    'dialogpt': {
        'name': 'dialogpt',
        'description': 'DialoGPT is a (fine-tuned GPT-2) dialogue response generation model for multi-turn conversations, '
                       'trained on 147M Reddit conversation chains',
        'output_file': './output/dialogpt-100ish-examples.jsonl'
    }
}

# read each model's dumped predictions (one JSON object per line)
for model_def in models.values():
    model_def['examples'] = []
    with open(model_def['output_file'], 'r') as f:
        for line in f.readlines():
            model_def['examples'].append(json.loads(line.strip()))


model_names = list(models.keys())

model_name = sidebar.selectbox('Model', model_names)
active_model = models[model_name]

st.write(f"""
#### Inputs

**Selected Model:** `{active_model['name']}`

{active_model['description']}

""")

# Bare string literals below are rendered as markdown by Streamlit's "magic".
"""
### Belief State Prediction

Below is the predicted belief state as a sequence.

- `input` denotes the input, which has been transformed into a list for
  human readability but is presented to the model as a sequence.
- `gold` is the target belief state in sequence form (slot-name slot-value pairs)
- `generated` is the model-generated belief state sequence
"""
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})" for i, e in enumerate(active_model['examples'])]
title = sidebar.selectbox("Development Example", titles)
# parse the example index back out of the "<i>: <domain> (Turn <n>)" title
active_example = active_model['examples'][int(title.split(':')[0])][0]

# re-split the flattened input on separator tokens so each "<...>" span is its own list item
active_belief_spans = active_example['bspn_input'].split("> <")
active_example_bs = {
    'input': [('<' if i > 0 else '') +
              string +
              ('>' if string[-1] != '>' and len(active_belief_spans) > 1 else '')
              for i, string in enumerate(active_belief_spans)],
    'generated': active_example['bspn_gen'],
    'gold': active_example['bspn']
}

st.write(active_example_bs)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))

"""
---

### Response Generation

Below is the predicted response as a sequence.

- `input` denotes the input, which has been transformed into a list for
  human readability but is presented to the model as a sequence.
- `gold` is the target response sequence
- `generated` is the model-generated response
"""
# title = st.selectbox("Development Example", titles)

active_example_resp = {
    'input': [('<' if i > 0 else '') +
              string +
              ('>' if string[-1] != '>' else '')
              for i, string in enumerate(active_example['resp_input'].split("> <"))],
    'generated': active_example['resp_gen'],
    'gold': active_example['resp']
}

st.write(active_example_resp)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_resp))
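For readers unfamiliar with the dumped format: app.py assumes each line of the output/*.jsonl files parses to a one-element list wrapping a per-turn record with the keys read above (turn_domain, turn_num, bspn_input, bspn, bspn_gen, resp_input, resp, resp_gen). The sketch below invents values for such a record purely to illustrate that assumed shape and what bs_unigram_match_annotated_text (defined above) returns for it; the real records live in the JSONL files added by this commit and are not rendered here.

# Hypothetical per-turn record: the values and separator-token names are invented
# for illustration only; just the keys are taken from what app.py reads.
fake_line = [{
    'turn_domain': ['[hotel]'],
    'turn_num': 0,
    'bspn_input': '<sos_u> i need a cheap hotel <eos_u> <sos_b>',
    'bspn': '[hotel] pricerange cheap',                  # gold belief state sequence
    'bspn_gen': '[hotel] pricerange cheap area north',   # model-generated belief state
    'resp_input': '<sos_u> i need a cheap hotel <eos_u> <sos_r>',
    'resp': 'there are several cheap hotels , do you have an area preference ?',
    'resp_gen': 'there are many cheap hotels in town .',
}]

record = fake_line[0]
example_bs = {
    'input': record['bspn_input'].split("> <"),  # simplified; app.py also restores the angle brackets
    'generated': record['bspn_gen'],
    'gold': record['bspn'],
}
# Unigrams shared with the gold sequence come back as ('word', 'gold', '#dfd'),
# unigrams seen only in the input as ('word', 'in', '#eea'), the rest as plain strings.
print(bs_unigram_match_annotated_text(example_bs))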
output/bart-100ish-examples.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.

output/dialogpt-100ish-examples.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.

output/pptod-small-10-percent.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.

output/t5-small-10-percent.jsonl
ADDED
The diff for this file is too large to render; see the raw diff.
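For anyone who wants to poke at this explorer outside of Spaces: a minimal local setup, assuming the only dependencies are the two libraries imported in app.py (streamlit and the st-annotated-text package that provides the annotated_text import) and that the output/*.jsonl dumps from this commit sit next to app.py, is `pip install streamlit st-annotated-text` followed by `streamlit run app.py`.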