LightChen2333 commited on
Commit
223340a
·
1 Parent(s): ae0617b

Upload 78 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +136 -0
  2. README.md +259 -10
  3. __init__.py +1 -0
  4. accelerate/config-old.yaml +16 -0
  5. accelerate/config.yaml +22 -0
  6. app.py +28 -7
  7. common/config.py +5 -4
  8. common/global_pool.py +26 -0
  9. common/loader.py +2 -3
  10. common/logger.py +46 -6
  11. common/metric.py +4 -1
  12. common/model_manager.py +221 -125
  13. common/saver.py +80 -0
  14. common/tokenizer.py +14 -2
  15. common/utils.py +12 -2
  16. config/README.md +348 -0
  17. config/app.yaml +1 -104
  18. config/decoder/interaction/stack-propagation.yaml +1 -0
  19. config/examples/README.md +38 -0
  20. config/examples/from_pretrained.yaml +53 -0
  21. config/examples/from_pretrained_multi.yaml +55 -0
  22. config/examples/normal.yaml +70 -0
  23. config/examples/reload_to_train.yaml +71 -0
  24. config/reproduction/atis/bi-model.yaml +106 -0
  25. config/reproduction/atis/dca-net.yaml +88 -0
  26. config/reproduction/atis/deberta.yaml +67 -0
  27. config/reproduction/atis/electra.yaml +67 -0
  28. config/reproduction/atis/joint-bert.yaml +70 -0
  29. config/reproduction/atis/roberta.yaml +70 -0
  30. config/reproduction/atis/slot-gated.yaml +87 -0
  31. config/reproduction/atis/stack-propagation.yaml +109 -0
  32. config/reproduction/mix-atis/agif.yaml +133 -0
  33. config/reproduction/mix-atis/gl-gin.yaml +128 -0
  34. config/reproduction/mix-atis/vanilla.yaml +95 -0
  35. config/reproduction/mix-snips/agif.yaml +131 -0
  36. config/reproduction/mix-snips/gl-gin.yaml +131 -0
  37. config/reproduction/mix-snips/vanilla.yaml +95 -0
  38. config/reproduction/snips/bi-model.yaml +104 -0
  39. config/reproduction/snips/dca_net.yaml +88 -0
  40. config/reproduction/snips/deberta.yaml +70 -0
  41. config/reproduction/snips/electra.yaml +69 -0
  42. config/reproduction/snips/joint-bert.yaml +75 -0
  43. config/reproduction/snips/roberta.yaml +70 -0
  44. config/reproduction/snips/slot-gated.yaml +87 -0
  45. config/reproduction/snips/stack-propagation.yaml +105 -0
  46. config/visual.yaml +6 -0
  47. model/decoder/base_decoder.py +24 -11
  48. model/encoder/auto_encoder.py +1 -1
  49. model/encoder/non_pretrained_encoder.py +2 -2
  50. model/encoder/pretrained_encoder.py +8 -3
.gitignore ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ .idea/
6
+ wandb/*
7
+ save/*
8
+ !save/.gitkeep
9
+ logs/*
10
+ !logs/.gitkeep
11
+ test
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ pip-wheel-metadata/
30
+ share/python-wheels/
31
+ *.egg-info/
32
+ .installed.cfg
33
+ *.egg
34
+ MANIFEST
35
+
36
+ # PyInstaller
37
+ # Usually these files are written by a python script from a template
38
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
39
+ *.manifest
40
+ *.spec
41
+
42
+ # Installer logs
43
+ pip-log.txt
44
+ pip-delete-this-directory.txt
45
+
46
+ # Unit test / coverage reports
47
+ htmlcov/
48
+ .tox/
49
+ .nox/
50
+ .coverage
51
+ .coverage.*
52
+ .cache
53
+ nosetests.xml
54
+ coverage.xml
55
+ *.cover
56
+ *.py,cover
57
+ .hypothesis/
58
+ .pytest_cache/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101
+ __pypackages__/
102
+
103
+ # Celery stuff
104
+ celerybeat-schedule
105
+ celerybeat.pid
106
+
107
+ # SageMath parsed files
108
+ *.sage.py
109
+
110
+ # Environments
111
+ .env
112
+ .venv
113
+ env/
114
+ venv/
115
+ ENV/
116
+ env.bak/
117
+ venv.bak/
118
+
119
+ # Spyder project settings
120
+ .spyderproject
121
+ .spyproject
122
+
123
+ # Rope project settings
124
+ .ropeproject
125
+
126
+ # mkdocs documentation
127
+ /site
128
+
129
+ # mypy
130
+ .mypy_cache/
131
+ .dmypy.json
132
+ dmypy.json
133
+
134
+ # Pyre type checker
135
+ .pyre/
136
+ .vscode/
README.md CHANGED
@@ -1,13 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: OpenSLU
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 3.17.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
 
 
 
 
 
 
 
 
 
11
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/OpenSLU.jpg" alt=""/>
2
+
3
+ ---
4
+
5
+ <p align="center">
6
+ <a >
7
+ <img alt="version" src="https://img.shields.io/badge/version-v0.1.0-blue?color=FF8000?color=009922" />
8
+ </a>
9
+ <a >
10
+ <img alt="Status-building" src="https://img.shields.io/badge/Status-building-blue" />
11
+ </a>
12
+ <a href=""><img src="https://img.shields.io/badge/python-3.6.2+-orange.svg"></a>
13
+ <a >
14
+ <img alt="PRs-Welcome" src="https://img.shields.io/badge/PRs-Welcome-red" />
15
+ </a>
16
+ <a>
17
+ <img alt="stars" src="https://img.shields.io/github/stars/LightChen233/OpenSLU" />
18
+ </a>
19
+ <a href="https://github.com/LightChen233/OpenSLU/network/members">
20
+ <img alt="FORK" src="https://img.shields.io/github/forks/LightChen233/OpenSLU?color=FF8000" />
21
+ </a>
22
+ <a href="https://github.com/LightChen233/OpenSLU/issues">
23
+ <img alt="Issues" src="https://img.shields.io/github/issues/LightChen233/OpenSLU?color=0088ff"/>
24
+ </a>
25
+ <br />
26
+ </p>
27
+
28
+ ## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/motivation.png" width="25" /> Motivation
29
+
30
+ Spoken Language Understanding (SLU) is one of the core components of a task-oriented dialogue system, which aims to extract the semantic meaning of user queries (e.g., intents and slots).
31
+
32
+ In this work, we introduce __OpenSLU__, an open-source toolkit to provide a unified, modularized, and extensible toolkit for spoken language understanding. Specifically, OpenSLU unifies 10 SLU baselines for both single-intent and multi-intent scenarios, which support both non-pretrained and pretrained models simultaneously. Additionally, OpenSLU is highly modularized and extensible by decomposing the model architecture, inference, and learning process into reusable modules, which allows researchers to quickly set up SLU experiments with highly flexible configurations. We hope OpenSLU can help researchers quickly initiate experiments and spur more breakthroughs in SLU.
33
+
34
+ ## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/notes.png" width="25" /> Changelog
35
+ - 2023-02-09
36
+ - We build the first version and release it.
37
+
38
+ ## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/resource.png" width="25" /> Installation
39
+ ### System requirements
40
+ OpenSLU requires `Python>=3.8`, and `torch>=1.12.0`.
41
+ ### Install from git
42
+ ```bash
43
+ git clone https://github.com/LightChen2333/OpenSLU.git && cd OpenSLU/
44
+ pip install -r requirements.txt
45
+ ```
46
+
47
+
48
+ ## File Structure
49
+
50
+ ```yaml
51
+ root
52
+ ├── common
53
+ │ ├── config.py # load configuration and auto preprocess ignored config
54
+ │ ├── loader.py # load data from hugging face
55
+ │ ├── logger.py # log predict result, support [fitlog], [wandb], [local logging]
56
+ │ ├── metric.py # evaluation metric, support [intent acc], [slot F1], [EMA]
57
+ │ ├── model_manager.py # help to prepare data, prebuild training progress.
58
+ │ ├── tokenizer.py # tokenizer also support no-pretrained model for word tokenizer.
59
+ │ └── utils.py # canonical model communication data structure and other common tool function
60
+ ├── config
61
+ │ ├── reproduction # configurations for reproduced SLU models.
62
+ │ └── **.yaml # configuration for SLU model.
63
+ ├── logs # local log storage dir path.
64
+ ├── model
65
+ │ ├── encoder
66
+ │ │ ├── base_encoder.py # base encoder model. All implemented encoder models need to inherit the BaseEncoder class
67
+ │ │ ├── auto_encoder.py # auto-encoder to autoload provided encoder model
68
+ │ │ ├── non_pretrained_encoder.py # all common-used no pretrained encoder like lstm, lstm+self-attention
69
+ │ │ └── pretrained_encoder.py # all common-used pretrained encoder, implemented by hugging-face [AutoModel].
70
+ │ ├── decoder
71
+ │ │ ├── interaction
72
+ │ │ │ ├── base_interaction.py # base interaction model. All implemented interaction models need to inherit the BaseInteraction class
73
+ │ │ │ └── *_interaction.py # some SOTA SLU interaction module. You can easily reuse or rewrite to implement your own idea.
74
+ │ │ ├── base_decoder.py # decoder class, [BaseDecoder] support classification after interaction, also you can rewrite for your own interaction order
75
+ │ │ └── classifier.py # classifier class, support linear and LSTM classification. Also support token-level intent.
76
+ │ └── open_slu_model.py # the general model class, can automatically build the model through configuration.
77
+ ├── save # model checkpoint storage dir path and dir to automatically save glove embedding.
78
+ └── run.py # run script for all function.
79
+ ```
80
+
81
+ ## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/catalogue.png" width="27" /> Quick Start
82
+
83
+ ### 1. Reproducing Existing Models
84
+ Example for reproduction of `slot-gated` model:
85
+
86
+ ```bash
87
+ python run.py --dataset atis --model slot-gated
88
+ ```
89
+
90
+ ### 2. Customizable Combination Existing Components
91
+ 1. First, you can freely combine and build your own model through config files. For details, see [Configuration](config/README.md).
92
+ 2. Then, you can assign the configuration path to train your own model.
93
+
94
+ Example for `stack-propagation` fine-tuning:
95
+
96
+ ```bash
97
+ python run.py -cp config/stack-propagation.yaml
98
+ ```
99
+
100
+ Example for multi-GPU fine-tuning:
101
+
102
+ ```bash
103
+ accelerate config
104
+ accelerate launch run.py -cp config/stack-propagation.yaml
105
+ ```
106
+
107
+ Or you can assign `accelerate` yaml configuration.
108
+
109
+ ```bash
110
+ accelerate launch [--config_file ./accelerate/config.yaml] run.py -cp config/stack-propagation.yaml
111
+ ```
112
+
113
+ ### 3. Implementing a New SLU Model
114
+ In OpenSLU, you only need to rewrite the required components and assign them in the configuration instead of rewriting all components.
115
+
116
+ In most cases, rewriting Interaction module is enough for building a new SLU model.
117
+ This module accepts [HiddenData](./common/utils.py) as input and returns `HiddenData`, which contains the `hidden_states` for `intent` and `slot`, and other helpful information. The example is as follows:
118
+ ```python
119
+ class NewInteraction(BaseInteraction):
120
+     def __init__(self, **config):
121
+         self.config = config
122
+         ...
123
+    
124
+     def forward(self, hiddens: HiddenData):
125
+         ...
126
+         intent, slot = self.func(hiddens)
127
+         hiddens.update_slot_hidden_state(slot)
128
+         hiddens.update_intent_hidden_state(intent)
129
+         return hiddens
130
+ ```
131
+
132
+ To further meet the
133
+ needs of complex exploration, we provide the
134
+ [BaseDecoder](./model/decoder/base_decoder.py) class, and the user can simply override the `forward()` function in the class, which accepts `HiddenData` as input and returns `OutputData`. The example is as follows:
135
+ ```python
136
+ class NewDecoder(BaseDecoder):
137
+     def __init__(self,
138
+         intent_classifier,
139
+         slot_classifier,
140
+         interaction=None):
141
+         ...
142
+         self.int_cls = intent_classifier
143
+         self.slot_cls = slot_classifier
144
+         self.interaction = interaction
145
+        
146
+     def forward(self, hiddens: HiddenData):
147
+         ...
148
+         interact = self.interaction(hiddens)
149
+         slot = self.slot_cls(interact.slot)
150
+         intent = self.int_cls(interact.intent)
151
+         return OutputData(intent, slot)
152
+ ```
153
+
154
+
155
+ ## Modules
156
+
157
+ ### 1. Encoder Modules
158
+
159
+ - **No Pretrained Encoder**
160
+ - GloVe Embedding
161
+ - BiLSTM Encoder
162
+ - BiLSTM + Self-Attention Encoder
163
+ - Bi-Encoder (support two encoders for intent and slot, respectively)
164
+ - **Pretrained Encoder**
165
+ - `bert-base-uncased`
166
+ - `roberta-base`
167
+ - `microsoft/deberta-v3-base`
168
+ - other hugging-face supported encoder model...
169
+
170
+ ### 2. Decoder Modules
171
+
172
+ #### 2.1 Interaction Modules
173
+
174
+ - DCA Net Interaction
175
+ - Stack Propagation Interaction
176
+ - Bi-Model Interaction(with decoder/without decoder)
177
+ - Slot Gated Interaction
178
+
179
+ #### 2.2 Classification Modules
180
+ All classifiers support `Token-level Intent` and `Sentence-level intent`. What's more, our decode function supports both `Single-Intent` and `Multi-Intent`.
181
+ - LinearClassifier
182
+ - AutoregressiveLSTMClassifier
183
+ - MLPClassifier
184
+
185
+ ### 3. Supported Models
186
+ We implement 10 commonly used SLU baselines:
187
+
188
  ---
189
+ **Single-Intent Model**
190
+ - Bi-Model \[ [Wang et al., 2018](https://aclanthology.org/N18-2050/) \] :
191
+ - `bi-model.yaml`
192
+ - Slot-Gated \[ [Goo et al., 2018](https://www.csie.ntu.edu.tw/~yvchen/doc/NAACL18_SlotGated.pdf) \] :
193
+ - `slot-gated.yaml`
194
+ - Stack-Propagation \[ [Qin et al., 2019](https://www.aclweb.org/anthology/D19-1214/) \] :
195
+ - `stack-propagation.yaml`
196
+ - Joint Bert \[ [Chen et al., 2019](https://arxiv.org/abs/1902.10909) \] :
197
+ - `joint-bert.yaml`
198
+ - RoBERTa \[ [Liu et al., 2019](https://arxiv.org/abs/1907.11692) \] :
199
+ - `roberta.yaml`
200
+ - ELECTRA \[ [Clark et al., 2020](https://arxiv.org/abs/2003.10555) \] :
201
+ - `electra.yaml`
202
+ - DCA-Net \[ [Qin et al., 2021](https://arxiv.org/abs/2010.03880) \] :
203
+ - `dca_net.yaml`
204
+ - DeBERTa \[ [He et al., 2021](https://arxiv.org/abs/2111.09543) \] :
205
+ - `deberta.yaml`
206
+
207
  ---
208
+ **Multi-Intent Model**
209
+ - AGIF \[ [Qin et al., 2020](https://arxiv.org/pdf/2004.10087.pdf) \] :
210
+ - `agif.yaml`
211
+ - GL-GIN \[ [Qin et al., 2021](https://arxiv.org/abs/2106.01925) \] :
212
+ - `gl-gin.yaml`
213
+
214
+
215
+ ## Application
216
+ ### 1. Visualization Tools
217
+ Model metrics tests alone no longer adequately reflect the model's performance. To help researchers further improve their models, we provide a tool for visual error analysis.
218
+
219
+ We provide an analysis interface with three main parts:
220
+ - (a) error distribution analysis;
221
+ - (b) label transfer analysis;
222
+ - (c) instance analysis.
223
+
224
+ <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/visual_analysis.png" />
225
+
226
+ ```bash
227
+ python tools/visualization.py \
228
+ --config_path config/visual.yaml \
229
+ --output_path {ckpt_dir}/outputs.jsonl
230
+ ```
231
+ Visualization configuration can be set as below:
232
+ ```yaml
233
+ host: 127.0.0.1
234
+ port: 7861
235
+ is_push_to_public: true # whether to push to gradio platform(public network)
236
+ output_path: save/stack/outputs.jsonl # output prediction file path
237
+ page-size: 2 # the number of instances of each page in instance analysis.
238
+ ```
239
+ ### 2. Deployment
240
+
241
+ We provide a script to deploy your model automatically. You only need to run the command below to deploy your own model:
242
+
243
+ ```bash
244
+ python app.py --config_path config/reproduction/atis/bi-model.yaml
245
+ ```
246
+
247
+ <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/app.png" />
248
+
249
+ ### 3. Publish your model to hugging face
250
+
251
+ We also offer a script to transfer models trained by OpenSLU to hugging face format automatically. And you can upload the model to your `Model` space.
252
+
253
+ ```shell
254
+ python tools/parse_to_hugging_face.py -cp config/reproduction/atis/bi-model.yaml -op save/temp
255
+ ```
256
+
257
+ It will generate 5 files, and you should only need to upload `config.json`, `pytorch_model.bin` and `tokenizer.pkl`.
258
+ After that, others can reproduce your model just by adjusting the `_from_pretrained_` parameters in the configuration.
259
+
260
+ ## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/intro.png" width="25" /> Contact
261
 
262
+ Please create Github issues here or email [Libo Qin](mailto:lbqin@ir.hit.edu.cn) or [Qiguang Chen](mailto:[email protected]) if you have any questions or suggestions.
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
accelerate/config-old.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ deepspeed_config: {}
3
+ distributed_type: MULTI_GPU
4
+ downcast_bf16: 'no'
5
+ fsdp_config: {}
6
+ gpu_ids: all
7
+ machine_rank: 0
8
+ main_process_ip: null
9
+ main_process_port: 9001
10
+ main_training_function: main
11
+ mixed_precision: 'no'
12
+ num_machines: 0
13
+ num_processes: 2
14
+ rdzv_backend: static
15
+ same_network: true
16
+ use_cpu: false
accelerate/config.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ command_file: null
2
+ commands: null
3
+ compute_environment: LOCAL_MACHINE
4
+ deepspeed_config: {}
5
+ distributed_type: 'NO'
6
+ downcast_bf16: 'no'
7
+ dynamo_backend: 'NO'
8
+ fsdp_config: {}
9
+ gpu_ids: all
10
+ machine_rank: 0
11
+ main_process_ip: null
12
+ main_process_port: null
13
+ main_training_function: main
14
+ megatron_lm_config: {}
15
+ mixed_precision: 'no'
16
+ num_machines: 1
17
+ num_processes: 2
18
+ rdzv_backend: static
19
+ same_network: true
20
+ tpu_name: null
21
+ tpu_zone: null
22
+ use_cpu: false
app.py CHANGED
@@ -1,11 +1,31 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- import os
3
  from common.config import Config
4
  from common.model_manager import ModelManager
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- config = Config.load_from_yaml("config/app.yaml")
7
  model_manager = ModelManager(config)
8
- model_manager.load()
9
 
10
 
11
  def text_analysis(text):
@@ -34,9 +54,10 @@ demo = gr.Interface(
34
  gr.Textbox(placeholder="Enter sentence here..."),
35
  ["html"],
36
  examples=[
37
- ["What a beautiful morning for a walk!"],
38
- ["It was the best of times, it was the worst of times."],
39
  ],
40
  )
41
-
42
- demo.launch()
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ LastEditors: Qiguang Chen
4
+ Date: 2023-02-07 15:42:32
5
+ LastEditTime: 2023-02-19 21:04:03
6
+ Description:
7
+
8
+ '''
9
+ import argparse
10
  import gradio as gr
11
+
12
  from common.config import Config
13
  from common.model_manager import ModelManager
14
+ from common.utils import str2bool
15
+
16
+
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument('--config_path', '-cp', type=str, default="config/examples/from_pretrained.yaml")
19
+ parser.add_argument('--push_to_public', '-p', type=str2bool, nargs='?',
20
+ const=True, default=False,
21
+ help="Push to public network.")
22
+ args = parser.parse_args()
23
+ config = Config.load_from_yaml(args.config_path)
24
+ config.base["train"] = False
25
+ config.base["test"] = False
26
 
 
27
  model_manager = ModelManager(config)
28
+ model_manager.init_model()
29
 
30
 
31
  def text_analysis(text):
 
54
  gr.Textbox(placeholder="Enter sentence here..."),
55
  ["html"],
56
  examples=[
57
+ ["i would like to find a flight from charlotte to las vegas that makes a stop in st louis"],
 
58
  ],
59
  )
60
+ if args.push_to_public:
61
+ demo.launch(share=True)
62
+ else:
63
+ demo.launch()
common/config.py CHANGED
@@ -2,7 +2,7 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-01-26 10:55:43
6
  Description: Configuration class to manage all process in OpenSLU like model construction, learning processing and so on.
7
 
8
  '''
@@ -18,7 +18,8 @@ class Config(dict):
18
  dict.__init__(self, *args, **kwargs)
19
  self.__dict__ = self
20
  self.start_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
21
- self.__autowired()
 
22
 
23
  @staticmethod
24
  def load_from_yaml(file_path:str)->"Config":
@@ -46,8 +47,8 @@ class Config(dict):
46
  Returns:
47
  Config: _description_
48
  """
49
- if args.model is not None:
50
- args.config_path = "config/" + args.model + ".yaml"
51
  config = Config.load_from_yaml(args.config_path)
52
  if args.dataset is not None:
53
  config.__update_dataset(args.dataset)
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-15 17:58:53
6
  Description: Configuration class to manage all process in OpenSLU like model construction, learning processing and so on.
7
 
8
  '''
 
18
  dict.__init__(self, *args, **kwargs)
19
  self.__dict__ = self
20
  self.start_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
21
+ if not self.model.get("_from_pretrained_"):
22
+ self.__autowired()
23
 
24
  @staticmethod
25
  def load_from_yaml(file_path:str)->"Config":
 
47
  Returns:
48
  Config: _description_
49
  """
50
+ if args.model is not None and args.dataset is not None:
51
+ args.config_path = f"config/reproduction/{args.dataset}/{args.model}.yaml"
52
  config = Config.load_from_yaml(args.config_path)
53
  if args.dataset is not None:
54
  config.__update_dataset(args.dataset)
common/global_pool.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ LastEditors: Qiguang Chen
4
+ Date: 2023-02-12 14:35:37
5
+ LastEditTime: 2023-02-12 14:37:40
6
+ Description:
7
+
8
+ '''
9
+ def _init():
10
+ global _global_dict
11
+ _global_dict = {}
12
+
13
+
14
+ def set_value(key, value):
15
+ # set gobal value to object pool
16
+ _global_dict[key] = value
17
+
18
+
19
+ def get_value(key):
20
+ # get gobal value from object pool
21
+ try:
22
+ return _global_dict[key]
23
+ except:
24
+ print('读取' + key + '失败\r\n')
25
+
26
+
common/loader.py CHANGED
@@ -2,7 +2,7 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-02-07 19:26:06
6
  Description: all class for load data.
7
 
8
  '''
@@ -36,14 +36,13 @@ class DataFactory(object):
36
  return dataset_name.lower() in ["atis", "snips", "mix-atis", "mix-atis"]
37
 
38
  def load_dataset(self, dataset_config, split="train"):
39
- # TODO: 关闭use_auth_token
40
  dataset_name = None
41
  if split not in dataset_config:
42
  dataset_name = dataset_config.get("dataset_name")
43
  elif self.__is_supported_datasets(dataset_config[split]):
44
  dataset_name = dataset_config[split].lower()
45
  if dataset_name is not None:
46
- return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split, use_auth_token=True)
47
  else:
48
  data_file = dataset_config[split]
49
  data_dict = {"text": [], "slot": [], "intent":[]}
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-19 15:39:48
6
  Description: all class for load data.
7
 
8
  '''
 
36
  return dataset_name.lower() in ["atis", "snips", "mix-atis", "mix-atis"]
37
 
38
  def load_dataset(self, dataset_config, split="train"):
 
39
  dataset_name = None
40
  if split not in dataset_config:
41
  dataset_name = dataset_config.get("dataset_name")
42
  elif self.__is_supported_datasets(dataset_config[split]):
43
  dataset_name = dataset_config[split].lower()
44
  if dataset_name is not None:
45
+ return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split)
46
  else:
47
  data_file = dataset_config[split]
48
  data_dict = {"text": [], "slot": [], "intent":[]}
common/logger.py CHANGED
@@ -2,14 +2,17 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-02-02 16:29:13
6
  Description: log manager
7
 
8
  '''
 
9
  import json
10
  import os
11
  import time
12
  from common.config import Config
 
 
13
 
14
  def mkdirs(dir_names):
15
  for dir_name in dir_names:
@@ -71,7 +74,7 @@ class Logger():
71
  self.other_log_file = os.path.join(self.output_dir, "/other_log.jsonl")
72
  with open(self.other_log_file, "w", encoding="utf8") as f:
73
  print(f"Other Log Result will be written to {self.other_log_file}")
74
- import logging
75
  LOGGING_LEVEL_MAP = {
76
  "CRITICAL": logging.CRITICAL,
77
  "FATAL": logging.FATAL,
@@ -82,10 +85,47 @@ class Logger():
82
  "DEBUG": logging.DEBUG,
83
  "NOTSET": logging.NOTSET,
84
  }
85
- logging.basicConfig(format='[%(levelname)s - %(asctime)s]\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
86
- filename=os.path.join(self.output_dir, "log.log"), level=LOGGING_LEVEL_MAP[logging_level])
87
- self.logging = logging
88
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def set_config(self, config: Config):
90
  """save config
91
 
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-17 20:38:38
6
  Description: log manager
7
 
8
  '''
9
+ import datetime
10
  import json
11
  import os
12
  import time
13
  from common.config import Config
14
+ import logging
15
+ import colorlog
16
 
17
  def mkdirs(dir_names):
18
  for dir_name in dir_names:
 
74
  self.other_log_file = os.path.join(self.output_dir, "/other_log.jsonl")
75
  with open(self.other_log_file, "w", encoding="utf8") as f:
76
  print(f"Other Log Result will be written to {self.other_log_file}")
77
+
78
  LOGGING_LEVEL_MAP = {
79
  "CRITICAL": logging.CRITICAL,
80
  "FATAL": logging.FATAL,
 
85
  "DEBUG": logging.DEBUG,
86
  "NOTSET": logging.NOTSET,
87
  }
88
+ # logging.basicConfig(format='[%(levelname)s - %(asctime)s]\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
89
+ # filename=os.path.join(self.output_dir, "log.log"), level=LOGGING_LEVEL_MAP[logging_level])
90
+
91
+ # logger = logging.getLogger()
92
+ # KZT = logging.StreamHandler()
93
+ # KZT.setLevel(logging.DEBUG)
94
+ # logger.addHandler(KZT)
95
+
96
+ self.logging = self._get_logging_logger(logging_level)
97
+
98
+ def _get_logging_logger(self, level="INFO"):
99
+ log_colors_config = {
100
+ 'DEBUG': 'cyan',
101
+ 'INFO': 'blue',
102
+ 'WARNING': 'yellow',
103
+ 'ERROR': 'red',
104
+ 'CRITICAL': 'red,bg_white',
105
+ }
106
+
107
+ logger = logging.getLogger()
108
+ logger.setLevel(level)
109
+
110
+ log_path = os.path.join(self.output_dir, "log.log")
111
+
112
+ if not logger.handlers:
113
+ sh = logging.StreamHandler()
114
+ fh = logging.FileHandler(filename=log_path, mode='a', encoding="utf-8")
115
+ fmt = logging.Formatter(
116
+ fmt='[%(levelname)s - %(asctime)s]\t%(message)s',
117
+ datefmt='%m/%d/%Y %I:%M:%S %p')
118
+
119
+ sh_fmt = colorlog.ColoredFormatter(
120
+ fmt='%(log_color)s[%(levelname)s - %(asctime)s]\t%(message)s',
121
+ datefmt='%m/%d/%Y %I:%M:%S %p',
122
+ log_colors=log_colors_config)
123
+ sh.setFormatter(fmt=sh_fmt)
124
+ fh.setFormatter(fmt=fmt)
125
+ logger.addHandler(sh)
126
+ logger.addHandler(fh)
127
+ return logger
128
+
129
  def set_config(self, config: Config):
130
  """save config
131
 
common/metric.py CHANGED
@@ -2,7 +2,7 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-01-26 12:12:55
6
  Description: Metric calculation class
7
 
8
  '''
@@ -198,6 +198,8 @@ class Evaluator(object):
198
  lastPredTag = 'O'
199
  lastPredType = ''
200
  for c, p in zip(correct_slot, pred_slot):
 
 
201
  correctTag, correctType = Evaluator.__splitTagType(c)
202
  predTag, predType = Evaluator.__splitTagType(p)
203
 
@@ -317,6 +319,7 @@ class Evaluator(object):
317
  use_intent = output.intent_ids is not None and len(
318
  output.intent_ids) > 0
319
  if use_slot and "slot_f1" in metric_list:
 
320
  res_dict["slot_f1"] = Evaluator.computeF1Score(
321
  output.slot_ids, inps.slot)
322
  if use_intent and "intent_acc" in metric_list:
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-17 19:39:22
6
  Description: Metric calculation class
7
 
8
  '''
 
198
  lastPredTag = 'O'
199
  lastPredType = ''
200
  for c, p in zip(correct_slot, pred_slot):
201
+ c = str(c)
202
+ p = str(p)
203
  correctTag, correctType = Evaluator.__splitTagType(c)
204
  predTag, predType = Evaluator.__splitTagType(p)
205
 
 
319
  use_intent = output.intent_ids is not None and len(
320
  output.intent_ids) > 0
321
  if use_slot and "slot_f1" in metric_list:
322
+
323
  res_dict["slot_f1"] = Evaluator.computeF1Score(
324
  output.slot_ids, inps.slot)
325
  if use_intent and "intent_acc" in metric_list:
common/model_manager.py CHANGED
@@ -2,11 +2,13 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-02-08 00:57:09
6
  Description: manage all process of model training and prediction.
7
 
8
  '''
 
9
  import os
 
10
  import random
11
 
12
  import numpy as np
@@ -18,11 +20,15 @@ from common import utils
18
  from common.loader import DataFactory
19
  from common.logger import Logger
20
  from common.metric import Evaluator
 
21
  from common.tokenizer import get_tokenizer, get_tokenizer_class, load_embedding
22
  from common.utils import InputData, instantiate
23
  from common.utils import OutputData
24
  from common.config import Config
25
  import dill
 
 
 
26
 
27
 
28
  class ModelManager(object):
@@ -33,45 +39,101 @@ class ModelManager(object):
33
  config (Config): configuration to manage all process in OpenSLU
34
  """
35
  # init config
 
36
  self.config = config
37
  self.__set_seed(self.config.base.get("seed"))
38
  self.device = self.config.base.get("device")
39
-
 
 
 
 
40
  # enable accelerator
41
  if "accelerator" in self.config and self.config["accelerator"].get("use_accelerator"):
42
  from accelerate import Accelerator
43
- self.accelerator = Accelerator(log_with="wandb")
44
  else:
45
  self.accelerator = None
 
 
46
  if self.config.base.get("train"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  self.tokenizer = get_tokenizer(
48
  self.config.tokenizer.get("_tokenizer_name_"))
49
- self.logger = Logger(
50
- "wandb", self.config.base["name"], start_time=config.start_time, accelerator=self.accelerator)
 
 
 
 
 
 
 
 
51
 
 
 
 
 
 
 
 
 
 
 
 
52
  # init dataloader & load data
53
- if self.config.base.get("save_dir"):
54
- self.model_save_dir = self.config.base["save_dir"]
55
- else:
56
- if not os.path.exists("save/"):
57
- os.mkdir("save/")
58
- self.model_save_dir = "save/" + config.start_time
59
- if not os.path.exists(self.model_save_dir):
60
- os.mkdir(self.model_save_dir)
61
- batch_size = self.config.base["batch_size"]
62
- df = DataFactory(tokenizer=self.tokenizer,
63
- use_multi_intent=self.config.base.get("multi_intent"),
64
- to_lower_case=self.config.base.get("_to_lower_case_"))
65
- train_dataset = df.load_dataset(self.config.dataset, split="train")
66
 
67
- # update label and vocabulary
68
- df.update_label_names(train_dataset)
69
- df.update_vocabulary(train_dataset)
70
 
71
- # init tokenizer config and dataloaders
72
- tokenizer_config = {key: self.config.tokenizer[key]
73
- for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
74
- self.train_dataloader = df.get_data_loader(train_dataset,
75
  batch_size,
76
  shuffle=True,
77
  device=self.device,
@@ -80,9 +142,9 @@ class ModelManager(object):
80
  "_align_mode_"),
81
  label2tensor=True,
82
  **tokenizer_config)
83
- dev_dataset = df.load_dataset(
84
- self.config.dataset, split="validation")
85
- self.dev_dataloader = df.get_data_loader(dev_dataset,
86
  batch_size,
87
  shuffle=False,
88
  device=self.device,
@@ -91,16 +153,22 @@ class ModelManager(object):
91
  "_align_mode_"),
92
  label2tensor=False,
93
  **tokenizer_config)
94
- df.update_vocabulary(dev_dataset)
 
 
 
 
95
  # add intent label num and slot label num to config
96
- if int(self.config.get_intent_label_num()) == 0 or int(self.config.get_slot_label_num()) == 0:
97
- self.intent_list = df.intent_label_list
98
- self.intent_dict = df.intent_label_dict
99
  self.config.set_intent_label_num(len(self.intent_list))
100
- self.slot_list = df.slot_label_list
101
- self.slot_dict = df.slot_label_dict
 
102
  self.config.set_slot_label_num(len(self.slot_list))
103
- self.config.set_vocab_size(self.tokenizer.vocab_size)
 
104
 
105
  # autoload embedding for non-pretrained encoder
106
  if self.config["model"]["encoder"].get("embedding") and self.config["model"]["encoder"]["embedding"].get(
@@ -114,19 +182,13 @@ class ModelManager(object):
114
  self.config.autoload_template()
115
  # save config
116
  self.logger.set_config(self.config)
117
-
118
- self.model = None
119
- self.optimizer = None
120
- self.total_step = None
121
- self.lr_scheduler = None
122
- if self.config.tokenizer.get("_tokenizer_name_") == "word_tokenizer":
123
- self.tokenizer.save(os.path.join(self.model_save_dir, "tokenizer.json"))
124
- utils.save_json(os.path.join(
125
- self.model_save_dir, "label.json"), {"intent": self.intent_list,"slot": self.slot_list})
126
  if self.config.base.get("test"):
127
- self.test_dataset = df.load_dataset(
128
- self.config.dataset, split="test")
129
- self.test_dataloader = df.get_data_loader(self.test_dataset,
130
  batch_size,
131
  shuffle=False,
132
  device=self.device,
@@ -136,30 +198,6 @@ class ModelManager(object):
136
  label2tensor=False,
137
  **tokenizer_config)
138
 
139
- def init_model(self, model):
140
- """init model, optimizer, lr_scheduler
141
-
142
- Args:
143
- model (Any): pytorch model
144
- """
145
- self.model = model
146
- self.model.to(self.device)
147
- if self.config.base.get("train"):
148
- self.optimizer = instantiate(
149
- self.config["optimizer"])(self.model.parameters())
150
- self.total_step = int(self.config.base.get(
151
- "epoch_num")) * len(self.train_dataloader)
152
- self.lr_scheduler = instantiate(self.config["scheduler"])(
153
- optimizer=self.optimizer,
154
- num_training_steps=self.total_step
155
- )
156
- if self.accelerator is not None:
157
- self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
158
- self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
159
- if self.config.base.get("load_dir_path"):
160
- self.accelerator.load_state(self.config.base.get("load_dir_path"))
161
- # self.dev_dataloader = self.accelerator.prepare(self.dev_dataloader)
162
-
163
  def eval(self, step: int, best_metric: float) -> float:
164
  """ evaluation models.
165
 
@@ -171,31 +209,21 @@ class ModelManager(object):
171
  float: updated best metric value
172
  """
173
  # TODO: save dev
174
- _, res = self.__evaluate(self.model, self.dev_dataloader)
175
  self.logger.log_metric(res, metric_split="dev", step=step)
176
- if res[self.config.base.get("best_key")] > best_metric:
177
- best_metric = res[self.config.base.get("best_key")]
178
- outputs, test_res = self.__evaluate(
179
- self.model, self.test_dataloader)
180
- if not os.path.exists(self.model_save_dir):
181
- os.mkdir(self.model_save_dir)
182
- if self.accelerator is None:
183
- torch.save(self.model, os.path.join(
184
- self.model_save_dir, "model.pkl"))
185
- torch.save(self.optimizer, os.path.join(
186
- self.model_save_dir, "optimizer.pkl"))
187
- torch.save(self.lr_scheduler, os.path.join(
188
- self.model_save_dir, "lr_scheduler.pkl"), pickle_module=dill)
189
- torch.save(step, os.path.join(
190
- self.model_save_dir, "step.pkl"))
191
- else:
192
- self.accelerator.wait_for_everyone()
193
- unwrapped_model = self.accelerator.unwrap_model(self.model)
194
- self.accelerator.save(unwrapped_model.state_dict(
195
- ), os.path.join(self.model_save_dir, "model.pkl"))
196
- self.accelerator.save_state(output_dir=self.model_save_dir)
197
- outputs.save(self.model_save_dir, self.test_dataset)
198
- self.logger.log_metric(test_res, metric_split="test", step=step)
199
  return best_metric
200
 
201
  def train(self) -> float:
@@ -204,9 +232,23 @@ class ModelManager(object):
204
  Returns:
205
  float: updated best metric value
206
  """
207
- step = 0
208
- best_metric = 0
 
 
 
 
 
 
 
 
 
 
 
 
209
  progress_bar = tqdm(range(self.total_step))
 
 
210
  for _ in range(int(self.config.base.get("epoch_num"))):
211
  for data in self.train_dataloader:
212
  if step == 0:
@@ -230,16 +272,25 @@ class ModelManager(object):
230
  loss.backward()
231
  self.optimizer.step()
232
  self.lr_scheduler.step()
233
- if not self.config.base.get("eval_by_epoch") and step % self.config.base.get(
234
- "eval_step") == 0 and step != 0:
235
- best_metric = self.eval(step, best_metric)
 
 
 
 
 
 
236
  step += 1
237
  progress_bar.update(1)
238
- if self.config.base.get("eval_by_epoch"):
239
- best_metric = self.eval(step, best_metric)
240
  self.logger.finish()
241
- return best_metric
242
 
 
 
 
243
  def __set_seed(self, seed_value: int):
244
  """Manually set random seeds.
245
 
@@ -258,7 +309,7 @@ class ModelManager(object):
258
  torch.backends.cudnn.benchmark = True
259
  return
260
 
261
- def __evaluate(self, model, dataloader):
262
  model.eval()
263
  inps = InputData()
264
  outputs = OutputData()
@@ -272,52 +323,97 @@ class ModelManager(object):
272
 
273
  decode_output.map_output(slot_map=self.slot_list,
274
  intent_map=self.intent_list)
275
- data, decode_output = utils.remove_slot_ignore_index(
276
- data, decode_output, ignore_index="#")
 
277
 
278
  inps.merge_input_data(data)
279
  outputs.merge_output_data(decode_output)
280
- if "metric" in self.config:
281
  res = Evaluator.compute_all_metric(
282
- inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.metric)
283
  else:
284
  res = Evaluator.compute_all_metric(
285
  inps, outputs, intent_label_map=self.intent_dict)
 
286
  model.train()
287
  return outputs, res
288
 
289
  def load(self):
290
- self.model = torch.load(os.path.join(self.config.base["model_dir"], "model.pkl"), map_location=torch.device(self.device))
291
- if self.config.tokenizer["_tokenizer_name_"] == "word_tokenizer":
292
- self.tokenizer = get_tokenizer_class(self.config.tokenizer["_tokenizer_name_"]).from_file(
293
- os.path.join(self.config.base["model_dir"], "tokenizer.json"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  else:
295
- self.tokenizer = get_tokenizer(self.config.tokenizer["_tokenizer_name_"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  self.model.to(self.device)
297
- label = utils.load_json(os.path.join(self.config.base["model_dir"], "label.json"))
 
298
  self.intent_list = label["intent"]
299
  self.slot_list = label["slot"]
300
- self.data_factory=DataFactory(tokenizer=self.tokenizer,
301
- use_multi_intent=self.config.base.get("multi_intent"),
302
- to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
303
 
304
  def predict(self, text_data):
305
  self.model.eval()
306
  tokenizer_config = {key: self.config.tokenizer[key]
307
- for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
308
  align_mode = self.config.tokenizer.get("_align_mode_")
309
  inputs = self.data_factory.batch_fn(batch=[{"text": text_data.split(" ")}],
310
- device=self.device,
311
- config=tokenizer_config,
312
- enable_label=False,
313
- align_mode= align_mode if align_mode is not None else "general",
314
- label2tensor=False)
315
  output = self.model(inputs)
316
  decode_output = self.model.decode(output, inputs)
317
  decode_output.map_output(slot_map=self.slot_list,
318
- intent_map=self.intent_list)
319
  if self.config.base.get("multi_intent"):
320
  intent = decode_output.intent_ids[0]
321
  else:
322
  intent = [decode_output.intent_ids[0]]
323
- return {"intent": intent, "slot": decode_output.slot_ids[0], "text": self.tokenizer.decode(inputs.input_ids[0])}
 
 
 
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-19 18:50:11
6
  Description: manage all process of model training and prediction.
7
 
8
  '''
9
+ import math
10
  import os
11
+ import queue
12
  import random
13
 
14
  import numpy as np
 
20
  from common.loader import DataFactory
21
  from common.logger import Logger
22
  from common.metric import Evaluator
23
+ from common.saver import Saver
24
  from common.tokenizer import get_tokenizer, get_tokenizer_class, load_embedding
25
  from common.utils import InputData, instantiate
26
  from common.utils import OutputData
27
  from common.config import Config
28
  import dill
29
+ from common import global_pool
30
+ from tools.load_from_hugging_face import PreTrainedTokenizerForSLU, PretrainedModelForSLU
31
+ # from tools.hugging_face_parser import load_model, load_tokenizer
32
 
33
 
34
  class ModelManager(object):
 
39
  config (Config): configuration to manage all process in OpenSLU
40
  """
41
  # init config
42
+ global_pool._init()
43
  self.config = config
44
  self.__set_seed(self.config.base.get("seed"))
45
  self.device = self.config.base.get("device")
46
+ self.load_dir = self.config.model_manager.get("load_dir")
47
+ if self.config.get("logger") and self.config["logger"].get("logger_type"):
48
+ logger_type = self.config["logger"].get("logger_type")
49
+ else:
50
+ logger_type = "wandb"
51
  # enable accelerator
52
  if "accelerator" in self.config and self.config["accelerator"].get("use_accelerator"):
53
  from accelerate import Accelerator
54
+ self.accelerator = Accelerator(log_with=logger_type)
55
  else:
56
  self.accelerator = None
57
+ self.tokenizer = None
58
+ self.saver = Saver(self.config.model_manager, start_time=self.config.start_time)
59
  if self.config.base.get("train"):
60
+ self.model = None
61
+ self.optimizer = None
62
+ self.total_step = None
63
+ self.lr_scheduler = None
64
+ self.init_step = 0
65
+ self.best_metric = 0
66
+ self.logger = Logger(logger_type=logger_type,
67
+ logger_name=self.config.base["name"],
68
+ start_time=self.config.start_time,
69
+ accelerator=self.accelerator)
70
+ global_pool.set_value("logger", self.logger)
71
+
72
+ def init_model(self):
73
+ """init model, optimizer, lr_scheduler
74
+
75
+ Args:
76
+ model (Any): pytorch model
77
+ """
78
+ self.prepared = False
79
+ if self.load_dir is not None:
80
+ self.load()
81
+ self.config.set_vocab_size(self.tokenizer.vocab_size)
82
+ self.init_data()
83
+ if self.config.base.get("train") and self.config.model_manager.get("load_train_state"):
84
+ train_state = torch.load(os.path.join(
85
+ self.load_dir, "train_state.pkl"), pickle_module=dill)
86
+ self.optimizer = instantiate(
87
+ self.config["optimizer"])(self.model.parameters())
88
+ self.lr_scheduler = instantiate(self.config["scheduler"])(
89
+ optimizer=self.optimizer,
90
+ num_training_steps=self.total_step
91
+ )
92
+ self.optimizer.load_state_dict(train_state["optimizer"])
93
+ self.optimizer.zero_grad()
94
+ self.lr_scheduler.load_state_dict(train_state["lr_scheduler"])
95
+ self.init_step = train_state["step"]
96
+ self.best_metric = train_state["best_metric"]
97
+ elif self.config.model.get("_from_pretrained_") and self.config.tokenizer.get("_from_pretrained_"):
98
+ self.from_pretrained()
99
+ self.config.set_vocab_size(self.tokenizer.vocab_size)
100
+ self.init_data()
101
+ else:
102
  self.tokenizer = get_tokenizer(
103
  self.config.tokenizer.get("_tokenizer_name_"))
104
+ self.init_data()
105
+ self.model = instantiate(self.config.model)
106
+ self.model.to(self.device)
107
+ if self.config.base.get("train"):
108
+ self.optimizer = instantiate(
109
+ self.config["optimizer"])(self.model.parameters())
110
+ self.lr_scheduler = instantiate(self.config["scheduler"])(
111
+ optimizer=self.optimizer,
112
+ num_training_steps=self.total_step
113
+ )
114
 
115
+
116
+ def init_data(self):
117
+ self.data_factory = DataFactory(tokenizer=self.tokenizer,
118
+ use_multi_intent=self.config.base.get("multi_intent"),
119
+ to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
120
+ batch_size = self.config.base["batch_size"]
121
+ # init tokenizer config and dataloaders
122
+ tokenizer_config = {key: self.config.tokenizer[key]
123
+ for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
124
+
125
+ if self.config.base.get("train"):
126
  # init dataloader & load data
127
+
128
+
129
+ train_dataset = self.data_factory.load_dataset(self.config.dataset, split="train")
 
 
 
 
 
 
 
 
 
 
130
 
131
+ # update label and vocabulary (ONLY SUPPORT FOR "word_tokenizer")
132
+ self.data_factory.update_label_names(train_dataset)
133
+ self.data_factory.update_vocabulary(train_dataset)
134
 
135
+
136
+ self.train_dataloader = self.data_factory.get_data_loader(train_dataset,
 
 
137
  batch_size,
138
  shuffle=True,
139
  device=self.device,
 
142
  "_align_mode_"),
143
  label2tensor=True,
144
  **tokenizer_config)
145
+ self.total_step = int(self.config.base.get("epoch_num")) * len(self.train_dataloader)
146
+ dev_dataset = self.data_factory.load_dataset(self.config.dataset, split="validation")
147
+ self.dev_dataloader = self.data_factory.get_data_loader(dev_dataset,
148
  batch_size,
149
  shuffle=False,
150
  device=self.device,
 
153
  "_align_mode_"),
154
  label2tensor=False,
155
  **tokenizer_config)
156
+ self.data_factory.update_vocabulary(dev_dataset)
157
+ self.intent_list = None
158
+ self.intent_dict = None
159
+ self.slot_list = None
160
+ self.slot_dict = None
161
  # add intent label num and slot label num to config
162
+ if self.config.model["decoder"].get("intent_classifier") and int(self.config.get_intent_label_num()) == 0:
163
+ self.intent_list = self.data_factory.intent_label_list
164
+ self.intent_dict = self.data_factory.intent_label_dict
165
  self.config.set_intent_label_num(len(self.intent_list))
166
+ if self.config.model["decoder"].get("slot_classifier") and int(self.config.get_slot_label_num()) == 0:
167
+ self.slot_list = self.data_factory.slot_label_list
168
+ self.slot_dict = self.data_factory.slot_label_dict
169
  self.config.set_slot_label_num(len(self.slot_list))
170
+
171
+
172
 
173
  # autoload embedding for non-pretrained encoder
174
  if self.config["model"]["encoder"].get("embedding") and self.config["model"]["encoder"]["embedding"].get(
 
182
  self.config.autoload_template()
183
  # save config
184
  self.logger.set_config(self.config)
185
+ self.saver.save_tokenizer(self.tokenizer)
186
+ self.saver.save_label(self.intent_list, self.slot_list)
187
+ self.config.set_vocab_size(self.tokenizer.vocab_size)
188
+
 
 
 
 
 
189
  if self.config.base.get("test"):
190
+ self.test_dataset = self.data_factory.load_dataset(self.config.dataset, split="test")
191
+ self.test_dataloader = self.data_factory.get_data_loader(self.test_dataset,
 
192
  batch_size,
193
  shuffle=False,
194
  device=self.device,
 
198
  label2tensor=False,
199
  **tokenizer_config)
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  def eval(self, step: int, best_metric: float) -> float:
202
  """ evaluation models.
203
 
 
209
  float: updated best metric value
210
  """
211
  # TODO: save dev
212
+ _, res = self.__evaluate(self.model, self.dev_dataloader, mode="dev")
213
  self.logger.log_metric(res, metric_split="dev", step=step)
214
+ if res[self.config.evaluator.get("best_key")] > best_metric:
215
+ best_metric = res[self.config.evaluator.get("best_key")]
216
+ train_state = {
217
+ "step": step,
218
+ "best_metric": best_metric,
219
+ "optimizer": self.optimizer.state_dict(),
220
+ "lr_scheduler": self.lr_scheduler.state_dict()
221
+ }
222
+ self.saver.save_model(self.model, train_state, self.accelerator)
223
+ if self.config.base.get("test"):
224
+ outputs, test_res = self.__evaluate(self.model, self.test_dataloader, mode="test")
225
+ self.saver.save_output(outputs, self.test_dataset)
226
+ self.logger.log_metric(test_res, metric_split="test", step=step)
 
 
 
 
 
 
 
 
 
 
227
  return best_metric
228
 
229
  def train(self) -> float:
 
232
  Returns:
233
  float: updated best metric value
234
  """
235
+ self.model.train()
236
+ if self.accelerator is not None:
237
+ self.total_step = math.ceil(self.total_step / self.accelerator.num_processes)
238
+ if self.optimizer is None:
239
+ self.optimizer = instantiate(self.config["optimizer"])(self.model.parameters())
240
+ if self.lr_scheduler is None:
241
+ self.lr_scheduler = instantiate(self.config["scheduler"])(
242
+ optimizer=self.optimizer,
243
+ num_training_steps=self.total_step
244
+ )
245
+ if not self.prepared and self.accelerator is not None:
246
+ self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
247
+ self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
248
+ step = self.init_step
249
  progress_bar = tqdm(range(self.total_step))
250
+ progress_bar.update(self.init_step)
251
+ self.optimizer.zero_grad()
252
  for _ in range(int(self.config.base.get("epoch_num"))):
253
  for data in self.train_dataloader:
254
  if step == 0:
 
272
  loss.backward()
273
  self.optimizer.step()
274
  self.lr_scheduler.step()
275
+ train_state = {
276
+ "step": step,
277
+ "best_metric": self.best_metric,
278
+ "optimizer": self.optimizer.state_dict(),
279
+ "lr_scheduler": self.lr_scheduler.state_dict()
280
+ }
281
+ if not self.saver.auto_save_step(self.model, train_state, self.accelerator):
282
+ if not self.config.evaluator.get("eval_by_epoch") and step % self.config.evaluator.get("eval_step") == 0 and step != 0:
283
+ self.best_metric = self.eval(step, self.best_metric)
284
  step += 1
285
  progress_bar.update(1)
286
+ if self.config.evaluator.get("eval_by_epoch"):
287
+ self.best_metric = self.eval(step, self.best_metric)
288
  self.logger.finish()
289
+ return self.best_metric
290
 
291
+ def test(self):
292
+ return self.__evaluate(self.model, self.test_dataloader, mode="test")
293
+
294
  def __set_seed(self, seed_value: int):
295
  """Manually set random seeds.
296
 
 
309
  torch.backends.cudnn.benchmark = True
310
  return
311
 
312
+ def __evaluate(self, model, dataloader, mode="dev"):
313
  model.eval()
314
  inps = InputData()
315
  outputs = OutputData()
 
323
 
324
  decode_output.map_output(slot_map=self.slot_list,
325
  intent_map=self.intent_list)
326
+ if self.config.model["decoder"].get("slot_classifier"):
327
+ data, decode_output = utils.remove_slot_ignore_index(
328
+ data, decode_output, ignore_index="#")
329
 
330
  inps.merge_input_data(data)
331
  outputs.merge_output_data(decode_output)
332
+ if "metric" in self.config.evaluator:
333
  res = Evaluator.compute_all_metric(
334
+ inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.evaluator["metric"])
335
  else:
336
  res = Evaluator.compute_all_metric(
337
  inps, outputs, intent_label_map=self.intent_dict)
338
+ self.logger.info(f"Best {mode} metric: "+str(res))
339
  model.train()
340
  return outputs, res
341
 
342
  def load(self):
343
+
344
+ if self.tokenizer is None:
345
+ with open(os.path.join(self.load_dir, "tokenizer.pkl"), 'rb') as f:
346
+ self.tokenizer = dill.load(f)
347
+ label = utils.load_json(os.path.join(self.load_dir, "label.json"))
348
+ if label["intent"] is None:
349
+ self.intent_list = None
350
+ self.intent_dict = None
351
+ else:
352
+ self.intent_list = label["intent"]
353
+ self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
354
+ self.config.set_intent_label_num(len(self.intent_list))
355
+ if label["slot"] is None:
356
+ self.slot_list = None
357
+ self.slot_dict = None
358
+ else:
359
+ self.slot_list = label["slot"]
360
+ self.slot_dict = {x: i for i, x in enumerate(label["slot"])}
361
+ self.config.set_slot_label_num(len(self.slot_list))
362
+ self.config.set_vocab_size(self.tokenizer.vocab_size)
363
+ if self.accelerator is not None and self.load_dir is not None:
364
+ self.model = torch.load(os.path.join(self.load_dir, "model.pkl"), map_location=torch.device(self.device))
365
+ self.prepared = True
366
+ self.accelerator.load_state(self.load_dir)
367
+ self.accelerator.prepare_model(self.model)
368
  else:
369
+ self.model = torch.load(os.path.join(
370
+ self.load_dir, "model.pkl"), map_location=torch.device(self.device))
371
+ # if self.config.tokenizer["_tokenizer_name_"] == "word_tokenizer":
372
+ # self.tokenizer = get_tokenizer_class(self.config.tokenizer["_tokenizer_name_"]).from_file(os.path.join(self.load_dir, "tokenizer.json"))
373
+ # else:
374
+ # self.tokenizer = get_tokenizer(self.config.tokenizer["_tokenizer_name_"])
375
+ self.model.to(self.device)
376
+
377
+
378
+ def from_pretrained(self):
379
+ self.config.autoload_template()
380
+ model = PretrainedModelForSLU.from_pretrained(self.config.model["_from_pretrained_"])
381
+ # model = load_model(self.config.model["_from_pretrained_"])
382
+ self.model = model.model
383
+ if self.tokenizer is None:
384
+ self.tokenizer = PreTrainedTokenizerForSLU.from_pretrained(
385
+ self.config.tokenizer["_from_pretrained_"])
386
+ self.config.tokenizer = model.config.tokenizer
387
+ # self.tokenizer = load_tokenizer(self.config.tokenizer["_from_pretrained_"])
388
+
389
  self.model.to(self.device)
390
+ label = model.config._id2label
391
+ self.config.model = model.config.model
392
  self.intent_list = label["intent"]
393
  self.slot_list = label["slot"]
394
+ self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
395
+ self.slot_dict = {x: i for i, x in enumerate(label["slot"])}
 
396
 
397
  def predict(self, text_data):
398
  self.model.eval()
399
  tokenizer_config = {key: self.config.tokenizer[key]
400
+ for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
401
  align_mode = self.config.tokenizer.get("_align_mode_")
402
  inputs = self.data_factory.batch_fn(batch=[{"text": text_data.split(" ")}],
403
+ device=self.device,
404
+ config=tokenizer_config,
405
+ enable_label=False,
406
+ align_mode=align_mode if align_mode is not None else "general",
407
+ label2tensor=False)
408
  output = self.model(inputs)
409
  decode_output = self.model.decode(output, inputs)
410
  decode_output.map_output(slot_map=self.slot_list,
411
+ intent_map=self.intent_list)
412
  if self.config.base.get("multi_intent"):
413
  intent = decode_output.intent_ids[0]
414
  else:
415
  intent = [decode_output.intent_ids[0]]
416
+ input_ids = inputs.input_ids[0].tolist()
417
+ tokens = [self.tokenizer.decode(ids) for ids in input_ids]
418
+ slots = decode_output.slot_ids[0]
419
+ return {"intent": intent, "slot": slots, "text": tokens}
common/saver.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ LastEditors: Qiguang Chen
4
+ Date: 2023-02-12 22:23:58
5
+ LastEditTime: 2023-02-19 14:14:56
6
+ Description:
7
+
8
+ '''
9
+ import json
10
+ import os
11
+ import queue
12
+ import shutil
13
+ import torch
14
+ import dill
15
+ from common import utils
16
+
17
+
18
class Saver():
    """Persist tokenizer, labels, model checkpoints and prediction outputs.

    Everything is written below ``self.model_save_dir``. That directory is
    the ``save_dir`` config entry when it is set, otherwise
    ``"save/" + start_time``.
    """

    def __init__(self, config, start_time=None) -> None:
        """Resolve the save directory and read the saving options.

        Args:
            config: mapping-like object read through ``get`` / ``[]``. Keys
                read here: ``save_dir``, ``save_mode``, ``max_save_num``.
            start_time (str, optional): run identifier used to build the
                default directory. Required when ``save_dir`` is not set.

        Raises:
            TypeError: if neither ``save_dir`` nor ``start_time`` is given.
        """
        self.config = config
        if self.config.get("save_dir"):
            self.model_save_dir = self.config["save_dir"]
        else:
            if start_time is None:
                # Same exception type the bare string concatenation would
                # raise, but with a message that names the actual problem.
                raise TypeError(
                    "start_time is required when 'save_dir' is not configured")
            self.model_save_dir = "save/" + start_time
        # makedirs also creates missing parent directories, so a nested
        # save_dir works, and exist_ok avoids the check-then-create race.
        os.makedirs(self.model_save_dir, exist_ok=True)

        save_mode = config.get("save_mode")
        self.save_mode = save_mode if save_mode is not None else "save-by-eval"

        max_save_num = self.config.get("max_save_num")
        self.max_save_num = max_save_num if max_save_num is not None else 1
        # Size the pool from the defaulted value, never from a raw None.
        self.save_pool = queue.Queue(maxsize=self.max_save_num)

    def save_tokenizer(self, tokenizer):
        """Serialize ``tokenizer`` with dill to ``tokenizer.pkl``."""
        with open(os.path.join(self.model_save_dir, "tokenizer.pkl"), 'wb') as f:
            dill.dump(tokenizer, f)

    def save_label(self, intent_list, slot_list):
        """Write the intent and slot label lists to ``label.json``."""
        utils.save_json(os.path.join(self.model_save_dir, "label.json"),
                        {"intent": intent_list, "slot": slot_list})

    def save_model(self, model, train_state, accelerator=None):
        """Save a checkpoint of ``model`` and its training state.

        When ``max_save_num`` is not 1, each checkpoint goes to a sub-directory
        named after ``train_state["step"]`` and the oldest tracked checkpoint
        directory is removed once the pool is full. When it is 1, the
        checkpoint is written directly into ``self.model_save_dir``.

        Args:
            model: model object to save.
            train_state (dict): must contain ``"step"``; saved as
                ``train_state.pkl`` when no accelerator is used.
            accelerator (optional): accelerator object; when given, saving is
                delegated to its ``save`` / ``save_state`` methods.
        """
        step = train_state["step"]
        if self.max_save_num != 1:
            model_save_dir = os.path.join(self.model_save_dir, str(step))
            if self.save_pool.full():
                # Evict the oldest checkpoint to make room for the new one.
                shutil.rmtree(self.save_pool.get())
            self.save_pool.put(model_save_dir)
        else:
            model_save_dir = self.model_save_dir
        os.makedirs(model_save_dir, exist_ok=True)

        if accelerator is None:
            torch.save(model, os.path.join(model_save_dir, "model.pkl"))
            torch.save(train_state, os.path.join(model_save_dir, "train_state.pkl"), pickle_module=dill)
        else:
            # NOTE(review): this branch does not write train_state.pkl;
            # confirm that accelerator.save_state covers what reloading needs.
            accelerator.wait_for_everyone()
            unwrapped_model = accelerator.unwrap_model(model)
            accelerator.save(unwrapped_model, os.path.join(model_save_dir, "model.pkl"))
            accelerator.save_state(output_dir=model_save_dir)

    def auto_save_step(self, model, train_state, accelerator=None):
        """Save a checkpoint if step-based saving is due at this step.

        A checkpoint is saved only when ``save_mode`` is ``"save-by-step"``,
        the step is non-zero and it is a multiple of the ``save_step`` config
        entry.

        Returns:
            bool: True if a checkpoint was saved, False otherwise.
        """
        step = train_state["step"]
        if self.save_mode == "save-by-step" and step % self.config.get("save_step") == 0 and step != 0:
            self.save_model(model, train_state, accelerator)
            return True
        return False

    def save_output(self, outputs, dataset):
        """Delegate to ``outputs.save`` with the save directory and ``dataset``."""
        outputs.save(self.model_save_dir, dataset)
common/tokenizer.py CHANGED
@@ -103,7 +103,7 @@ class WordTokenizer(object):
103
  assert isinstance(instance, str)
104
 
105
  # count the frequency of instances.
106
- self.counter[instance] += 1
107
 
108
  if instance not in self.index2instance:
109
  self.instance2index[instance] = len(self.index2instance)
@@ -190,6 +190,18 @@ class WordTokenizer(object):
190
  index = index.tolist()
191
  return self.decode(index)
192
  return self.index2instance[index]
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  def save(self, path):
195
  """ Save the content of alphabet to files.
@@ -214,7 +226,7 @@ class WordTokenizer(object):
214
  obj = json.load(fw)
215
  tokenizer = WordTokenizer(obj["name"])
216
  tokenizer.instance2index = OrderedDict(obj["token_map"])
217
- tokenizer.counter = len(tokenizer.instance2index)
218
  tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
219
  return tokenizer
220
 
 
103
  assert isinstance(instance, str)
104
 
105
  # count the frequency of instances.
106
+ # self.counter[instance] += 1
107
 
108
  if instance not in self.index2instance:
109
  self.instance2index[instance] = len(self.index2instance)
 
190
  index = index.tolist()
191
  return self.decode(index)
192
  return self.index2instance[index]
193
+
194
+ def decode_batch(self, index, **kargs):
195
+ """ Get corresponding instance of query index.
196
+
197
+ if index is invalid, then throws exception.
198
+
199
+ Args:
200
+ index (int): is query index, possibly iterable.
201
+ Returns:
202
+ is corresponding instance.
203
+ """
204
+ return self.decode(index)
205
 
206
  def save(self, path):
207
  """ Save the content of alphabet to files.
 
226
  obj = json.load(fw)
227
  tokenizer = WordTokenizer(obj["name"])
228
  tokenizer.instance2index = OrderedDict(obj["token_map"])
229
+ # tokenizer.counter = len(tokenizer.instance2index)
230
  tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
231
  return tokenizer
232
 
common/utils.py CHANGED
@@ -12,7 +12,7 @@ import torch
12
  from torch.nn.utils.rnn import pad_sequence
13
  from tqdm import tqdm
14
  from torch import Tensor
15
-
16
  class InputData():
17
  """input datas class
18
  """
@@ -486,4 +486,14 @@ def save_json(file_path, obj):
486
  def load_json(file_path):
487
  with open(file_path, 'r', encoding="utf8") as fw:
488
  res =json.load(fw)
489
- return res
 
 
 
 
 
 
 
 
 
 
 
12
  from torch.nn.utils.rnn import pad_sequence
13
  from tqdm import tqdm
14
  from torch import Tensor
15
+ import argparse
16
  class InputData():
17
  """input datas class
18
  """
 
486
  def load_json(file_path):
487
  with open(file_path, 'r', encoding="utf8") as fw:
488
  res =json.load(fw)
489
+ return res
490
+
491
def str2bool(v):
    """Convert a command-line style value to a bool.

    Intended for use as an ``argparse`` ``type=`` callable.

    Args:
        v: a bool (returned unchanged) or a value whose string form is one of
            'yes', 'true', 't', 'y', '1' (True) or 'no', 'false', 'f', 'n',
            '0' (False). Matching ignores case and surrounding whitespace.

    Returns:
        bool: the parsed value.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognized boolean.
    """
    if isinstance(v, bool):
        return v
    # Normalize once via str() so non-string input (None, numbers) reaches
    # the ArgumentTypeError below; a bare .lower() would raise AttributeError.
    text = str(v).strip().lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
config/README.md ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration
2
+
3
+ ## 1. Introduction
4
+
5
+ Configuration is divided into fine-grained reusable modules:
6
+
7
+ - `base`: basic configuration
8
+ - `logger`: logger setting
9
+ - `model_manager`: loading and saving model parameters
10
+ - `accelerator`: whether to enable multi-GPU
11
+ - `dataset`: dataset management
12
+ - `evaluator`: evaluation and metrics setting.
13
+ - `tokenizer`: Tokenizer initiation and tokenizing setting.
14
+ - `optimizer`: Optimizer initiation setting.
15
+ - `scheduler`: scheduler initiation setting.
16
+ - `model`: model construction setting.
17
+
18
+ From Sec. 2 to Sec. 11, we will describe the configuration in detail. Or you can see [Examples](examples/README.md) for Quick Start.
19
+
20
+ NOTE: `_*_` config are reserved fields in OpenSLU.
21
+
22
+ ## Configuration Item Script
23
+ In OpenSLU configuration, we support a simple calculation script for each configuration item. For example, we can get `dataset_name` by using `{dataset.dataset_name}` and fill its value into the Python script `'LightChen2333/agif-slu-' + '*'`. (Without the surrounding `''`, the value of `{dataset.dataset_name}` will be treated as a variable.)
24
+
25
+ NOTE: each item with `{}` will be treated as python script.
26
+ ```yaml
27
+ tokenizer:
28
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
29
+
30
+ ```
31
+
32
+ ## `base` Config
33
+ ```yaml
34
+ # `start_time` will be generated automatically when any config script starts; there is no need to assign it.
35
+ # start_time: xxxxxxxx
36
+ base:
37
+ name: "OpenSLU" # project/logger name
38
+ multi_intent: false # whether to enable multi-intent setting
39
+ train: True # enable train else enable zero-shot
40
+ test: True # enable test during train.
41
+ device: cuda # device for cuda/cpu
42
+ seed: 42 # random seed
43
+ best_key: EMA # save model by which metric[intent_acc/slot_f1/EMA]
44
+ tokenizer_name: word_tokenizer # tokenizer: word_tokenizer for no pretrained model, else use [AutoTokenizer] tokenizer name
45
+ add_special_tokens: false # whether add [CLS], [SEP] special tokens
46
+ epoch_num: 300 # train epoch num
47
+ # eval_step: 280 # if eval_by_epoch = false and eval_step > 0, will evaluate model by steps
48
+ eval_by_epoch: true # evaluate model by epoch
49
+ batch_size: 16 # batch size
50
+ ```
51
+ ## `logger` Config
52
+ ```yaml
53
+ logger:
54
+ # `wandb` is supported in both single- and multi-GPU settings,
55
+ # `tensorboard` is only supported in multi-GPU,
56
+ # and `fitlog` is only supported in single-GPU
57
+ logger_type: wandb
58
+ ```
59
+ ## `model_manager` Config
60
+ ```yaml
61
+ model_manager:
62
+ # if load_dir != `null`, OpenSLU will try to load checkpoint to continue training,
63
+ # if load_dir == `null`, OpenSLU will restart training.
64
+ load_dir: null
65
+ # The dir path to save model and training state.
66
+ # if save_dir == `null` model will be saved to `save/{start_time}`
67
+ save_dir: save/stack
68
+ # save_mode can be selected in [save-by-step, save-by-eval]
69
+ # `save-by-step` means save model only by {save_step} steps without evaluation.
70
+ # `save-by-eval` means save model by best validation performance
71
+ save_mode: save-by-eval
72
+ # save_step: 100 # only enabled when save_mode == `save-by-step`
73
+ max_save_num: 1 # The number of best models that will be saved.
74
+ ```
75
+ ## `accelerator` Config
76
+ ```yaml
77
+ accelerator:
78
+ use_accelerator: false # will enable `accelerator` if use_accelerator is `true`
79
+ ```
80
+ ## `dataset` Config
81
+ ```yaml
82
+ dataset:
83
+ # support load model from hugging-face.
84
+ # dataset_name can be selected in [atis, snips, mix-atis, mix-snips]
85
+ dataset_name: atis
86
+ # You can assign a path to any individual dataset split; the remaining splits are the same as the splits in `dataset_name`.
87
+ # train: atis # support load model from hugging-face or assigned local data path.
88
+ # validation: {root}/ATIS/dev.jsonl
89
+ # test: {root}/ATIS/test.jsonl
90
+ ```
91
+ ## `evaluator` Config
92
+ ```yaml
93
+ evaluator:
94
+ best_key: EMA # the metric to judge the best model
95
+ eval_by_epoch: true # Evaluate after an epoch if `true`.
96
+ # Evaluate after {eval_step} steps if eval_by_epoch == `false`.
97
+ # eval_step: 1800
98
+ # The supported metrics are listed below:
99
+ # - intent_acc
100
+ # - slot_f1
101
+ # - EMA
102
+ # - intent_f1
103
+ # - macro_intent_f1
104
+ # - micro_intent_f1
105
+ # NOTE: [intent_f1, macro_intent_f1, micro_intent_f1] are only supported in the multi-intent setting. intent_f1 and macro_intent_f1 are the same metric.
106
+ metric:
107
+ - intent_acc
108
+ - slot_f1
109
+ - EMA
110
+ ```
111
+ ## `tokenizer` Config
112
+ ```yaml
113
+ tokenizer:
114
+ # Init tokenizer. Support `word_tokenizer` and other tokenizers in huggingface.
115
+ _tokenizer_name_: word_tokenizer
116
+ # if `_tokenizer_name_` is not assigned, you can load pretrained tokenizer from hugging-face.
117
+ # _from_pretrained_: LightChen2333/stack-propagation-slu-atis
118
+ _padding_side_: right # the padding side of tokenizer, support [left/ right]
119
+ # Align mode between text and slot, support [fast/ general],
120
+ # `general` is supported by most tokenizers, `fast` is supported by only a small portion of tokenizers.
121
+ _align_mode_: fast
122
+ _to_lower_case_: true
123
+ add_special_tokens: false # other tokenizer args, you can add other args to tokenizer initialization except `_*_` format args
124
+ max_length: 512
125
+
126
+ ```
127
+ ## `optimizer` Config
128
+ ```yaml
129
+ optimizer:
130
+ _model_target_: torch.optim.Adam # Optimizer class/ function return Optimizer object
131
+ _model_partial_: true # partial load configuration. Here will add model.parameters() to complete all Optimizer parameters
132
+ lr: 0.001 # learning rate
133
+ weight_decay: 1e-6 # weight decay
134
+ ```
135
+ ## `scheduler` Config
136
+ ```yaml
137
+ scheduler:
138
+ _model_target_: transformers.get_scheduler
139
+ _model_partial_: true # partial load configuration. Here will add optimizer, num_training_steps to complete all scheduler parameters
140
+ name : "linear"
141
+ num_warmup_steps: 0
142
+ ```
143
+ ## `model` Config
144
+ ```yaml
145
+ model:
146
+ # _from_pretrained_: LightChen2333/stack-propagation-slu-atis # load the model from hugging-face; in that case none of the parameters below need to be assigned.
147
+ _model_target_: model.OpenSLUModel # the general model class, can automatically build the model through configuration.
148
+
149
+ encoder:
150
+ _model_target_: model.encoder.AutoEncoder # auto-encoder to autoload provided encoder model
151
+ encoder_name: self-attention-lstm # support [lstm/ self-attention-lstm] and other pretrained models supported by hugging-face
152
+
153
+ embedding: # word embedding layer
154
+ # load_embedding_name: glove.6B.300d.txt # support autoload glove embedding.
155
+ embedding_dim: 256 # embedding dim
156
+ dropout_rate: 0.5 # dropout ratio after embedding
157
+
158
+ lstm:
159
+ layer_num: 1 # lstm configuration
160
+ bidirectional: true
161
+ output_dim: 256 # module should set output_dim for autoload input_dim in next module. You can also set input_dim manually.
162
+ dropout_rate: 0.5
163
+
164
+ attention: # self-attention configuration
165
+ hidden_dim: 1024
166
+ output_dim: 128
167
+ dropout_rate: 0.5
168
+
169
+ return_with_input: true # add inputs information, like attention_mask, to decoder module.
170
+ return_sentence_level_hidden: false # if return sentence representation to decoder module
171
+
172
+ decoder:
173
+ _model_target_: model.decoder.StackPropagationDecoder # decoder name
174
+ interaction:
175
+ _model_target_: model.decoder.interaction.StackInteraction # interaction module name
176
+ differentiable: false # interaction module config
177
+
178
+ intent_classifier:
179
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier # intent classifier module name
180
+ layer_num: 1
181
+ bidirectional: false
182
+ hidden_dim: 64
183
+ force_ratio: 0.9 # teacher-force ratio
184
+ embedding_dim: 8 # intent embedding dim
185
+ ignore_index: -100 # ignore index to compute loss and metric
186
+ dropout_rate: 0.5
187
+ mode: "token-level-intent" # decode mode, support [token-level-intent, intent, slot]
188
+ use_multi: "{base.multi_intent}"
189
+ return_sentence_level: true # whether to return sentence level prediction as decoded input
190
+
191
+ slot_classifier:
192
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
193
+ layer_num: 1
194
+ bidirectional: false
195
+ force_ratio: 0.9
196
+ hidden_dim: 64
197
+ embedding_dim: 32
198
+ ignore_index: -100
199
+ dropout_rate: 0.5
200
+ mode: "slot"
201
+ use_multi: false
202
+ return_sentence_level: false
203
+ ```
204
+
205
+ ## Implementing a New Model
206
+
207
+ ### 1. Interaction Re-Implement
208
+ Here we take `DCA-Net` as an example:
209
+
210
+ In most cases, you just need to rewrite `Interaction` module:
211
+
212
+ ```python
213
+ from common.utils import HiddenData
214
+ from model.decoder.interaction import BaseInteraction
215
+ class DCANetInteraction(BaseInteraction):
216
+ def __init__(self, **config):
217
+ super().__init__(**config)
218
+ self.T_block1 = I_S_Block(self.config["output_dim"], self.config["attention_dropout"], self.config["num_attention_heads"])
219
+ ...
220
+
221
+ def forward(self, encode_hidden: HiddenData, **kwargs):
222
+ ...
223
+ ```
224
+
225
+ and then you should configure your module:
226
+ ```yaml
227
+ base:
228
+ ...
229
+
230
+ optimizer:
231
+ ...
232
+
233
+ scheduler:
234
+ ...
235
+
236
+ model:
237
+ _model_target_: model.OpenSLUModel
238
+ encoder:
239
+ _model_target_: model.encoder.AutoEncoder
240
+ encoder_name: lstm
241
+
242
+ embedding:
243
+ load_embedding_name: glove.6B.300d.txt
244
+ embedding_dim: 300
245
+ dropout_rate: 0.5
246
+
247
+ lstm:
248
+ dropout_rate: 0.5
249
+ output_dim: 128
250
+ layer_num: 2
251
+ bidirectional: true
252
+ output_dim: "{model.encoder.lstm.output_dim}"
253
+ return_with_input: true
254
+ return_sentence_level_hidden: false
255
+
256
+ decoder:
257
+ _model_target_: model.decoder.DCANetDecoder
258
+ interaction:
259
+ _model_target_: model.decoder.interaction.DCANetInteraction
260
+ output_dim: "{model.encoder.output_dim}"
261
+ attention_dropout: 0.5
262
+ num_attention_heads: 8
263
+
264
+ intent_classifier:
265
+ _model_target_: model.decoder.classifier.LinearClassifier
266
+ mode: "intent"
267
+ input_dim: "{model.encoder.output_dim}"
268
+ ignore_index: -100
269
+
270
+ slot_classifier:
271
+ _model_target_: model.decoder.classifier.LinearClassifier
272
+ mode: "slot"
273
+ input_dim: "{model.encoder.output_dim}"
274
+ ignore_index: -100
275
+ ```
276
+
277
+ That's it: you have finished constructing the model. You can run the following script to train it:
278
+ ```shell
279
+ python run.py -cp config/dca_net.yaml [-ds atis]
280
+ ```
281
+ ### 2. Decoder Re-Implement
282
+ Sometimes the `interaction then classification` order cannot meet your needs. In that case, you can simply rewrite the decoder to get a flexible interaction order:
283
+
284
+ Here, we take `stack-propagation` as an example:
285
+ 1. We should rewrite interaction module for `stack-propagation`
286
+ ```python
287
+ from common.utils import ClassifierOutputData, HiddenData
288
+ from model.decoder.interaction.base_interaction import BaseInteraction
289
+ class StackInteraction(BaseInteraction):
290
+ def __init__(self, **config):
291
+ super().__init__(**config)
292
+ ...
293
+
294
+ def forward(self, intent_output: ClassifierOutputData, encode_hidden: HiddenData):
295
+ ...
296
+ ```
297
+ 2. We should rewrite `StackPropagationDecoder` for stack-propagation interaction order:
298
+ ```python
299
+ from common.utils import HiddenData, OutputData
300
+ class StackPropagationDecoder(BaseDecoder):
301
+
302
+ def forward(self, hidden: HiddenData):
303
+ pred_intent = self.intent_classifier(hidden)
304
+ hidden = self.interaction(pred_intent, hidden)
305
+ pred_slot = self.slot_classifier(hidden)
306
+ return OutputData(pred_intent, pred_slot)
307
+ ```
308
+
309
+ 3. Then we can easily combine general model by `config/stack-propagation.yaml` configuration file:
310
+ ```yaml
311
+ base:
312
+ ...
313
+
314
+ ...
315
+
316
+ model:
317
+ _model_target_: model.OpenSLUModel
318
+
319
+ encoder:
320
+ ...
321
+
322
+ decoder:
323
+ _model_target_: model.decoder.StackPropagationDecoder
324
+ interaction:
325
+ _model_target_: model.decoder.interaction.StackInteraction
326
+ differentiable: false
327
+
328
+ intent_classifier:
329
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
330
+ ... # parameters needed __init__(*)
331
+ mode: "token-level-intent"
332
+ use_multi: false
333
+ return_sentence_level: true
334
+
335
+ slot_classifier:
336
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
337
+ ... # parameters needed __init__(*)
338
+ mode: "slot"
339
+ use_multi: false
340
+ return_sentence_level: false
341
+ ```
342
+ 4. You can run script as follows to train model:
343
+ ```shell
344
+ python run.py -cp config/stack-propagation.yaml
345
+ ```
346
+
347
+
348
+
config/app.yaml CHANGED
@@ -1,109 +1,6 @@
1
- device: "NVIDIA GeForce RTX 2080 Ti"
2
-
3
  host: 127.0.0.1
4
  port: 7860
5
 
6
  is_push_to_public: false
7
  save-path: save/stack/outputs.jsonl
8
- page-size: 2
9
-
10
- base:
11
- name: "OpenSLUv1"
12
- train: false
13
- test: false
14
- device: cpu
15
- ckpt_path: null
16
- seed: 42
17
- best_key: EMA
18
- epoch_num: 300
19
- batch_size: 16
20
- eval_by_epoch: true
21
- model_dir: save/stack
22
- template: application.html
23
- accelerator:
24
- use_accelerator: false
25
-
26
- dataset:
27
- dataset_name: atis
28
-
29
- metric:
30
- - intent_acc
31
- - slot_f1
32
- - EMA
33
-
34
- tokenizer:
35
- _tokenizer_name_: word_tokenizer
36
- _padding_side_: right
37
- _align_mode_: fast
38
- _to_lower_case_: true
39
- add_special_tokens: false
40
- max_length: 512
41
-
42
- optimizer:
43
- _model_target_: torch.optim.Adam
44
- _model_partial_: true
45
- lr: 0.001
46
- weight_decay: 1e-6
47
-
48
- scheduler:
49
- _model_target_: transformers.get_scheduler
50
- _model_partial_: true
51
- name : "linear"
52
- num_warmup_steps: 0
53
-
54
- model:
55
- _model_target_: model.OpenSLUModel
56
-
57
- encoder:
58
- _model_target_: model.encoder.AutoEncoder
59
- encoder_name: self-attention-lstm
60
-
61
- embedding:
62
- embedding_dim: 256
63
- dropout_rate: 0.55
64
-
65
- lstm:
66
- layer_num: 1
67
- bidirectional: true
68
- output_dim: 256
69
- dropout_rate: 0.5
70
-
71
- attention:
72
- hidden_dim: 1024
73
- output_dim: 128
74
- dropout_rate: 0.6
75
-
76
- return_with_input: true
77
- return_sentence_level_hidden: false
78
-
79
- decoder:
80
- _model_target_: model.decoder.StackPropagationDecoder
81
- interaction:
82
- _model_target_: model.decoder.interaction.StackInteraction
83
- differentiable: false
84
-
85
- intent_classifier:
86
- _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
87
- layer_num: 1
88
- bidirectional: false
89
- force_ratio: 0.9
90
- hidden_dim: 64
91
- embedding_dim: 8
92
- ignore_index: -100
93
- dropout_rate: 0.5
94
- mode: "token-level-intent"
95
- use_multi: false
96
- return_sentence_level: true
97
-
98
- slot_classifier:
99
- _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
100
- layer_num: 1
101
- bidirectional: false
102
- force_ratio: 0.9
103
- hidden_dim: 64
104
- embedding_dim: 32
105
- ignore_index: -100
106
- dropout_rate: 0.55
107
- mode: "slot"
108
- use_multi: false
109
- return_sentence_level: false
 
 
 
1
  host: 127.0.0.1
2
  port: 7860
3
 
4
  is_push_to_public: false
5
  save-path: save/stack/outputs.jsonl
6
+ page-size: 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config/decoder/interaction/stack-propagation.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ differentiable: false
config/examples/README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Examples
2
+
3
+ Here we introduce some usages of our framework through configuration.
4
+
5
+ ## Reload to train
6
+
7
+ Firstly, you can run this script to train a `joint-bert` model:
8
+ ```shell
9
+ python run.py -cp config/examples/normal.yaml
10
+ ```
11
+
12
+ and you can use `kill` or `Ctrl+C` to kill the training process.
13
+
14
+ Then, to reload model and continue training, you can run `reload_to_train.yaml` to reload checkpoint and training state.
15
+ ```shell
16
+ python run.py -cp config/examples/reload_to_train.yaml
17
+ ```
18
+
19
+ The main difference in `reload_to_train.yaml` is the `model_manager` configuration item:
20
+ ```yaml
21
+ ...
22
+ model_manager:
23
+ load_train_state: True # set to True
24
+ load_dir: save/joint_bert # not null
25
+ ...
26
+ ...
27
+ ```
28
+
29
+ ## Load from Pre-finetuned model.
30
+ We upload all models to [LightChen2333](https://huggingface.co/LightChen2333). You can load those models with a simple configuration.
31
+ In `from_pretrained.yaml` and `from_pretrained_multi.yaml`, we show two example scripts to load from hugging face in single- and multi-intent, respectively. The key configuration items are as below:
32
+ ```yaml
33
+ tokenizer:
34
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
35
+
36
+ model:
37
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
38
+ ```
config/examples/from_pretrained.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: false
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ logger:
13
+ logger_type: wandb # wandb is supported in both single- and multi-GPU settings, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU
14
+
15
+ model_manager:
16
+ load_dir: null
17
+ save_dir: save/joint_bert
18
+ save_mode: save-by-eval # save-by-step
19
+ # save_step: 100
20
+ max_save_num: 1
21
+
22
+ accelerator:
23
+ use_accelerator: false
24
+
25
+ dataset:
26
+ dataset_name: atis
27
+
28
+ evaluator:
29
+ best_key: EMA
30
+ eval_by_epoch: true
31
+ # eval_step: 1800
32
+ metric:
33
+ - intent_acc
34
+ - slot_f1
35
+ - EMA
36
+
37
+ tokenizer:
38
+ _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
39
+
40
+ optimizer:
41
+ _model_target_: torch.optim.Adam
42
+ _model_partial_: true
43
+ lr: 0.001
44
+ weight_decay: 1e-6
45
+
46
+ scheduler:
47
+ _model_target_: transformers.get_scheduler
48
+ _model_partial_: true
49
+ name : "linear"
50
+ num_warmup_steps: 0
51
+
52
+ model:
53
+ _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
config/examples/from_pretrained_multi.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: false
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 300
11
+ batch_size: 16
12
+
13
+
14
+ logger:
15
+ logger_type: wandb # wandb is supported in both single- and multi-GPU settings, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU
16
+
17
+ model_manager:
18
+ load_dir: null
19
+ save_dir: save/joint_bert
20
+ save_mode: save-by-eval # save-by-step
21
+ # save_step: 100
22
+ max_save_num: 1
23
+
24
+ accelerator:
25
+ use_accelerator: false
26
+
27
+ dataset:
28
+ dataset_name: atis
29
+
30
+ evaluator:
31
+ best_key: EMA
32
+ eval_by_epoch: true
33
+ # eval_step: 1800
34
+ metric:
35
+ - intent_acc
36
+ - slot_f1
37
+ - EMA
38
+
39
+ tokenizer:
40
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
41
+
42
+ optimizer:
43
+ _model_target_: torch.optim.Adam
44
+ _model_partial_: true
45
+ lr: 0.001
46
+ weight_decay: 1e-6
47
+
48
+ scheduler:
49
+ _model_target_: transformers.get_scheduler
50
+ _model_partial_: true
51
+ name : "linear"
52
+ num_warmup_steps: 0
53
+
54
+ model:
55
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
config/examples/normal.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLU-test"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/joint_bert
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: bert-base-uncased
33
+ _padding_side_: right
34
+ _align_mode_: general
35
+ add_special_tokens: true
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 4e-6
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: bert-base-uncased
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/examples/reload_to_train.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLU-test"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_train_state: True
14
+ load_dir: save/joint_bert
15
+ save_dir: save/joint_bert
16
+
17
+ evaluator:
18
+ best_key: EMA
19
+ eval_by_epoch: true
20
+ # eval_step: 1800
21
+ metric:
22
+ - intent_acc
23
+ - slot_f1
24
+ - EMA
25
+
26
+ accelerator:
27
+ use_accelerator: false
28
+
29
+ dataset:
30
+ dataset_name: atis
31
+
32
+ tokenizer:
33
+ _tokenizer_name_: bert-base-uncased
34
+ _padding_side_: right
35
+ _align_mode_: general
36
+ add_special_tokens: true
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.AdamW
40
+ _model_partial_: true
41
+ lr: 4e-6
42
+ weight_decay: 1e-8
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.open_slu_model.OpenSLUModel
52
+ ignore_index: -100
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: bert-base-uncased
56
+ output_dim: 768
57
+ return_with_input: true
58
+ return_sentence_level_hidden: true
59
+
60
+ decoder:
61
+ _model_target_: model.decoder.base_decoder.BaseDecoder
62
+ intent_classifier:
63
+ _model_target_: model.decoder.classifier.LinearClassifier
64
+ mode: "intent"
65
+ ignore_index: -100
66
+
67
+
68
+ slot_classifier:
69
+ _model_target_: model.decoder.classifier.LinearClassifier
70
+ mode: "slot"
71
+ ignore_index: -100
config/reproduction/atis/bi-model.yaml ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/bi-model-atis
15
+
16
+ accelerator:
17
+ use_accelerator: false
18
+
19
+ dataset:
20
+ dataset_name: atis
21
+
22
+ evaluator:
23
+ best_key: EMA
24
+ eval_by_epoch: true
25
+ # eval_step: 1800
26
+ metric:
27
+ - intent_acc
28
+ - slot_f1
29
+ - EMA
30
+
31
+
32
+ tokenizer:
33
+ _tokenizer_name_: word_tokenizer
34
+ _padding_side_: right
35
+ _align_mode_: fast
36
+ add_special_tokens: false
37
+ max_length: 512
38
+
39
+ optimizer:
40
+ _model_target_: torch.optim.Adam
41
+ _model_partial_: true
42
+ lr: 0.001
43
+ weight_decay: 1e-6
44
+
45
+ scheduler:
46
+ _model_target_: transformers.get_scheduler
47
+ _model_partial_: true
48
+ name : "linear"
49
+ num_warmup_steps: 0
50
+
51
+ model:
52
+ _model_target_: model.OpenSLUModel
53
+
54
+ encoder:
55
+ _model_target_: model.encoder.BiEncoder
56
+ intent_encoder:
57
+ _model_target_: model.encoder.AutoEncoder
58
+ encoder_name: lstm
59
+
60
+ embedding:
61
+ embedding_dim: 256
62
+ dropout_rate: 0.4
63
+
64
+ lstm:
65
+ dropout_rate: 0.5
66
+ output_dim: 256
67
+ layer_num: 2
68
+ bidirectional: true
69
+
70
+ return_with_input: true
71
+ return_sentence_level_hidden: false
72
+
73
+ slot_encoder:
74
+ _model_target_: model.encoder.AutoEncoder
75
+ encoder_name: lstm
76
+
77
+ embedding:
78
+ embedding_dim: 256
79
+ dropout_rate: 0.4
80
+
81
+ lstm:
82
+ dropout_rate: 0.5
83
+ output_dim: 256
84
+ layer_num: 2
85
+ bidirectional: true
86
+
87
+ return_with_input: true
88
+ return_sentence_level_hidden: false
89
+
90
+ decoder:
91
+ _model_target_: model.decoder.BaseDecoder
92
+ # teacher_forcing: true
93
+ interaction:
94
+ _model_target_: model.decoder.interaction.BiModelInteraction
95
+ output_dim: 256
96
+ dropout_rate: 0.4
97
+
98
+ intent_classifier:
99
+ _model_target_: model.decoder.classifier.LinearClassifier
100
+ mode: "intent"
101
+ ignore_index: -100
102
+
103
+ slot_classifier:
104
+ _model_target_: model.decoder.classifier.LinearClassifier
105
+ mode: "slot"
106
+ ignore_index: -100
config/reproduction/atis/dca-net.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla P100-PCIE-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/dca-net-atis
15
+
16
+ accelerator:
17
+ use_accelerator: false
18
+
19
+ dataset:
20
+ dataset_name: atis
21
+
22
+ evaluator:
23
+ best_key: EMA
24
+ eval_by_epoch: true
25
+ # eval_step: 1800
26
+ metric:
27
+ - intent_acc
28
+ - slot_f1
29
+ - EMA
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: lstm
55
+
56
+ embedding:
57
+ load_embedding_name: glove.6B.300d.txt
58
+ embedding_dim: 300
59
+ dropout_rate: 0.5
60
+
61
+ lstm:
62
+ dropout_rate: 0.5
63
+ output_dim: 128
64
+ layer_num: 2
65
+ bidirectional: true
66
+ output_dim: "{model.encoder.lstm.output_dim}"
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.DCANetDecoder
72
+ interaction:
73
+ _model_target_: model.decoder.interaction.DCANetInteraction
74
+ output_dim: "{model.encoder.output_dim}"
75
+ attention_dropout: 0.5
76
+ num_attention_heads: 8
77
+
78
+ intent_classifier:
79
+ _model_target_: model.decoder.classifier.LinearClassifier
80
+ mode: "intent"
81
+ input_dim: "{model.encoder.output_dim}"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ input_dim: "{model.encoder.output_dim}"
88
+ ignore_index: -100
config/reproduction/atis/deberta.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/deberta-atis
15
+
16
+ dataset:
17
+ dataset_name: atis
18
+
19
+ evaluator:
20
+ best_key: EMA
21
+ eval_by_epoch: true
22
+ # eval_step: 1800
23
+ metric:
24
+ - intent_acc
25
+ - slot_f1
26
+ - EMA
27
+
28
+ tokenizer:
29
+ _tokenizer_name_: microsoft/deberta-v3-base
30
+ _padding_side_: right
31
+ add_special_tokens: true
32
+ max_length: 512
33
+
34
+ optimizer:
35
+ _model_target_: torch.optim.AdamW
36
+ _model_partial_: true
37
+ lr: 2e-5
38
+ weight_decay: 1e-8
39
+
40
+ scheduler:
41
+ _model_target_: transformers.get_scheduler
42
+ _model_partial_: true
43
+ name : "linear"
44
+ num_warmup_steps: 0
45
+
46
+ model:
47
+ _model_target_: model.open_slu_model.OpenSLUModel
48
+ ignore_index: -100
49
+ encoder:
50
+ _model_target_: model.encoder.AutoEncoder
51
+ encoder_name: microsoft/deberta-v3-base
52
+ output_dim: 768
53
+ return_with_input: true
54
+ return_sentence_level_hidden: true
55
+
56
+ decoder:
57
+ _model_target_: model.decoder.base_decoder.BaseDecoder
58
+ intent_classifier:
59
+ _model_target_: model.decoder.classifier.LinearClassifier
60
+ mode: "intent"
61
+ ignore_index: -100
62
+
63
+
64
+ slot_classifier:
65
+ _model_target_: model.decoder.classifier.LinearClassifier
66
+ mode: "slot"
67
+ ignore_index: -100
config/reproduction/atis/electra.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/electra-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ dataset:
26
+ dataset_name: atis
27
+
28
+ tokenizer:
29
+ _tokenizer_name_: google/electra-small-discriminator
30
+ _padding_side_: right
31
+ add_special_tokens: true
32
+ max_length: 512
33
+
34
+ optimizer:
35
+ _model_target_: torch.optim.AdamW
36
+ _model_partial_: true
37
+ lr: 2e-5
38
+ weight_decay: 1e-8
39
+
40
+ scheduler:
41
+ _model_target_: transformers.get_scheduler
42
+ _model_partial_: true
43
+ name : "linear"
44
+ num_warmup_steps: 0
45
+
46
+ model:
47
+ _model_target_: model.open_slu_model.OpenSLUModel
48
+ ignore_index: -100
49
+ encoder:
50
+ _model_target_: model.encoder.AutoEncoder
51
+ encoder_name: google/electra-small-discriminator
52
+ output_dim: 256
53
+ return_with_input: true
54
+ return_sentence_level_hidden: true
55
+
56
+ decoder:
57
+ _model_target_: model.decoder.base_decoder.BaseDecoder
58
+ intent_classifier:
59
+ _model_target_: model.decoder.classifier.LinearClassifier
60
+ mode: "intent"
61
+ ignore_index: -100
62
+
63
+
64
+ slot_classifier:
65
+ _model_target_: model.decoder.classifier.LinearClassifier
66
+ mode: "slot"
67
+ ignore_index: -100
config/reproduction/atis/joint-bert.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/joint-bert-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: bert-base-uncased
33
+ _padding_side_: right
34
+ _align_mode_: general
35
+ add_special_tokens: true
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 4e-6
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: bert-base-uncased
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/atis/roberta.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB" # informational only: GPU used for this run
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/roberta-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: roberta-base
33
+ _padding_side_: right
34
+ add_special_tokens: true
35
+ max_length: 512
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 2e-5
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: roberta-base
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/atis/slot-gated.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/slot-gated-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ ignore_index: -100
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: lstm
56
+
57
+ embedding:
58
+ embedding_dim: 256
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ dropout_rate: 0.5
63
+ output_dim: 256
64
+ layer_num: 2
65
+ bidirectional: true
66
+
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.BaseDecoder
72
+
73
+ interaction:
74
+ _model_target_: model.decoder.interaction.SlotGatedInteraction
75
+ remove_slot_attn: false
76
+ output_dim: 256
77
+ dropout_rate: 0.4
78
+
79
+ intent_classifier:
80
+ _model_target_: model.decoder.classifier.LinearClassifier
81
+ mode: "intent"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ ignore_index: -100
config/reproduction/atis/stack-propagation.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/stack-propagation-atis
15
+ save_mode: save-by-eval # alternative value: save-by-step
16
+ # save_step: 100
17
+ max_save_num: 1
18
+
19
+ accelerator:
20
+ use_accelerator: false
21
+
22
+ dataset:
23
+ dataset_name: atis
24
+
25
+ evaluator:
26
+ best_key: EMA
27
+ eval_by_epoch: true
28
+ # eval_step: 1800
29
+ metric:
30
+ - intent_acc
31
+ - slot_f1
32
+ - EMA
33
+
34
+ tokenizer:
35
+ _tokenizer_name_: word_tokenizer
36
+ _padding_side_: right
37
+ _align_mode_: fast
38
+ _to_lower_case_: true
39
+ add_special_tokens: false
40
+ max_length: 512
41
+
42
+ optimizer:
43
+ _model_target_: torch.optim.Adam
44
+ _model_partial_: true
45
+ lr: 0.001
46
+ weight_decay: 1e-6
47
+
48
+ scheduler:
49
+ _model_target_: transformers.get_scheduler
50
+ _model_partial_: true
51
+ name : "linear"
52
+ num_warmup_steps: 0
53
+
54
+ model:
55
+ _model_target_: model.OpenSLUModel
56
+
57
+ encoder:
58
+ _model_target_: model.encoder.AutoEncoder
59
+ encoder_name: self-attention-lstm
60
+
61
+ embedding:
62
+ embedding_dim: 256
63
+ dropout_rate: 0.55
64
+
65
+ lstm:
66
+ layer_num: 1
67
+ bidirectional: true
68
+ output_dim: 256
69
+ dropout_rate: 0.5
70
+
71
+ attention:
72
+ hidden_dim: 1024
73
+ output_dim: 128
74
+ dropout_rate: 0.6
75
+
76
+ return_with_input: true
77
+ return_sentence_level_hidden: false
78
+
79
+ decoder:
80
+ _model_target_: model.decoder.StackPropagationDecoder
81
+ interaction:
82
+ _model_target_: model.decoder.interaction.StackInteraction
83
+ differentiable: false
84
+
85
+ intent_classifier:
86
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
87
+ layer_num: 1
88
+ bidirectional: false
89
+ force_ratio: 0.9
90
+ hidden_dim: 64
91
+ embedding_dim: 8
92
+ ignore_index: -100
93
+ dropout_rate: 0.5
94
+ mode: "token-level-intent"
95
+ use_multi: false
96
+ return_sentence_level: true
97
+
98
+ slot_classifier:
99
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
100
+ layer_num: 1
101
+ bidirectional: false
102
+ force_ratio: 0.9
103
+ hidden_dim: 64
104
+ embedding_dim: 32
105
+ ignore_index: -100
106
+ dropout_rate: 0.55
107
+ mode: "slot"
108
+ use_multi: false
109
+ return_sentence_level: false
config/reproduction/mix-atis/agif.yaml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 3080"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 100
11
+ batch_size: 32
12
+ ignore_index: -100
13
+
14
+ model_manager:
15
+ load_dir: null
16
+ save_dir: save/agif-mix-atis
17
+
18
+ accelerator:
19
+ use_accelerator: false
20
+
21
+ dataset:
22
+ dataset_name: mix-atis
23
+
24
+ evaluator:
25
+ best_key: EMA
26
+ eval_by_epoch: true
27
+ # eval_step: 1800
28
+ metric:
29
+ - intent_acc
30
+ - intent_f1
31
+ - slot_f1
32
+ - EMA
33
+
34
+ tokenizer:
35
+ _tokenizer_name_: word_tokenizer
36
+ _padding_side_: right
37
+ _align_mode_: fast
38
+ add_special_tokens: false
39
+ max_length: 512
40
+
41
+ optimizer:
42
+ _model_target_: torch.optim.Adam
43
+ _model_partial_: true
44
+ lr: 0.001
45
+ weight_decay: 1e-6
46
+
47
+ scheduler:
48
+ _model_target_: transformers.get_scheduler
49
+ _model_partial_: true
50
+ name : "linear"
51
+ num_warmup_steps: 0
52
+
53
+ model:
54
+ _model_target_: model.OpenSLUModel
55
+
56
+ encoder:
57
+ _model_target_: model.encoder.AutoEncoder
58
+ encoder_name: self-attention-lstm
59
+
60
+ embedding:
61
+ embedding_dim: 128
62
+ dropout_rate: 0.4
63
+
64
+ lstm:
65
+ layer_num: 1
66
+ bidirectional: true
67
+ output_dim: 256
68
+ dropout_rate: 0.4
69
+
70
+ attention:
71
+ hidden_dim: 1024
72
+ output_dim: 128
73
+ dropout_rate: 0.4
74
+
75
+ unflat_attention:
76
+ dropout_rate: 0.4
77
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
78
+ return_with_input: true
79
+ return_sentence_level_hidden: true
80
+
81
+ decoder:
82
+ _model_target_: model.decoder.AGIFDecoder
83
+ # teacher_forcing: true
84
+ interaction:
85
+ _model_target_: model.decoder.interaction.AGIFInteraction
86
+ intent_embedding_dim: 128
87
+ input_dim: "{model.encoder.output_dim}"
88
+ hidden_dim: 128
89
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
90
+ dropout_rate: 0.4
91
+ alpha: 0.2
92
+ num_heads: 4
93
+ num_layers: 2
94
+ row_normalized: true
95
+
96
+ intent_classifier:
97
+ _model_target_: model.decoder.classifier.MLPClassifier
98
+ mode: "intent"
99
+ mlp:
100
+ - _model_target_: torch.nn.Linear
101
+ in_features: "{model.encoder.output_dim}"
102
+ out_features: 256
103
+ - _model_target_: torch.nn.LeakyReLU
104
+ negative_slope: 0.2
105
+ - _model_target_: torch.nn.Linear
106
+ in_features: 256
107
+ out_features: "{base.intent_label_num}"
108
+ dropout_rate: 0.4
109
+ loss_fn:
110
+ _model_target_: torch.nn.BCEWithLogitsLoss
111
+ use_multi: "{base.multi_intent}"
112
+ multi_threshold: 0.5
113
+ return_sentence_level: true
114
+ ignore_index: -100
115
+ weight: 0.3
116
+
117
+ slot_classifier:
118
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
119
+ mode: "slot"
120
+ input_dim: "{model.encoder.output_dim}"
121
+ layer_num: 1
122
+ bidirectional: false
123
+ force_ratio: 0.9
124
+ hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
125
+ embedding_dim: 128
126
+ # loss_fn:
127
+ # _model_target_: torch.nn.NLLLoss
128
+ ignore_index: -100
129
+ dropout_rate: 0.4
130
+ use_multi: false
131
+ multi_threshold: 0.5
132
+ return_sentence_level: false
133
+ weight: 0.7
config/reproduction/mix-atis/gl-gin.yaml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 300
11
+ batch_size: 32
12
+ ignore_index: -100
13
+
14
+ model_manager:
15
+ load_dir: null
16
+ save_dir: save/gl-gin-mix-atis
17
+
18
+ evaluator:
19
+ best_key: EMA
20
+ eval_by_epoch: true
21
+ # eval_step: 1800
22
+ metric:
23
+ - intent_acc
24
+ - intent_f1
25
+ - slot_f1
26
+ - EMA
27
+
28
+ dataset:
29
+ dataset_name: mix-atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: self-attention-lstm
56
+
57
+ embedding:
58
+ embedding_dim: 128
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ layer_num: 1
63
+ bidirectional: true
64
+ output_dim: 256
65
+ dropout_rate: 0.4
66
+
67
+ attention:
68
+ hidden_dim: 1024
69
+ output_dim: 128
70
+ dropout_rate: 0.4
71
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
72
+ return_with_input: true
73
+ return_sentence_level_hidden: false
74
+
75
+ decoder:
76
+ _model_target_: model.decoder.GLGINDecoder
77
+ dropout_rate: 0.4
78
+ interaction:
79
+ _model_target_: model.decoder.interaction.GLGINInteraction
80
+ intent_embedding_dim: 64
81
+ input_dim: "{model.encoder.output_dim}"
82
+ hidden_dim: 256
83
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
84
+ dropout_rate: 0.4
85
+ alpha: 0.2
86
+ num_heads: 8
87
+ num_layers: 2
88
+ row_normalized: true
89
+ slot_graph_window: 1
90
+ intent_label_num: "{base.intent_label_num}"
91
+
92
+ intent_classifier:
93
+ _model_target_: model.decoder.classifier.MLPClassifier
94
+ mode: "token-level-intent"
95
+ mlp:
96
+ - _model_target_: torch.nn.Linear
97
+ in_features: "{model.encoder.output_dim}"
98
+ out_features: 256
99
+ - _model_target_: torch.nn.LeakyReLU
100
+ negative_slope: 0.2
101
+ - _model_target_: torch.nn.Linear
102
+ in_features: 256
103
+ out_features: "{base.intent_label_num}"
104
+ loss_fn:
105
+ _model_target_: torch.nn.BCEWithLogitsLoss
106
+ dropout_rate: 0.4
107
+ use_multi: "{base.multi_intent}"
108
+ multi_threshold: 0.5
109
+ return_sentence_level: true
110
+ ignore_index: "{base.ignore_index}"
111
+
112
+ slot_classifier:
113
+ _model_target_: model.decoder.classifier.MLPClassifier
114
+ mode: "slot"
115
+ mlp:
116
+ - _model_target_: torch.nn.Linear
117
+ in_features: "{model.decoder.interaction.output_dim}"
118
+ out_features: "{model.decoder.interaction.output_dim}"
119
+ - _model_target_: torch.nn.LeakyReLU
120
+ negative_slope: 0.2
121
+ - _model_target_: torch.nn.Linear
122
+ in_features: "{model.decoder.interaction.output_dim}"
123
+ out_features: "{base.slot_label_num}"
124
+ ignore_index: "{base.ignore_index}"
125
+ dropout_rate: 0.4
126
+ use_multi: false
127
+ multi_threshold: 0.5
128
+ return_sentence_level: false
config/reproduction/mix-atis/vanilla.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base:
2
+ name: "OpenSLUv1"
3
+ multi_intent: true
4
+ train: true
5
+ test: true
6
+ device: cuda
7
+ seed: 42
8
+ epoch_num: 100
9
+ batch_size: 16
10
+ ignore_index: -100
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/vanilla-mix-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - intent_f1
23
+ - slot_f1
24
+ - EMA
25
+
26
+ dataset:
27
+ dataset_name: mix-atis
28
+
29
+ tokenizer:
30
+ _tokenizer_name_: word_tokenizer
31
+ _padding_side_: right
32
+ _align_mode_: fast
33
+ add_special_tokens: false
34
+ max_length: 512
35
+
36
+ optimizer:
37
+ _model_target_: torch.optim.Adam
38
+ _model_partial_: true
39
+ lr: 0.001
40
+ weight_decay: 1e-6
41
+
42
+ scheduler:
43
+ _model_target_: transformers.get_scheduler
44
+ _model_partial_: true
45
+ name : "linear"
46
+ num_warmup_steps: 0
47
+
48
+ model:
49
+ _model_target_: model.OpenSLUModel
50
+
51
+ encoder:
52
+ _model_target_: model.encoder.AutoEncoder
53
+ encoder_name: self-attention-lstm
54
+
55
+ embedding:
56
+ embedding_dim: 128
57
+ dropout_rate: 0.4
58
+
59
+ lstm:
60
+ layer_num: 1
61
+ bidirectional: true
62
+ output_dim: 256
63
+ dropout_rate: 0.4
64
+
65
+ attention:
66
+ hidden_dim: 1024
67
+ output_dim: 128
68
+ dropout_rate: 0.4
69
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
70
+ return_with_input: true
71
+ return_sentence_level_hidden: true
72
+
73
+ decoder:
74
+ _model_target_: model.decoder.BaseDecoder
75
+
76
+ intent_classifier:
77
+ _model_target_: model.decoder.classifier.LinearClassifier
78
+ mode: "intent"
79
+ input_dim: "{model.encoder.output_dim}"
80
+ loss_fn:
81
+ _model_target_: torch.nn.BCEWithLogitsLoss
82
+ use_multi: "{base.multi_intent}"
83
+ multi_threshold: 0.5
84
+ return_sentence_level: true
85
+ ignore_index: "{base.ignore_index}"
86
+
87
+
88
+ slot_classifier:
89
+ _model_target_: model.decoder.classifier.LinearClassifier
90
+ mode: "slot"
91
+ input_dim: "{model.encoder.output_dim}"
92
+ use_multi: false
93
+ multi_threshold: 0.5
94
+ ignore_index: "{base.ignore_index}"
95
+ return_sentence_level: false
config/reproduction/mix-snips/agif.yaml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla P100-PCIE-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 50
11
+ batch_size: 64
12
+ ignore_index: -100
13
+
14
+ model_manager:
15
+ load_dir: null
16
+ save_dir: save/agif-mix-snips
17
+
18
+ evaluator:
19
+ best_key: EMA
20
+ eval_by_epoch: true
21
+ # eval_step: 1800
22
+ metric:
23
+ - intent_acc
24
+ - intent_f1
25
+ - slot_f1
26
+ - EMA
27
+
28
+ accelerator:
29
+ use_accelerator: false
30
+
31
+ dataset:
32
+ dataset_name: mix-snips
33
+
34
+ tokenizer:
35
+ _tokenizer_name_: word_tokenizer
36
+ _padding_side_: right
37
+ _align_mode_: fast
38
+ add_special_tokens: false
39
+ max_length: 512
40
+
41
+ optimizer:
42
+ _model_target_: torch.optim.Adam
43
+ _model_partial_: true
44
+ lr: 0.001
45
+ weight_decay: 1e-6
46
+
47
+ scheduler:
48
+ _model_target_: transformers.get_scheduler
49
+ _model_partial_: true
50
+ name : "linear"
51
+ num_warmup_steps: 0
52
+
53
+ model:
54
+ _model_target_: model.OpenSLUModel
55
+
56
+ encoder:
57
+ _model_target_: model.encoder.AutoEncoder
58
+ encoder_name: self-attention-lstm
59
+
60
+ embedding:
61
+ embedding_dim: 128
62
+ dropout_rate: 0.4
63
+
64
+ lstm:
65
+ layer_num: 1
66
+ bidirectional: true
67
+ output_dim: 256
68
+ dropout_rate: 0.4
69
+
70
+ attention:
71
+ hidden_dim: 1024
72
+ output_dim: 128
73
+ dropout_rate: 0.4
74
+
75
+ unflat_attention:
76
+ dropout_rate: 0.4
77
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
78
+ return_with_input: true
79
+ return_sentence_level_hidden: true
80
+
81
+ decoder:
82
+ _model_target_: model.decoder.AGIFDecoder
83
+ # teacher_forcing: true
84
+ interaction:
85
+ _model_target_: model.decoder.interaction.AGIFInteraction
86
+ intent_embedding_dim: 128
87
+ input_dim: "{model.encoder.output_dim}"
88
+ hidden_dim: 128
89
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
90
+ dropout_rate: 0.4
91
+ alpha: 0.2
92
+ num_heads: 4
93
+ num_layers: 2
94
+ row_normalized: true
95
+
96
+ intent_classifier:
97
+ _model_target_: model.decoder.classifier.MLPClassifier
98
+ mode: "intent"
99
+ mlp:
100
+ - _model_target_: torch.nn.Linear
101
+ in_features: "{model.encoder.output_dim}"
102
+ out_features: 256
103
+ - _model_target_: torch.nn.LeakyReLU
104
+ negative_slope: 0.2
105
+ - _model_target_: torch.nn.Linear
106
+ in_features: 256
107
+ out_features: "{base.intent_label_num}"
108
+ dropout_rate: 0.4
109
+ loss_fn:
110
+ _model_target_: torch.nn.BCEWithLogitsLoss
111
+ use_multi: "{base.multi_intent}"
112
+ multi_threshold: 0.5
113
+ return_sentence_level: true
114
+ ignore_index: -100
115
+ weight: 0.3
116
+
117
+ slot_classifier:
118
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
119
+ mode: "slot"
120
+ input_dim: "{model.encoder.output_dim}"
121
+ layer_num: 1
122
+ bidirectional: false
123
+ force_ratio: 0.9
124
+ hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
125
+ embedding_dim: 128
126
+ ignore_index: -100
127
+ dropout_rate: 0.4
128
+ use_multi: false
129
+ multi_threshold: 0.5
130
+ return_sentence_level: false
131
+ weight: 0.7
config/reproduction/mix-snips/gl-gin.yaml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 50
11
+ batch_size: 32
12
+ ignore_index: -100
13
+
14
+
15
+ model_manager:
16
+ load_dir: null
17
+ save_dir: save/gl-gin-mix-snips
18
+
19
+ evaluator:
20
+ best_key: EMA
21
+ eval_by_epoch: false
22
+ eval_step: 1800
23
+ metric:
24
+ - intent_acc
25
+ - intent_f1
26
+ - slot_f1
27
+ - EMA
28
+
29
+ dataset:
30
+ dataset_name: mix-snips
31
+
32
+ tokenizer:
33
+ _tokenizer_name_: word_tokenizer
34
+ _padding_side_: right
35
+ _align_mode_: fast
36
+ add_special_tokens: false
37
+ max_length: 512
38
+
39
+ optimizer:
40
+ _model_target_: torch.optim.Adam
41
+ _model_partial_: true
42
+ lr: 0.001
43
+ weight_decay: 1e-6
44
+
45
+ scheduler:
46
+ _model_target_: transformers.get_scheduler
47
+ _model_partial_: true
48
+ name : "linear"
49
+ num_warmup_steps: 0
50
+
51
+ model:
52
+ _model_target_: model.OpenSLUModel
53
+
54
+ encoder:
55
+ _model_target_: model.encoder.AutoEncoder
56
+ encoder_name: self-attention-lstm
57
+
58
+ embedding:
59
+ embedding_dim: 128
60
+ dropout_rate: 0.4
61
+
62
+ lstm:
63
+ layer_num: 2
64
+ bidirectional: true
65
+ output_dim: 256
66
+ dropout_rate: 0.4
67
+
68
+ attention:
69
+ hidden_dim: 1024
70
+ output_dim: 128
71
+ dropout_rate: 0.4
72
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
73
+ return_with_input: true
74
+ return_sentence_level_hidden: false
75
+
76
+ decoder:
77
+ _model_target_: model.decoder.GLGINDecoder
78
+ dropout_rate: 0.4
79
+ interaction:
80
+ _model_target_: model.decoder.interaction.GLGINInteraction
81
+ intent_embedding_dim: 256
82
+ input_dim: "{model.encoder.output_dim}"
83
+ hidden_dim: 256
84
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
85
+ dropout_rate: 0.4
86
+ alpha: 0.2
87
+ num_heads: 4
88
+ num_layers: 2
89
+ row_normalized: true
90
+ slot_graph_window: 1
91
+ intent_label_num: "{base.intent_label_num}"
92
+
93
+ intent_classifier:
94
+ _model_target_: model.decoder.classifier.MLPClassifier
95
+ mode: "token-level-intent"
96
+ mlp:
97
+ - _model_target_: torch.nn.Linear
98
+ in_features: "{model.encoder.output_dim}"
99
+ out_features: 256
100
+ - _model_target_: torch.nn.LeakyReLU
101
+ negative_slope: 0.2
102
+ - _model_target_: torch.nn.Linear
103
+ in_features: 256
104
+ out_features: "{base.intent_label_num}"
105
+ loss_fn:
106
+ _model_target_: torch.nn.BCEWithLogitsLoss
107
+ dropout_rate: 0.4
108
+ use_multi: "{base.multi_intent}"
109
+ multi_threshold: 0.5
110
+ return_sentence_level: true
111
+ ignore_index: "{base.ignore_index}"
112
+ weight: 0.2
113
+
114
+ slot_classifier:
115
+ _model_target_: model.decoder.classifier.MLPClassifier
116
+ mode: "slot"
117
+ mlp:
118
+ - _model_target_: torch.nn.Linear
119
+ in_features: "{model.decoder.interaction.output_dim}"
120
+ out_features: "{model.decoder.interaction.output_dim}"
121
+ - _model_target_: torch.nn.LeakyReLU
122
+ negative_slope: 0.2
123
+ - _model_target_: torch.nn.Linear
124
+ in_features: "{model.decoder.interaction.output_dim}"
125
+ out_features: "{base.slot_label_num}"
126
+ ignore_index: "{base.ignore_index}"
127
+ dropout_rate: 0.4
128
+ use_multi: false
129
+ multi_threshold: 0.5
130
+ weight: 0.8
131
+ return_sentence_level: false
config/reproduction/mix-snips/vanilla.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base:
2
+ name: "OpenSLUv1"
3
+ multi_intent: true
4
+ train: true
5
+ test: true
6
+ device: cuda
7
+ seed: 42
8
+ epoch_num: 100
9
+ batch_size: 16
10
+ ignore_index: -100
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/vanilla-mix-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - intent_f1
23
+ - slot_f1
24
+ - EMA
25
+
26
+ dataset:
27
+ dataset_name: mix-snips
28
+
29
+ tokenizer:
30
+ _tokenizer_name_: word_tokenizer
31
+ _padding_side_: right
32
+ _align_mode_: fast
33
+ add_special_tokens: false
34
+ max_length: 512
35
+
36
+ optimizer:
37
+ _model_target_: torch.optim.Adam
38
+ _model_partial_: true
39
+ lr: 0.001
40
+ weight_decay: 1e-6
41
+
42
+ scheduler:
43
+ _model_target_: transformers.get_scheduler
44
+ _model_partial_: true
45
+ name : "linear"
46
+ num_warmup_steps: 0
47
+
48
+ model:
49
+ _model_target_: model.OpenSLUModel
50
+
51
+ encoder:
52
+ _model_target_: model.encoder.AutoEncoder
53
+ encoder_name: self-attention-lstm
54
+
55
+ embedding:
56
+ embedding_dim: 128
57
+ dropout_rate: 0.4
58
+
59
+ lstm:
60
+ layer_num: 1
61
+ bidirectional: true
62
+ output_dim: 256
63
+ dropout_rate: 0.4
64
+
65
+ attention:
66
+ hidden_dim: 1024
67
+ output_dim: 128
68
+ dropout_rate: 0.4
69
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
70
+ return_with_input: true
71
+ return_sentence_level_hidden: true
72
+
73
+ decoder:
74
+ _model_target_: model.decoder.BaseDecoder
75
+
76
+ intent_classifier:
77
+ _model_target_: model.decoder.classifier.LinearClassifier
78
+ mode: "intent"
79
+ input_dim: "{model.encoder.output_dim}"
80
+ loss_fn:
81
+ _model_target_: torch.nn.BCEWithLogitsLoss
82
+ use_multi: "{base.multi_intent}"
83
+ multi_threshold: 0.5
84
+ return_sentence_level: true
85
+ ignore_index: "{base.ignore_index}"
86
+
87
+
88
+ slot_classifier:
89
+ _model_target_: model.decoder.classifier.LinearClassifier
90
+ mode: "slot"
91
+ input_dim: "{model.encoder.output_dim}"
92
+ use_multi: false
93
+ multi_threshold: 0.5
94
+ ignore_index: "{base.ignore_index}"
95
+ return_sentence_level: false
config/reproduction/snips/bi-model.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/bi-model-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+
53
+ encoder:
54
+ _model_target_: model.encoder.BiEncoder
55
+ intent_encoder:
56
+ _model_target_: model.encoder.AutoEncoder
57
+ encoder_name: lstm
58
+
59
+ embedding:
60
+ embedding_dim: 256
61
+ dropout_rate: 0.5
62
+
63
+ lstm:
64
+ dropout_rate: 0.5
65
+ output_dim: 256
66
+ layer_num: 2
67
+ bidirectional: true
68
+
69
+ return_with_input: true
70
+ return_sentence_level_hidden: false
71
+
72
+ slot_encoder:
73
+ _model_target_: model.encoder.AutoEncoder
74
+ encoder_name: lstm
75
+
76
+ embedding:
77
+ embedding_dim: 256
78
+ dropout_rate: 0.5
79
+
80
+ lstm:
81
+ dropout_rate: 0.5
82
+ output_dim: 256
83
+ layer_num: 2
84
+ bidirectional: true
85
+
86
+ return_with_input: true
87
+ return_sentence_level_hidden: false
88
+
89
+ decoder:
90
+ _model_target_: model.decoder.BaseDecoder
91
+ interaction:
92
+ _model_target_: model.decoder.interaction.BiModelInteraction
93
+ output_dim: 256
94
+ dropout_rate: 0.5
95
+
96
+ intent_classifier:
97
+ _model_target_: model.decoder.classifier.LinearClassifier
98
+ mode: "intent"
99
+ ignore_index: -100
100
+
101
+ slot_classifier:
102
+ _model_target_: model.decoder.classifier.LinearClassifier
103
+ mode: "slot"
104
+ ignore_index: -100
config/reproduction/snips/dca_net.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/dca-net-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: lstm
55
+
56
+ embedding:
57
+ load_embedding_name: glove.6B.300d.txt
58
+ embedding_dim: 300
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ dropout_rate: 0.4
63
+ output_dim: 128
64
+ layer_num: 2
65
+ bidirectional: true
66
+ output_dim: "{model.encoder.lstm.output_dim}"
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.DCANetDecoder
72
+ interaction:
73
+ _model_target_: model.decoder.interaction.DCANetInteraction
74
+ output_dim: "{model.encoder.output_dim}"
75
+ attention_dropout: 0.4
76
+ num_attention_heads: 8
77
+
78
+ intent_classifier:
79
+ _model_target_: model.decoder.classifier.LinearClassifier
80
+ mode: "intent"
81
+ input_dim: "{model.encoder.output_dim}"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ input_dim: "{model.encoder.output_dim}"
88
+ ignore_index: -100
config/reproduction/snips/deberta.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/deberta-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: microsoft/deberta-v3-base
33
+ _padding_side_: right
34
+ add_special_tokens: true
35
+ max_length: 512
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 2e-5
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: microsoft/deberta-v3-base
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/snips/electra.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+ base:
3
+ name: "OpenSLUv1"
4
+ train: true
5
+ test: true
6
+ device: cuda
7
+ seed: 42
8
+ epoch_num: 300
9
+ batch_size: 32
10
+
11
+ model_manager:
12
+ load_dir: null
13
+ save_dir: save/electra-snips
14
+
15
+ evaluator:
16
+ best_key: EMA
17
+ eval_by_epoch: true
18
+ # eval_step: 1800
19
+ metric:
20
+ - intent_acc
21
+ - slot_f1
22
+ - EMA
23
+
24
+ accelerator:
25
+ use_accelerator: false
26
+
27
+ dataset:
28
+ dataset_name: snips
29
+
30
+ tokenizer:
31
+ _tokenizer_name_: google/electra-small-discriminator
32
+ _padding_side_: right
33
+ add_special_tokens: true
34
+ max_length: 512
35
+
36
+ optimizer:
37
+ _model_target_: torch.optim.AdamW
38
+ _model_partial_: true
39
+ lr: 2e-5
40
+ weight_decay: 1e-8
41
+
42
+ scheduler:
43
+ _model_target_: transformers.get_scheduler
44
+ _model_partial_: true
45
+ name : "linear"
46
+ num_warmup_steps: 0
47
+
48
+ model:
49
+ _model_target_: model.open_slu_model.OpenSLUModel
50
+ ignore_index: -100
51
+ encoder:
52
+ _model_target_: model.encoder.AutoEncoder
53
+ encoder_name: google/electra-small-discriminator
54
+ output_dim: 256
55
+ return_with_input: true
56
+ return_sentence_level_hidden: true
57
+
58
+ decoder:
59
+ _model_target_: model.decoder.base_decoder.BaseDecoder
60
+ intent_classifier:
61
+ _model_target_: model.decoder.classifier.LinearClassifier
62
+ mode: "intent"
63
+ ignore_index: -100
64
+
65
+
66
+ slot_classifier:
67
+ _model_target_: model.decoder.classifier.LinearClassifier
68
+ mode: "slot"
69
+ ignore_index: -100
config/reproduction/snips/joint-bert.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/joint-bert-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ metric:
32
+ - intent_acc
33
+ - slot_f1
34
+ - EMA
35
+
36
+ tokenizer:
37
+ _tokenizer_name_: bert-base-uncased
38
+ _padding_side_: right
39
+ _align_mode_: general
40
+ add_special_tokens: true
41
+
42
+ optimizer:
43
+ _model_target_: torch.optim.AdamW
44
+ _model_partial_: true
45
+ lr: 4e-6
46
+ weight_decay: 1e-8
47
+
48
+ scheduler:
49
+ _model_target_: transformers.get_scheduler
50
+ _model_partial_: true
51
+ name : "linear"
52
+ num_warmup_steps: 0
53
+
54
+ model:
55
+ _model_target_: model.open_slu_model.OpenSLUModel
56
+ ignore_index: -100
57
+ encoder:
58
+ _model_target_: model.encoder.AutoEncoder
59
+ encoder_name: bert-base-uncased
60
+ output_dim: 768
61
+ return_with_input: true
62
+ return_sentence_level_hidden: true
63
+
64
+ decoder:
65
+ _model_target_: model.decoder.base_decoder.BaseDecoder
66
+ intent_classifier:
67
+ _model_target_: model.decoder.classifier.LinearClassifier
68
+ mode: "intent"
69
+ ignore_index: -100
70
+
71
+
72
+ slot_classifier:
73
+ _model_target_: model.decoder.classifier.LinearClassifier
74
+ mode: "slot"
75
+ ignore_index: -100
config/reproduction/snips/roberta.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/roberta-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: roberta-base
33
+ _padding_side_: right
34
+ add_special_tokens: true
35
+ max_length: 512
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 2e-5
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: roberta-base
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/snips/slot-gated.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/slot-gated-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ ignore_index: -100
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: lstm
56
+
57
+ embedding:
58
+ embedding_dim: 256
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ dropout_rate: 0.5
63
+ output_dim: 256
64
+ layer_num: 2
65
+ bidirectional: true
66
+
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.BaseDecoder
72
+
73
+ interaction:
74
+ _model_target_: model.decoder.interaction.SlotGatedInteraction
75
+ remove_slot_attn: false
76
+ output_dim: 256
77
+ dropout_rate: 0.4
78
+
79
+ intent_classifier:
80
+ _model_target_: model.decoder.classifier.LinearClassifier
81
+ mode: "intent"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ ignore_index: -100
config/reproduction/snips/stack-propagation.yaml ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/stack-propagation-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: self-attention-lstm
56
+
57
+ embedding:
58
+ embedding_dim: 256
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ layer_num: 1
63
+ bidirectional: true
64
+ output_dim: 256
65
+ dropout_rate: 0.4
66
+
67
+ attention:
68
+ hidden_dim: 1024
69
+ output_dim: 128
70
+ dropout_rate: 0.4
71
+
72
+ return_with_input: true
73
+ return_sentence_level_hidden: false
74
+
75
+ decoder:
76
+ _model_target_: model.decoder.StackPropagationDecoder
77
+ interaction:
78
+ _model_target_: model.decoder.interaction.StackInteraction
79
+ differentiable: false
80
+
81
+ intent_classifier:
82
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
83
+ layer_num: 1
84
+ bidirectional: false
85
+ force_ratio: 0.9
86
+ hidden_dim: 64
87
+ embedding_dim: 8
88
+ ignore_index: -100
89
+ dropout_rate: 0.4
90
+ mode: "token-level-intent"
91
+ use_multi: false
92
+ return_sentence_level: true
93
+
94
+ slot_classifier:
95
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
96
+ layer_num: 1
97
+ bidirectional: false
98
+ force_ratio: 0.9
99
+ hidden_dim: 64
100
+ embedding_dim: 32
101
+ ignore_index: -100
102
+ dropout_rate: 0.4
103
+ mode: "slot"
104
+ use_multi: false
105
+ return_sentence_level: false
config/visual.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ host: 127.0.0.1
2
+ port: 7861
3
+
4
+ is_push_to_public: true
5
+ output_path: save/stack/outputs.jsonl
6
+ page-size: 2
model/decoder/base_decoder.py CHANGED
@@ -16,7 +16,7 @@ class BaseDecoder(nn.Module):
16
 
17
  Notice: it is often only necessary to change this module and its sub-modules
18
  """
19
- def __init__(self, intent_classifier, slot_classifier, interaction=None):
20
  super().__init__()
21
  self.intent_classifier = intent_classifier
22
  self.slot_classifier = slot_classifier
@@ -33,7 +33,13 @@ class BaseDecoder(nn.Module):
33
  """
34
  if self.interaction is not None:
35
  hidden = self.interaction(hidden)
36
- return OutputData(self.intent_classifier(hidden), self.slot_classifier(hidden))
 
 
 
 
 
 
37
 
38
  def decode(self, output: OutputData, target: InputData = None):
39
  """decode output logits
@@ -45,7 +51,12 @@ class BaseDecoder(nn.Module):
45
  Returns:
46
  List: decoded sequence ids
47
  """
48
- return OutputData(self.intent_classifier.decode(output, target), self.slot_classifier.decode(output, target))
 
 
 
 
 
49
 
50
  def compute_loss(self, pred: OutputData, target: InputData, compute_intent_loss=True, compute_slot_loss=True):
51
  """compute loss.
@@ -60,16 +71,18 @@ class BaseDecoder(nn.Module):
60
  Returns:
61
  Tensor: loss result
62
  """
63
- intent_loss = self.intent_classifier.compute_loss(pred, target) if compute_intent_loss else None
64
- slot_loss = self.slot_classifier.compute_loss(pred, target) if compute_slot_loss else None
65
- slot_weight = self.slot_classifier.config.get("weight")
66
- slot_weight = slot_weight if slot_weight is not None else 1.
67
- intent_weight = self.intent_classifier.config.get("weight")
68
- intent_weight = intent_weight if intent_weight is not None else 1.
69
  loss = 0
70
- if intent_loss is not None:
 
 
 
 
 
71
  loss += intent_loss * intent_weight
72
- if slot_loss is not None:
 
 
 
73
  loss += slot_loss * slot_weight
74
  return loss, intent_loss, slot_loss
75
 
 
16
 
17
  Notice: it is often only necessary to change this module and its sub-modules
18
  """
19
+ def __init__(self, intent_classifier=None, slot_classifier=None, interaction=None):
20
  super().__init__()
21
  self.intent_classifier = intent_classifier
22
  self.slot_classifier = slot_classifier
 
33
  """
34
  if self.interaction is not None:
35
  hidden = self.interaction(hidden)
36
+ intent = None
37
+ slot = None
38
+ if self.intent_classifier is not None:
39
+ intent = self.intent_classifier(hidden)
40
+ if self.slot_classifier is not None:
41
+ slot = self.slot_classifier(hidden)
42
+ return OutputData(intent, slot)
43
 
44
  def decode(self, output: OutputData, target: InputData = None):
45
  """decode output logits
 
51
  Returns:
52
  List: decoded sequence ids
53
  """
54
+ intent, slot = None, None
55
+ if self.intent_classifier is not None:
56
+ intent = self.intent_classifier.decode(output, target)
57
+ if self.slot_classifier is not None:
58
+ slot = self.slot_classifier.decode(output, target)
59
+ return OutputData(intent, slot)
60
 
61
  def compute_loss(self, pred: OutputData, target: InputData, compute_intent_loss=True, compute_slot_loss=True):
62
  """compute loss.
 
71
  Returns:
72
  Tensor: loss result
73
  """
 
 
 
 
 
 
74
  loss = 0
75
+ intent_loss = None
76
+ slot_loss = None
77
+ if self.intent_classifier is not None:
78
+ intent_loss = self.intent_classifier.compute_loss(pred, target) if compute_intent_loss else None
79
+ intent_weight = self.intent_classifier.config.get("weight")
80
+ intent_weight = intent_weight if intent_weight is not None else 1.
81
  loss += intent_loss * intent_weight
82
+ if self.slot_classifier is not None:
83
+ slot_loss = self.slot_classifier.compute_loss(pred, target) if compute_slot_loss else None
84
+ slot_weight = self.slot_classifier.config.get("weight")
85
+ slot_weight = slot_weight if slot_weight is not None else 1.
86
  loss += slot_loss * slot_weight
87
  return loss, intent_loss, slot_loss
88
 
model/encoder/auto_encoder.py CHANGED
@@ -2,7 +2,7 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-01-26 17:46:10
6
  Description:
7
 
8
  '''
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-18 19:33:34
6
  Description:
7
 
8
  '''
model/encoder/non_pretrained_encoder.py CHANGED
@@ -2,7 +2,7 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-01-30 15:00:29
6
  Description: non-pretrained encoder model
7
 
8
  '''
@@ -50,7 +50,7 @@ class NonPretrainedEncoder(BaseEncoder):
50
  # Embedding Initialization
51
  embed_config = config["embedding"]
52
  self.__embedding_dim = embed_config["embedding_dim"]
53
- if embed_config.get("load_embedding_name"):
54
  self.__embedding_layer = nn.Embedding.from_pretrained(embed_config["embedding_matrix"], padding_idx=0)
55
  else:
56
  self.__embedding_layer = nn.Embedding(
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-17 21:08:19
6
  Description: non-pretrained encoder model
7
 
8
  '''
 
50
  # Embedding Initialization
51
  embed_config = config["embedding"]
52
  self.__embedding_dim = embed_config["embedding_dim"]
53
+ if embed_config.get("load_embedding_name") and embed_config.get("embedding_matrix"):
54
  self.__embedding_layer = nn.Embedding.from_pretrained(embed_config["embedding_matrix"], padding_idx=0)
55
  else:
56
  self.__embedding_layer = nn.Embedding(
model/encoder/pretrained_encoder.py CHANGED
@@ -2,11 +2,12 @@
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
- LastEditTime: 2023-01-26 17:18:01
6
  Description: pretrained encoder model
7
 
8
  '''
9
- from transformers import AutoModel
 
10
 
11
  from common.utils import InputData, HiddenData
12
  from model.encoder.base_encoder import BaseEncoder
@@ -21,7 +22,11 @@ class PretrainedEncoder(BaseEncoder):
21
  encoder_name (str): pretrained model name in hugging face.
22
  """
23
  super().__init__(**config)
24
- self.encoder = AutoModel.from_pretrained(config["encoder_name"])
 
 
 
 
25
 
26
  def forward(self, inputs: InputData):
27
  output = self.encoder(**inputs.get_inputs())
 
2
  Author: Qiguang Chen
3
  Date: 2023-01-11 10:39:26
4
  LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-18 17:38:30
6
  Description: pretrained encoder model
7
 
8
  '''
9
+ from transformers import AutoModel, AutoConfig
10
+ from common import utils
11
 
12
  from common.utils import InputData, HiddenData
13
  from model.encoder.base_encoder import BaseEncoder
 
22
  encoder_name (str): pretrained model name in hugging face.
23
  """
24
  super().__init__(**config)
25
+ if self.config.get("_is_check_point_"):
26
+ self.encoder = utils.instantiate(config["pretrained_model"], target="_pretrained_model_target_")
27
+ # print(self.encoder)
28
+ else:
29
+ self.encoder = AutoModel.from_pretrained(config["encoder_name"])
30
 
31
  def forward(self, inputs: InputData):
32
  output = self.encoder(**inputs.get_inputs())