Add model files
- README.md +18 -0
- config.json +10 -0
- merges.txt +0 -0
- model.bin +3 -0
- shared_vocabulary.json +0 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +16 -0
- vocab.json +0 -0
README.md
CHANGED
@@ -1,3 +1,21 @@
 ---
+language:
+- en
+tags:
+- translation
+- ctranslate2
 license: mit
+base_model: kworts/BARTxiv
 ---
+# BARTxiv-ct2
+This is a version of [kworts/BARTxiv](https://huggingface.co/kworts/BARTxiv) converted for use with [CTranslate2](https://github.com/OpenNMT/CTranslate2).
+The conversion was performed using the following command:
+
+```
+ct2-transformers-converter --model kworts/BARTxiv --output_dir BARTxiv-ct2 \
+--copy_files merges.txt special_tokens_map.json tokenizer.json \
+tokenizer_config.json vocab.json
+```
+
+## License
+This adaptation is based on [kworts/BARTxiv](https://huggingface.co/kworts/BARTxiv), originally provided under the MIT License. Modifications were made for compatibility with CTranslate2. Despite these modifications, this adapted version continues to be distributed under the MIT License, honoring the original licensing terms.
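For reference, here is a minimal usage sketch for the converted model, following the generic CTranslate2 + Transformers pattern for encoder-decoder models. The local directory name, device, and input text are placeholders, and the seq2seq `Translator` interface is assumed because BARTxiv is a BART model:

```
import ctranslate2
import transformers

# Sketch: load the converted model produced by the command above.
translator = ctranslate2.Translator("BARTxiv-ct2", device="cpu")
tokenizer = transformers.AutoTokenizer.from_pretrained("BARTxiv-ct2")

text = "Paste the input passage here."
# CTranslate2 consumes token strings rather than token ids.
source = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
results = translator.translate_batch([source])
output_ids = tokenizer.convert_tokens_to_ids(results[0].hypotheses[0])
print(tokenizer.decode(output_ids, skip_special_tokens=True))
```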
config.json
ADDED
@@ -0,0 +1,10 @@
+{
+  "add_source_bos": false,
+  "add_source_eos": false,
+  "bos_token": "<s>",
+  "decoder_start_token": "</s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}
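Note the `decoder_start_token`: BART-style models begin decoding from `</s>` rather than `<s>`, and CTranslate2 reads that choice from this file. A quick sanity check that the config agrees with the copied tokenizer files might look like this (a sketch, assuming the files above live in a local `BARTxiv-ct2` directory):

```
import json
import transformers

# Sketch: confirm the CTranslate2 config's special tokens match the tokenizer's.
with open("BARTxiv-ct2/config.json") as f:
    config = json.load(f)

tokenizer = transformers.AutoTokenizer.from_pretrained("BARTxiv-ct2")
assert config["bos_token"] == tokenizer.bos_token  # "<s>"
assert config["eos_token"] == tokenizer.eos_token  # "</s>"
assert config["unk_token"] == tokenizer.unk_token  # "<unk>"
print("decoder_start_token:", config["decoder_start_token"])  # "</s>" for BART
```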
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85d054a0747dc07577a0b009b0fc73ddc09330f110eca85f738a438a308d8757
+size 1625167361
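The weights live in Git LFS, so the repository stores only this pointer; the `oid` is a plain SHA-256 of the file contents, which makes a download easy to verify (a sketch, assuming the resolved file sits at `BARTxiv-ct2/model.bin`):

```
import hashlib

# Sketch: recompute the Git LFS oid (SHA-256 of the raw file) and compare.
expected = "85d054a0747dc07577a0b009b0fc73ddc09330f110eca85f738a438a308d8757"
h = hashlib.sha256()
with open("BARTxiv-ct2/model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
print("OK" if h.hexdigest() == expected else "checksum mismatch")
```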
shared_vocabulary.json
ADDED
The diff for this file is too large to render. See raw diff.
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
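The one non-trivial entry here is `mask_token`, declared as an added token with `lstrip: true`, so `<mask>` should absorb the space that precedes it during tokenization. A small illustration (a sketch, again assuming the local `BARTxiv-ct2` directory):

```
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained("BARTxiv-ct2")
# With lstrip=True the preceding space is folded into "<mask>" itself,
# so the mask appears as a bare token rather than a space-prefixed one.
print(tokenizer.tokenize("Hello <mask>"))
print(tokenizer.mask_token_id)
```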
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 1024,
+  "name_or_path": "facebook/bart-large-cnn",
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "BartTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}
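Two entries worth noting: `name_or_path` indicates the tokenizer was derived from facebook/bart-large-cnn, and `model_max_length` caps inputs at 1024 tokens. Longer documents should be truncated before their tokens are handed to CTranslate2, for example (a sketch):

```
import transformers

# Sketch: respect the 1024-token limit before conversion to token strings.
tokenizer = transformers.AutoTokenizer.from_pretrained("BARTxiv-ct2")
long_text = " ".join(["word"] * 5000)  # stand-in for a long document
ids = tokenizer.encode(long_text, truncation=True, max_length=tokenizer.model_max_length)
source = tokenizer.convert_ids_to_tokens(ids)
print(len(source))  # at most 1024
```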
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.