arxyzan committed on
Commit
f7da8bd
·
verified ·
1 Parent(s): 73ab749

Remove deprecated tokenizer config fields (max_length, truncation, padding), move stride, and add sep/cls/mask tokens

Browse files
Files changed (1) hide show
  1. preprocessor/tokenizer_config.yaml +4 -4
preprocessor/tokenizer_config.yaml CHANGED
@@ -1,17 +1,17 @@
1
  name: whisper_bpe_tokenizer
2
  config_type: preprocessor
3
- max_length: 448
4
- truncation: longest_first
5
  truncation_side: right
6
- stride: 0
7
- padding: longest
8
  padding_side: right
 
9
  pad_to_multiple_of: 0
10
  pad_token_type_id: 0
11
  bos_token: <|startoftranscript|>
12
  eos_token: <|endoftext|>
13
  unk_token: <|endoftext|>
 
14
  pad_token: <|endoftext|>
 
 
15
  additional_special_tokens:
16
  - <|endoftext|>
17
  - <|endoftext|>
 
1
  name: whisper_bpe_tokenizer
2
  config_type: preprocessor
 
 
3
  truncation_side: right
 
 
4
  padding_side: right
5
+ stride: 0
6
  pad_to_multiple_of: 0
7
  pad_token_type_id: 0
8
  bos_token: <|startoftranscript|>
9
  eos_token: <|endoftext|>
10
  unk_token: <|endoftext|>
11
+ sep_token: <sep>
12
  pad_token: <|endoftext|>
13
+ cls_token: <cls>
14
+ mask_token: <mask>
15
  additional_special_tokens:
16
  - <|endoftext|>
17
  - <|endoftext|>