File size: 2,338 Bytes
49a8008
a03bd42
49a8008
 
 
 
 
 
 
 
 
 
 
 
a03bd42
49a8008
a03bd42
49a8008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a03bd42
49a8008
 
a03bd42
 
49a8008
 
 
a03bd42
49a8008
 
a03bd42
49a8008
a03bd42
49a8008
a03bd42
7a85e43
 
 
 
 
49a8008
 
 
 
 
7a85e43
 
a03bd42
7a85e43
49a8008
 
a03bd42
49a8008
 
 
 
 
a03bd42
49a8008
a03bd42
49a8008
a03bd42
49a8008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a03bd42
49a8008
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
{
  "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
  "activation_dropout": 0.0,
  "adapter_attn_dim": null,
  "adapter_kernel_size": 3,
  "adapter_stride": 2,
  "add_adapter": false,
  "apply_spec_augment": true,
  "architectures": [
    "Wav2Vec2ForCTC"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 1,
  "classifier_proj_size": 256,
  "codevector_dim": 768,
  "contrastive_logits_temperature": 0.1,
  "conv_bias": true,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "mean",
  "ctc_zero_infinity": false,
  "diversity_loss_weight": 0.1,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "layer",
  "feat_proj_dropout": 0.1,
  "feat_quantizer_dropout": 0.0,
  "final_dropout": 0.0,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.1,
  "mask_channel_length": 10,
  "mask_channel_min_space": 1,
  "mask_channel_other": 0.0,
  "mask_channel_prob": 0.0,
  "mask_channel_selection": "static",
  "mask_feature_length": 10,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_min_space": 1,
  "mask_time_other": 0.0,
  "mask_time_prob": 0.075,
  "mask_time_selection": "static",
  "model_type": "wav2vec2",
  "num_adapter_layers": 3,
  "num_attention_heads": 16,
  "num_codevector_groups": 2,
  "num_codevectors_per_group": 320,
  "num_conv_pos_embedding_groups": 16,
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 24,
  "num_negatives": 100,
  "output_hidden_size": 1024,
  "pad_token_id": 62,
  "proj_codevector_dim": 768,
  "tdnn_dilation": [
    1,
    2,
    3,
    1,
    1
  ],
  "tdnn_dim": [
    512,
    512,
    512,
    512,
    1500
  ],
  "tdnn_kernel": [
    5,
    3,
    3,
    1,
    1
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.37.1",
  "use_weighted_layer_sum": false,
  "vocab_size": 65,
  "xvector_output_dim": 512
}