Spaces:
Build error
Build error
anonymoussubmitter222
committed on
Commit
·
e6d3230
1
Parent(s):
14f4c4a
cleaned description
Browse files- app.py +10 -5
- partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/91b480b21090748d217436035a64f3662f2f9366e19725038e7fb069dbe2b8bd.b98702ed4e3c9c63563997dd1dcfc42d05132b32a020cf4da95ffe1227dd6d7a.lock +0 -0
- partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50 +99 -0
- partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.json +1 -0
- partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.lock +0 -0
app.py
CHANGED
@@ -311,10 +311,6 @@ class ASR(sb.Brain):
|
|
311 |
|
312 |
label_encoder = sb.dataio.encoder.CTCTextEncoder()
|
313 |
|
314 |
-
train_data, valid_data, test_datasets, label_encoder = dataio_prepare(
|
315 |
-
hparams
|
316 |
-
)
|
317 |
-
|
318 |
|
319 |
# We dynamically add the tokenizer to our brain class.
|
320 |
# NB: This tokenizer corresponds to the one used for the LM!!
|
@@ -331,7 +327,16 @@ asr_brain = ASR(
|
|
331 |
run_opts=run_opts,
|
332 |
checkpointer=hparams["checkpointer"],
|
333 |
)
|
334 |
-
description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
title = "Tunisian Arabic Automatic Speech Recognition"
|
336 |
|
337 |
|
|
|
311 |
|
312 |
label_encoder = sb.dataio.encoder.CTCTextEncoder()
|
313 |
|
|
|
|
|
|
|
|
|
314 |
|
315 |
# We dynamically add the tokenizer to our brain class.
|
316 |
# NB: This tokenizer corresponds to the one used for the LM!!
|
|
|
327 |
run_opts=run_opts,
|
328 |
checkpointer=hparams["checkpointer"],
|
329 |
)
|
330 |
+
description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs Tunisian transcriptions written in Arabic alphabet. Since the language is unwritten, the words' transcriptions may vary. This model is presented by Salah Zaiem, PhD candidate, contact : [email protected]
|
331 |
+
|
332 |
+
|
333 |
+
Due to the nature of the available training data, the model may encounter issues when dealing with foreign words. Although it is common for Tunisian speakers to use foreign (mainly French) words, these will lead to more errors. We may work on improving this in further models.
|
334 |
+
|
335 |
+
|
336 |
+
Run is done on CPU to keep it free in this space. This leads to quite long running times on long sequences. If for your project or research, you want to transcribe long sequences, feel free to drop an email here : [email protected]
|
337 |
+
|
338 |
+
|
339 |
+
"""
|
340 |
title = "Tunisian Arabic Automatic Speech Recognition"
|
341 |
|
342 |
|
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/91b480b21090748d217436035a64f3662f2f9366e19725038e7fb069dbe2b8bd.b98702ed4e3c9c63563997dd1dcfc42d05132b32a020cf4da95ffe1227dd6d7a.lock
ADDED
File without changes
|
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "./wavlm-large/",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"adapter_kernel_size": 3,
|
5 |
+
"adapter_stride": 2,
|
6 |
+
"add_adapter": false,
|
7 |
+
"apply_spec_augment": true,
|
8 |
+
"architectures": [
|
9 |
+
"WavLMModel"
|
10 |
+
],
|
11 |
+
"attention_dropout": 0.1,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"classifier_proj_size": 256,
|
14 |
+
"codevector_dim": 768,
|
15 |
+
"contrastive_logits_temperature": 0.1,
|
16 |
+
"conv_bias": false,
|
17 |
+
"conv_dim": [
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512,
|
21 |
+
512,
|
22 |
+
512,
|
23 |
+
512,
|
24 |
+
512
|
25 |
+
],
|
26 |
+
"conv_kernel": [
|
27 |
+
10,
|
28 |
+
3,
|
29 |
+
3,
|
30 |
+
3,
|
31 |
+
3,
|
32 |
+
2,
|
33 |
+
2
|
34 |
+
],
|
35 |
+
"conv_stride": [
|
36 |
+
5,
|
37 |
+
2,
|
38 |
+
2,
|
39 |
+
2,
|
40 |
+
2,
|
41 |
+
2,
|
42 |
+
2
|
43 |
+
],
|
44 |
+
"ctc_loss_reduction": "sum",
|
45 |
+
"ctc_zero_infinity": false,
|
46 |
+
"diversity_loss_weight": 0.1,
|
47 |
+
"do_stable_layer_norm": true,
|
48 |
+
"eos_token_id": 2,
|
49 |
+
"feat_extract_activation": "gelu",
|
50 |
+
"feat_extract_dropout": 0.0,
|
51 |
+
"feat_extract_norm": "layer",
|
52 |
+
"feat_proj_dropout": 0.1,
|
53 |
+
"feat_quantizer_dropout": 0.0,
|
54 |
+
"final_dropout": 0.0,
|
55 |
+
"gradient_checkpointing": false,
|
56 |
+
"hidden_act": "gelu",
|
57 |
+
"hidden_dropout": 0.1,
|
58 |
+
"hidden_size": 1024,
|
59 |
+
"initializer_range": 0.02,
|
60 |
+
"intermediate_size": 4096,
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.1,
|
63 |
+
"mask_channel_length": 10,
|
64 |
+
"mask_channel_min_space": 1,
|
65 |
+
"mask_channel_other": 0.0,
|
66 |
+
"mask_channel_prob": 0.0,
|
67 |
+
"mask_channel_selection": "static",
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_min_space": 1,
|
74 |
+
"mask_time_other": 0.0,
|
75 |
+
"mask_time_prob": 0.075,
|
76 |
+
"mask_time_selection": "static",
|
77 |
+
"max_bucket_distance": 800,
|
78 |
+
"model_type": "wavlm",
|
79 |
+
"num_adapter_layers": 3,
|
80 |
+
"num_attention_heads": 16,
|
81 |
+
"num_buckets": 320,
|
82 |
+
"num_codevector_groups": 2,
|
83 |
+
"num_codevectors_per_group": 320,
|
84 |
+
"num_conv_pos_embedding_groups": 16,
|
85 |
+
"num_conv_pos_embeddings": 128,
|
86 |
+
"num_ctc_classes": 80,
|
87 |
+
"num_feat_extract_layers": 7,
|
88 |
+
"num_hidden_layers": 24,
|
89 |
+
"num_negatives": 100,
|
90 |
+
"output_hidden_size": 1024,
|
91 |
+
"pad_token_id": 0,
|
92 |
+
"proj_codevector_dim": 768,
|
93 |
+
"replace_prob": 0.5,
|
94 |
+
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
95 |
+
"torch_dtype": "float32",
|
96 |
+
"transformers_version": "4.15.0.dev0",
|
97 |
+
"use_weighted_layer_sum": false,
|
98 |
+
"vocab_size": 32
|
99 |
+
}
|
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"url": "https://huggingface.co/microsoft/wavlm-large/resolve/main/config.json", "etag": "\"b1d1becf90dd05db908a9114148c204484ebec69\""}
|
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.lock
ADDED
File without changes
|