anonymoussubmitter222 committed
Commit e6d3230 · 1 Parent(s): 14f4c4a

cleaned description

app.py CHANGED
@@ -311,10 +311,6 @@ class ASR(sb.Brain):
 
 label_encoder = sb.dataio.encoder.CTCTextEncoder()
 
-train_data, valid_data, test_datasets, label_encoder = dataio_prepare(
-    hparams
-)
-
 
 # We dynamicaly add the tokenizer to our brain class.
 # NB: This tokenizer corresponds to the one used for the LM!!
@@ -331,7 +327,16 @@ asr_brain = ASR(
     run_opts=run_opts,
     checkpointer=hparams["checkpointer"],
 )
-description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs tunisian transcriptions in arabic language. Since the language is unwritten, the transcriptions may vary. This model is the work of Salah Zaiem, PhD candidate, contact : [email protected]"""
+description = """This is a SpeechBrain-based Automatic Speech Recognition (ASR) model for Tunisian Arabic. It outputs Tunisian transcriptions written in the Arabic alphabet. Since the language is unwritten, transcriptions of the same word may vary. This model is presented by Salah Zaiem, PhD candidate, contact: [email protected]
+
+
+Due to the nature of the available training data, the model may struggle with foreign words. While it is common for Tunisian speakers to mix in (mainly French) foreign words, these will lead to more errors. We may work on improving this in future models.
+
+
+Inference runs on CPU to keep this Space free, which leads to quite long running times on long sequences. If you want to transcribe long sequences for your project or research, feel free to drop an email here: [email protected]
+
+
+"""
 title = "Tunisian Arabic Automatic Speech Recognition"
 
 
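For context on where the edited string ends up: in a Hugging Face Space, a description/title pair like the one above is typically passed straight to a Gradio interface. Below is a minimal sketch of that wiring, assuming a Gradio front end; the transcribe helper and its call into asr_brain are hypothetical illustrations, not this app's actual code.

import gradio as gr

def transcribe(audio_path):
    # Hypothetical decode step: the real app runs the SpeechBrain ASR brain
    # (on CPU in this Space, hence the long runtimes the description mentions).
    return asr_brain.transcribe_file(audio_path)  # assumed helper name

gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title=title,
    description=description,
).launch()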
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/91b480b21090748d217436035a64f3662f2f9366e19725038e7fb069dbe2b8bd.b98702ed4e3c9c63563997dd1dcfc42d05132b32a020cf4da95ffe1227dd6d7a.lock ADDED
File without changes
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50 ADDED
@@ -0,0 +1,99 @@
+{
+  "_name_or_path": "./wavlm-large/",
+  "activation_dropout": 0.0,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "WavLMModel"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.1,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_channel_length": 10,
+  "mask_channel_min_space": 1,
+  "mask_channel_other": 0.0,
+  "mask_channel_prob": 0.0,
+  "mask_channel_selection": "static",
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_min_space": 1,
+  "mask_time_other": 0.0,
+  "mask_time_prob": 0.075,
+  "mask_time_selection": "static",
+  "max_bucket_distance": 800,
+  "model_type": "wavlm",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_buckets": 320,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_ctc_classes": 80,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 0,
+  "proj_codevector_dim": 768,
+  "replace_prob": 0.5,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.15.0.dev0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}
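The file above is the upstream config.json of microsoft/wavlm-large, cached by transformers the first time the checkpoint is loaded. A minimal sketch of how such a download is triggered and how the cached values surface in code (standard transformers API, not code from this repo):

from transformers import WavLMConfig, WavLMModel

# Reads (or downloads and caches) the config shown above.
config = WavLMConfig.from_pretrained("microsoft/wavlm-large")
print(config.hidden_size, config.num_hidden_layers)  # 1024 24, matching the JSON

# Loading the model reuses the cached config and fetches the weights as well.
model = WavLMModel.from_pretrained("microsoft/wavlm-large")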
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.json ADDED
@@ -0,0 +1 @@
+{"url": "https://huggingface.co/microsoft/wavlm-large/resolve/main/config.json", "etag": "\"b1d1becf90dd05db908a9114148c204484ebec69\""}
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.lock ADDED
File without changes
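The sidecar .json above records the source URL and etag of the cached blob, and the matching .lock file guards concurrent downloads. Under the legacy transformers cache scheme (an assumption here, consistent with the "transformers_version": "4.15.0.dev0" in the config above), the blob's long hex filename is derived from both values. A sketch of that derivation:

import hashlib

url = "https://huggingface.co/microsoft/wavlm-large/resolve/main/config.json"
etag = '"b1d1becf90dd05db908a9114148c204484ebec69"'  # quotes are part of the etag

# Legacy (pre-hub) transformers cache naming: sha256(url) + "." + sha256(etag).
filename = hashlib.sha256(url.encode("utf-8")).hexdigest()
filename += "." + hashlib.sha256(etag.encode("utf-8")).hexdigest()
print(filename)  # should reproduce the cached file's name shown above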