Spaces:

SalahZa
/

Tunisian-ASR-v0

Build error

App Files Files Community

anonymoussubmitter222 committed on Jan 16, 2023

Commit

e6d3230

1 Parent(s): 14f4c4a

cleaned description

Browse files

Files changed (5) hide show

app.py +10 -5
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/91b480b21090748d217436035a64f3662f2f9366e19725038e7fb069dbe2b8bd.b98702ed4e3c9c63563997dd1dcfc42d05132b32a020cf4da95ffe1227dd6d7a.lock +0 -0
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50 +99 -0
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.json +1 -0
partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50.lock +0 -0

app.py CHANGED Viewed

@@ -311,10 +311,6 @@ class ASR(sb.Brain):
 label_encoder = sb.dataio.encoder.CTCTextEncoder()
-train_data, valid_data, test_datasets, label_encoder = dataio_prepare(
-        hparams
-    )
 # We dynamically add the tokenizer to our brain class.
 # NB: This tokenizer corresponds to the one used for the LM!!
@@ -331,7 +327,16 @@ asr_brain = ASR(
     run_opts=run_opts,
     checkpointer=hparams["checkpointer"],
 )
-description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs tunisian transcriptions in arabic language. Since the language is unwritten, the transcriptions may vary. This model is the work of Salah Zaiem, PhD candidate, contact : [email protected]"""
 title = "Tunisian Arabic Automatic Speech Recognition"

 label_encoder = sb.dataio.encoder.CTCTextEncoder()
 # We dynamically add the tokenizer to our brain class.
 # NB: This tokenizer corresponds to the one used for the LM!!
     run_opts=run_opts,
     checkpointer=hparams["checkpointer"],
 )
+description = """This is a speechbrain-based Automatic Speech Recognition (ASR) model for Tunisian arabic. It outputs Tunisian transcriptions written in Arabic alphabet. Since the language is unwritten, the words' transcriptions may vary. This model is presented by Salah Zaiem, PhD candidate, contact : [email protected]
+Due to the nature of the available training data, the model may encounter issues when dealing with foreign words. So, and while it is common for Tunisian speakers to use (mainly french) foreign words, these will lead to more errors. We may work on improving this in further models.
+Run is done on CPU to keep it free in this space. This leads to quite long running times on long sequences. If for your project or research, you want to transcribe long sequences, feel free to drop an email here : [email protected]
+"""
 title = "Tunisian Arabic Automatic Speech Recognition"

partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/91b480b21090748d217436035a64f3662f2f9366e19725038e7fb069dbe2b8bd.b98702ed4e3c9c63563997dd1dcfc42d05132b32a020cf4da95ffe1227dd6d7a.lock ADDED Viewed

File without changes

partly_frozen_splitted_wavlm/1986/save/wav2vec2_hubert_checkpoint/e664f369e559be304060cca431d84d2d8617a334d9a87439f7379ef4f5b384ed.0cdc6d92f6604a6716684d93dcde5b2a792e30e53e8cca630e7b91ef143f4a50 ADDED Viewed

	@@ -0,0 +1,99 @@

+{
+  "_name_or_path": "./wavlm-large/",
+  "activation_dropout": 0.0,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "WavLMModel"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.1,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_channel_length": 10,
+  "mask_channel_min_space": 1,
+  "mask_channel_other": 0.0,
+  "mask_channel_prob": 0.0,
+  "mask_channel_selection": "static",
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_min_space": 1,
+  "mask_time_other": 0.0,
+  "mask_time_prob": 0.075,
+  "mask_time_selection": "static",
+  "max_bucket_distance": 800,
+  "model_type": "wavlm",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_buckets": 320,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_ctc_classes": 80,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 0,
+  "proj_codevector_dim": 768,
+  "replace_prob": 0.5,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.15.0.dev0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}

	@@ -0,0 +1 @@


1	+ {"url": "https://huggingface.co/microsoft/wavlm-large/resolve/main/config.json", "etag": "\"b1d1becf90dd05db908a9114148c204484ebec69\""}

File without changes