"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import IPython.display as ipd\n",
+ "import numpy as np\n",
+ "import random\n",
+ "\n",
+ "rand_int = random.randint(0, len(common_voice_train)-1)\n",
+ "\n",
+ "print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n",
+ "print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n",
+ "print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])\n",
+ "ipd.Audio(data=common_voice_train[rand_int][\"audio\"][\"array\"], autoplay=True, rate=16000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "0a51643b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This does not prepare the input for the Transformer model.\n",
+ "# This will resample the data and convert the sentence into indices\n",
+ "# Batch here is just for one entry (row)\n",
+ "def prepare_dataset(batch):\n",
+ " audio = batch[\"audio\"]\n",
+ " \n",
+ " # batched output is \"un-batched\"\n",
+ " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n",
+ " batch[\"input_length\"] = len(batch[\"input_values\"])\n",
+ " \n",
+ " with processor.as_target_processor():\n",
+ " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n",
+ " return batch"
+ ]
+ },
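+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7f4d2c1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sanity check (added sketch, not part of the original run): process a single entry\n",
+ "# before mapping over the whole dataset, to confirm lengths and label indices.\n",
+ "sample = prepare_dataset(common_voice_train[0])\n",
+ "print(\"input_length:\", sample[\"input_length\"])  # number of audio samples (seconds * 16000)\n",
+ "print(\"labels (first 10 ids):\", sample[\"labels\"][:10])"
+ ]
+ },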
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "e6a3137b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "common_voice_train = common_voice_train.map(prepare_dataset, remove_columns=common_voice_train.column_names, num_proc=16)\n",
+ "common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names, num_proc=16)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "ffe2e1e6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# In case the dataset is too long which can lead to OOM. We should filter them out.\n",
+ "# max_input_length_in_sec = 5.0\n",
+ "# common_voice_train = common_voice_train.filter(lambda x: x < max_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=[\"input_length\"])"
+ ]
+ },
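+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4c8a91d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added sketch: inspect clip durations (in seconds) to pick a sensible cutoff\n",
+ "# before enabling the filter above.\n",
+ "durations = [l / processor.feature_extractor.sampling_rate for l in common_voice_train[\"input_length\"]]\n",
+ "print(\"longest clips (s):\", sorted(durations)[-5:])"
+ ]
+ },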
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "a1aec6d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "from dataclasses import dataclass, field\n",
+ "from typing import Any, Dict, List, Optional, Union\n",
+ "\n",
+ "@dataclass\n",
+ "class DataCollatorCTCWithPadding:\n",
+ " \"\"\"\n",
+ " Data collator that will dynamically pad the inputs received.\n",
+ " Args:\n",
+ " processor (:class:`~transformers.Wav2Vec2Processor`)\n",
+ " The processor used for proccessing the data.\n",
+ " padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n",
+ " Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n",
+ " among:\n",
+ " * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single\n",
+ " sequence if provided).\n",
+ " * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n",
+ " maximum acceptable input length for the model if that argument is not provided.\n",
+ " * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of\n",
+ " different lengths).\n",
+ " \"\"\"\n",
+ "\n",
+ " processor: Wav2Vec2Processor\n",
+ " padding: Union[bool, str] = True\n",
+ "\n",
+ " def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
+ " # split inputs and labels since they have to be of different lenghts and need\n",
+ " # different padding methods\n",
+ " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n",
+ " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
+ "\n",
+ " batch = self.processor.pad(\n",
+ " input_features,\n",
+ " padding=self.padding,\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ "\n",
+ " with self.processor.as_target_processor():\n",
+ " labels_batch = self.processor.pad(\n",
+ " label_features,\n",
+ " padding=self.padding,\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ "\n",
+ " # replace padding with -100 to ignore loss correctly\n",
+ " labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
+ "\n",
+ " batch[\"labels\"] = labels\n",
+ "\n",
+ " return batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "1f73c038",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)"
+ ]
+ },
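+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9e2d7b30",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added sketch: run the collator on two entries to confirm the padding behavior.\n",
+ "# Inputs are padded with the feature extractor's padding value; label padding is\n",
+ "# replaced with -100 so it is ignored by the CTC loss.\n",
+ "mini_batch = data_collator([common_voice_train[i] for i in range(2)])\n",
+ "print(\"input_values:\", mini_batch[\"input_values\"].shape)\n",
+ "print(\"labels:\", mini_batch[\"labels\"].shape)"
+ ]
+ },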
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "322220b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wer_metric = load_metric(\"wer\")\n",
+ "# cer_metric = load_metric(\"cer\")"
+ ]
+ },
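+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d0c3f7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added toy example: WER is word-level edit distance divided by reference length.\n",
+ "# One deleted word out of three reference words -> WER = 1/3.\n",
+ "wer_metric.compute(predictions=[\"hello world\"], references=[\"hello there world\"])"
+ ]
+ },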
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "19d15ad2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def compute_metrics(pred):\n",
+ " pred_logits = pred.predictions\n",
+ " pred_ids = np.argmax(pred_logits, axis=-1)\n",
+ "\n",
+ " pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id\n",
+ "\n",
+ " pred_str = tokenizer.batch_decode(pred_ids)\n",
+ " label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)\n",
+ "\n",
+ " print(\"pred : \", pred_ids[0])\n",
+ " print(\"label: \", pred.label_ids[0])\n",
+ " print(\"-----------------\")\n",
+ " \n",
+ " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n",
+ "\n",
+ " return {\"wer\": wer}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "fa0dd3e4",
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.1,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"mean\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.0,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.1,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.0,\n",
+ " \"mask_feature_length\": 64,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.25,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.75,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 72,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.17.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 75,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n",
+ "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['project_q.bias', 'project_hid.weight', 'project_hid.bias', 'quantizer.weight_proj.bias', 'quantizer.weight_proj.weight', 'project_q.weight', 'quantizer.codevectors']\n",
+ "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import Wav2Vec2ForCTC\n",
+ "\n",
+ "model = Wav2Vec2ForCTC.from_pretrained(\n",
+ " \"facebook/wav2vec2-xls-r-300m\", \n",
+ " attention_dropout=0.1,\n",
+ " layerdrop=0.0,\n",
+ " feat_proj_dropout=0.0,\n",
+ " mask_time_prob=0.75, \n",
+ " mask_time_length=10,\n",
+ " mask_feature_prob=0.25,\n",
+ " mask_feature_length=64,\n",
+ " ctc_loss_reduction=\"mean\",\n",
+ " pad_token_id=processor.tokenizer.pad_token_id,\n",
+ " vocab_size=len(processor.tokenizer)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "6b01e8e0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model.freeze_feature_encoder()"
+ ]
+ },
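+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a6f02dd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added check: after freezing the CNN feature encoder, only the Transformer\n",
+ "# layers and the newly initialized lm_head should remain trainable.\n",
+ "trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+ "total = sum(p.numel() for p in model.parameters())\n",
+ "print(f\"trainable params: {trainable:,} / {total:,}\")"
+ ]
+ },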
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "3fe63254",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "PyTorch: setting up devices\n",
+ "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import TrainingArguments\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ " output_dir='.',\n",
+ " group_by_length=True,\n",
+ " per_device_train_batch_size=8,\n",
+ " gradient_accumulation_steps=4,\n",
+ " evaluation_strategy=\"steps\",\n",
+ " gradient_checkpointing=True,\n",
+ " fp16=True,\n",
+ " num_train_epochs=50,\n",
+ " save_steps=400,\n",
+ " eval_steps=400,\n",
+ " logging_steps=100,\n",
+ " learning_rate=5e-5,\n",
+ " warmup_steps=1000,\n",
+ " save_total_limit=3,\n",
+ " load_best_model_at_end=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "745522a2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using amp half precision backend\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import Trainer\n",
+ "\n",
+ "trainer = Trainer(\n",
+ " model=model,\n",
+ " data_collator=data_collator,\n",
+ " args=training_args,\n",
+ " compute_metrics=compute_metrics,\n",
+ " train_dataset=common_voice_train,\n",
+ " eval_dataset=common_voice_test,\n",
+ " tokenizer=processor.feature_extractor,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "a2a214a5",
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n",
+ "***** Running training *****\n",
+ " Num examples = 2615\n",
+ " Num Epochs = 50\n",
+ " Instantaneous batch size per device = 8\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 32\n",
+ " Gradient Accumulation steps = 4\n",
+ " Total optimization steps = 4050\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [4050/4050 2:16:04, Epoch 49/50]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ " Validation Loss | \n",
+ " Wer | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 400 | \n",
+ " 5.204900 | \n",
+ " 4.556981 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 800 | \n",
+ " 3.569000 | \n",
+ " 3.541533 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 1200 | \n",
+ " 3.483000 | \n",
+ " 3.395552 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 1600 | \n",
+ " 2.190600 | \n",
+ " 1.173165 | \n",
+ " 0.789678 | \n",
+ "
\n",
+ " \n",
+ " 2000 | \n",
+ " 1.796800 | \n",
+ " 0.763436 | \n",
+ " 0.667831 | \n",
+ "
\n",
+ " \n",
+ " 2400 | \n",
+ " 1.615000 | \n",
+ " 0.618224 | \n",
+ " 0.592161 | \n",
+ "
\n",
+ " \n",
+ " 2800 | \n",
+ " 1.520000 | \n",
+ " 0.547277 | \n",
+ " 0.547924 | \n",
+ "
\n",
+ " \n",
+ " 3200 | \n",
+ " 1.469600 | \n",
+ " 0.500246 | \n",
+ " 0.513000 | \n",
+ "
\n",
+ " \n",
+ " 3600 | \n",
+ " 1.417500 | \n",
+ " 0.475214 | \n",
+ " 0.502134 | \n",
+ "
\n",
+ " \n",
+ " 4000 | \n",
+ " 1.394300 | \n",
+ " 0.463765 | \n",
+ " 0.494373 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-400\n",
+ "Configuration saved in ./checkpoint-400/config.json\n",
+ "Model weights saved in ./checkpoint-400/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-400/preprocessor_config.json\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-800\n",
+ "Configuration saved in ./checkpoint-800/config.json\n",
+ "Model weights saved in ./checkpoint-800/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-800/preprocessor_config.json\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [ 1 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-1200\n",
+ "Configuration saved in ./checkpoint-1200/config.json\n",
+ "Model weights saved in ./checkpoint-1200/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-1200/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-500] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 45 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 11 43 72 72 72 72 6 26 0 72 25 72 72 72 72\n",
+ " 72 72 72 18 49 72 72 72 72 72 0 72 20 58 72 72 0 0 72 72 72 23 54 72\n",
+ " 72 72 0 72 11 55 72 72 28 0 0 72 72 21 70 70 27 43 72 72 72 72 72 72\n",
+ " 0 0 0 33 72 72 72 72 26 26 72 72 11 48 72 72 72 21 21 64 0 72 72 30\n",
+ " 72 72 72 72 72 72 59 72 72 72 23 54 72 72 72 27 27 72 72 72 72 72 1 72\n",
+ " 72 0 0 72 72 72 72 72 3 70 27 27 50 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 5 70 72 31 31 43 72 72 72 72 72 72 72 72 27 27 72 72 72 72 72 25\n",
+ " 72 0 0 72 26 72 72 72 72 72 72 72 1 0 72 72 18 58 72 0 0 0 33 72\n",
+ " 72 72 72 72 72 72 26 26 0 0 72 72 25 25 49 72 72 72 72 72 72 72 72 26\n",
+ " 0 0 72 20 58 72 72 72 72 0 0 21 25 72 70 70 70 72 11 72 72 72 72 72\n",
+ " 72 59 72 72 72 72 72 29 72 72 72 72 72 70 70 16 0 0 30 30 72 72 72 25\n",
+ " 70 70 72 27 48 72 72 72 5 5 0 33 72 72 20 70 70 70 70 11 55 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-1600\n",
+ "Configuration saved in ./checkpoint-1600/config.json\n",
+ "Model weights saved in ./checkpoint-1600/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-1600/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 63 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 45 72 72 72 72 72 11 43 72 72 72 72 6 72 0 72 25 72 72 72 72\n",
+ " 72 72 72 16 72 72 72 72 72 72 0 72 20 58 72 72 0 0 72 72 72 23 54 72\n",
+ " 72 27 0 72 11 55 72 72 28 0 0 72 72 21 70 72 27 51 72 72 72 72 72 72\n",
+ " 0 0 0 33 70 72 72 72 26 0 0 72 72 11 72 72 72 21 21 64 0 72 72 30\n",
+ " 30 72 72 72 72 72 59 72 72 72 23 54 72 72 72 27 27 72 72 72 72 1 72 72\n",
+ " 72 72 72 72 72 72 72 72 3 70 72 27 50 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 70 70 70 31 31 43 72 72 72 72 72 72 72 72 27 27 44 72 72 72 72 25\n",
+ " 72 0 0 72 26 72 72 72 72 72 72 1 1 0 72 72 18 58 72 0 0 0 33 70\n",
+ " 70 72 72 72 72 72 26 72 0 0 72 72 72 25 50 72 72 72 72 72 72 72 26 26\n",
+ " 0 0 72 20 58 72 72 72 72 0 0 72 25 72 72 70 70 72 11 72 72 72 72 72\n",
+ " 72 59 72 0 0 72 72 29 29 16 72 72 72 70 16 16 0 0 72 30 72 72 72 25\n",
+ " 70 70 72 27 72 72 72 72 5 5 0 33 72 72 15 70 70 70 72 11 55 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-2000\n",
+ "Configuration saved in ./checkpoint-2000/config.json\n",
+ "Model weights saved in ./checkpoint-2000/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-2000/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 63 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 45 72 72 72 72 72 72 11 43 72 72 72 72 6 6 0 72 25 62 72 72 72\n",
+ " 72 72 72 16 49 72 72 72 72 72 0 72 20 58 72 72 0 0 72 72 23 54 72 72\n",
+ " 72 28 0 11 55 72 72 72 28 0 0 72 72 21 70 70 27 51 72 72 72 72 72 72\n",
+ " 0 0 0 33 70 72 72 72 26 0 0 72 11 72 72 72 72 21 21 64 0 72 72 30\n",
+ " 72 72 72 72 72 72 59 72 72 72 23 54 72 72 72 27 27 72 72 72 72 1 72 72\n",
+ " 0 0 72 72 72 72 72 72 3 70 72 27 52 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 70 70 72 31 43 72 72 72 72 72 72 72 72 72 27 44 44 72 72 72 25 25\n",
+ " 72 0 0 72 26 26 72 72 72 72 72 1 1 0 72 18 58 72 72 0 0 72 33 70\n",
+ " 72 72 72 72 72 72 26 72 0 0 72 72 72 25 49 72 72 72 72 72 72 72 26 26\n",
+ " 0 0 72 20 58 72 72 72 72 72 0 72 25 72 70 70 72 11 48 72 72 72 72 72\n",
+ " 59 59 72 0 0 72 72 29 16 16 72 72 70 70 16 72 0 0 30 30 72 72 72 25\n",
+ " 70 70 72 27 72 72 72 72 5 72 0 33 72 72 15 70 70 72 11 55 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-2400\n",
+ "Configuration saved in ./checkpoint-2400/config.json\n",
+ "Model weights saved in ./checkpoint-2400/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-2400/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-1200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 63 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 11 54 72 72 72 72 6 72 0 72 25 62 49 72 72\n",
+ " 72 72 72 16 49 72 72 72 72 72 0 72 20 58 72 72 0 0 72 72 23 54 72 72\n",
+ " 72 28 0 72 11 55 72 72 28 0 0 72 72 21 70 70 27 51 72 72 72 72 72 72\n",
+ " 0 0 0 33 70 72 72 72 26 0 0 72 11 48 72 72 72 21 21 64 0 72 72 30\n",
+ " 72 72 72 72 72 25 59 72 72 72 23 54 72 72 72 27 27 72 72 72 72 72 72 72\n",
+ " 0 0 72 72 72 72 72 72 3 70 72 27 52 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 70 70 72 31 43 72 72 72 72 72 72 72 72 72 27 44 72 72 72 72 25 25\n",
+ " 72 0 0 72 26 72 72 72 72 72 72 1 1 0 72 18 58 72 72 0 0 0 33 70\n",
+ " 72 72 72 72 72 72 26 72 0 0 72 72 72 25 50 72 72 72 72 72 72 72 26 26\n",
+ " 0 0 72 20 58 72 72 72 72 72 0 72 25 72 70 70 70 72 11 48 72 72 72 72\n",
+ " 72 59 72 0 0 72 72 29 16 72 72 72 70 70 16 72 0 0 30 30 72 72 72 25\n",
+ " 70 72 72 27 72 72 72 72 5 72 0 33 72 72 15 70 70 72 11 55 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-2800\n",
+ "Configuration saved in ./checkpoint-2800/config.json\n",
+ "Model weights saved in ./checkpoint-2800/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-2800/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-1600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 63 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 11 43 72 72 72 72 6 72 0 72 25 62 62 49 72\n",
+ " 72 72 72 16 49 72 72 72 72 0 0 72 20 58 72 72 0 0 72 72 23 54 18 72\n",
+ " 72 28 0 72 11 55 72 28 28 0 0 72 72 21 70 72 27 51 72 72 72 72 72 72\n",
+ " 0 0 0 33 70 72 72 72 26 0 0 72 11 48 72 72 72 21 64 64 0 72 72 30\n",
+ " 72 72 72 72 72 25 72 72 72 72 23 54 72 72 72 27 27 72 72 72 72 1 72 72\n",
+ " 0 0 72 72 72 72 72 72 3 70 27 52 72 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 70 70 72 31 43 72 72 72 72 72 72 72 72 72 27 44 72 72 72 72 25 72\n",
+ " 72 0 0 72 26 26 72 72 72 72 72 1 72 0 72 18 58 72 72 0 0 0 33 70\n",
+ " 72 72 72 72 72 72 26 72 0 0 72 72 72 25 50 72 72 72 72 72 72 72 26 72\n",
+ " 0 0 72 20 58 72 72 72 72 0 0 72 25 72 72 70 70 72 11 48 72 72 72 72\n",
+ " 59 72 72 0 0 72 72 29 16 72 72 72 70 70 16 72 0 0 72 30 72 72 72 25\n",
+ " 70 72 27 27 72 72 72 72 5 72 0 33 72 72 15 70 70 72 11 55 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-3200\n",
+ "Configuration saved in ./checkpoint-3200/config.json\n",
+ "Model weights saved in ./checkpoint-3200/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-3200/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-2000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 63 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 11 44 72 72 72 72 6 72 0 72 25 62 49 49 72\n",
+ " 72 72 72 16 49 72 72 72 72 72 0 72 20 58 72 72 0 0 72 72 23 54 72 72\n",
+ " 72 28 0 72 11 55 72 72 28 0 0 72 72 21 70 70 27 51 72 72 72 72 72 72\n",
+ " 0 0 0 42 70 72 72 72 26 0 0 72 11 48 72 72 72 21 64 64 0 72 72 30\n",
+ " 72 72 72 72 72 25 72 72 72 72 23 54 72 72 72 27 27 72 72 72 72 72 72 72\n",
+ " 0 0 72 72 72 72 72 72 3 70 72 27 52 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 70 70 72 31 43 72 72 72 72 72 72 72 72 72 27 44 72 72 72 72 25 72\n",
+ " 72 0 0 72 72 26 72 72 72 72 72 1 72 0 72 18 58 72 72 0 0 0 33 70\n",
+ " 72 72 72 72 72 72 26 72 0 0 72 72 72 25 50 72 72 72 72 72 72 72 26 26\n",
+ " 0 0 72 20 58 72 72 72 72 72 0 72 25 72 72 70 70 72 11 48 72 72 72 72\n",
+ " 72 59 72 72 0 72 72 29 16 72 72 72 72 70 16 72 0 0 72 30 72 72 72 25\n",
+ " 70 72 72 27 72 72 72 72 5 72 0 33 72 72 15 70 70 72 11 55 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-3600\n",
+ "Configuration saved in ./checkpoint-3600/config.json\n",
+ "Model weights saved in ./checkpoint-3600/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-3600/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-2400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 291\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred : [30 63 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 11 43 72 72 72 72 6 72 0 72 25 62 62 49 72\n",
+ " 72 72 72 16 49 72 72 72 72 72 0 72 20 58 72 72 0 0 72 72 23 54 18 72\n",
+ " 72 28 0 0 11 55 72 72 28 0 0 72 72 21 70 70 27 51 72 72 72 72 72 72\n",
+ " 0 0 0 42 70 72 72 72 26 0 0 72 11 48 72 72 72 21 21 64 0 72 72 30\n",
+ " 30 72 72 72 72 25 72 72 72 72 23 54 72 72 72 27 27 72 72 72 72 72 72 72\n",
+ " 0 0 72 72 72 72 72 72 3 70 72 27 52 72 72 72 5 0 0 72 30 30 44 72\n",
+ " 72 5 70 70 72 31 43 72 72 72 72 72 72 72 72 72 27 46 72 72 72 72 25 72\n",
+ " 72 0 0 72 72 26 72 72 72 72 72 1 72 0 72 18 58 72 72 0 0 0 33 70\n",
+ " 72 72 72 72 72 72 26 72 0 0 72 72 72 25 50 72 72 72 72 72 72 72 26 26\n",
+ " 0 0 72 20 58 72 72 72 72 72 0 72 25 72 70 70 70 72 11 48 72 72 72 72\n",
+ " 72 59 72 72 0 72 72 29 16 72 72 72 70 70 16 72 0 0 72 30 72 72 72 25\n",
+ " 72 72 72 27 72 72 72 72 5 72 0 33 72 72 15 70 70 72 72 12 55 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 16 16 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0]\n",
+ "label: [30 63 45 0 11 43 6 64 0 25 62 49 16 49 0 20 58 0 23 54 28 0 11 55\n",
+ " 28 0 21 70 27 51 0 42 70 26 0 13 48 21 0 30 25 70 24 43 27 61 0 3\n",
+ " 70 27 52 5 0 30 5 70 31 43 27 46 25 0 26 1 0 18 58 0 42 70 26 0\n",
+ " 25 62 49 26 0 20 58 0 25 70 11 48 59 0 29 16 70 11 0 30 59 27 57 5\n",
+ " 33 15 70 11 55 16 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72\n",
+ " 72 72 72 72 72 72 72]\n",
+ "-----------------\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./checkpoint-4000\n",
+ "Configuration saved in ./checkpoint-4000/config.json\n",
+ "Model weights saved in ./checkpoint-4000/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-4000/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-2800] due to args.save_total_limit\n",
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "Loading best model from ./checkpoint-4000 (score: 0.46376487612724304).\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=4050, training_loss=2.89372775796019, metrics={'train_runtime': 8168.6927, 'train_samples_per_second': 16.006, 'train_steps_per_second': 0.496, 'total_flos': 1.9735608328149316e+19, 'train_loss': 2.89372775796019, 'epoch': 49.99})"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer.train()"
+ ]
+ },
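+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f6b8c41",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added inference sketch: greedy CTC decoding of one test clip with the fine-tuned model.\n",
+ "# (Uses the processed test set from above; batch_decode collapses repeated and blank tokens.)\n",
+ "with torch.no_grad():\n",
+ "    input_values = torch.tensor(common_voice_test[0][\"input_values\"]).unsqueeze(0).to(model.device)\n",
+ "    logits = model(input_values).logits\n",
+ "pred_ids = torch.argmax(logits, dim=-1)\n",
+ "print(processor.batch_decode(pred_ids))"
+ ]
+ },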
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "6cc9f33d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "tokenizer config file saved in vitouphy/xls-r-300m-km/tokenizer_config.json\n",
+ "Special tokens file saved in vitouphy/xls-r-300m-km/special_tokens_map.json\n",
+ "added tokens file saved in vitouphy/xls-r-300m-km/added_tokens.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "tokenizer.push_to_hub('vitouphy/xls-r-300m-km')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "72f2e951",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kwargs = {\n",
+ " \"finetuned_from\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ " \"tasks\": \"speech-recognition\",\n",
+ " \"tags\": [\"automatic-speech-recognition\", \"openslr\", \"robust-speech-event\", \"km\"],\n",
+ " \"dataset_args\": f\"Config: km, Training split: train+validation, Eval split: test\",\n",
+ " \"dataset\": \"openslr\",\n",
+ " \"language\": \"km\"\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "78d7353f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{}\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer.create_model_card(**kwargs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "d9bb5fa1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Configuration saved in vitouphy/xls-r-300m-km/config.json\n",
+ "Model weights saved in vitouphy/xls-r-300m-km/pytorch_model.bin\n",
+ "Several commits (2) will be pushed upstream.\n",
+ "The progress bars may be unreliable.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "65189fc5b517439b87208f0898179afd",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload file pytorch_model.bin: 0%| | 3.39k/1.18G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "To https://huggingface.co/vitouphy/xls-r-300m-km\n",
+ " e25c362..dff1f30 main -> main\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'https://huggingface.co/vitouphy/xls-r-300m-km/commit/dff1f3008b5c2afbbbcab722e17fded4bf8f782b'"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.push_to_hub('vitouphy/xls-r-300m-km')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "e56874c6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to .\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Configuration saved in ./preprocessor_config.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer.save_model()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}