{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "IQxLmB8NW6pf" }, "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "\n", "# checkpoint whose tokenizer vocabulary gets trimmed in this notebook\n", "model_name = \"naver-clova-ix/donut-base\"\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "57525\n" ] }, { "data": { "text/plain": [ "XLMRobertaTokenizerFast(name_or_path='naver-clova-ix/donut-base', vocab_size=57522, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'sep_token': '', 'pad_token': '', 'cls_token': '', 'mask_token': AddedToken(\"\", rstrip=False, lstrip=True, single_word=False, normalized=True), 'additional_special_tokens': ['', '']}, clean_up_tokenization_spaces=True)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# write the tokenizer files to disk; ./old_tokenizer/sentencepiece.bpe.model is parsed further down\n", "tokenizer.save_pretrained(\"old_tokenizer\")\n", "\n", "print(len(tokenizer))\n", "tokenizer" ] }, { "cell_type": "markdown", "metadata": { "id": "Q8tn9ryurY2L" }, "source": [ "# Modifying the sentencepiece file\n", "\n", "The saved sentencepiece model is parsed as a protobuf message so that its list of pieces can be edited directly.\n", "\n", "Reference: https://blog.ceshine.net/post/trim-down-sentencepiece-vocabulary/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HDKf6E35pQ8F", "outputId": "2f399f62-7796-463a-b0e1-59ec14357d2c" }, "outputs": [ { "data": { "text/plain": [ "57520" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers.convert_slow_tokenizer import import_protobuf\n", "\n", "# generated protobuf module that exposes `ModelProto`\n", "model_pb2 = import_protobuf()\n", "\n", "m = model_pb2.ModelProto()\n", "\n", "# read inside a context manager so the file handle is closed as soon as parsing is done\n", "with open(\"./old_tokenizer/sentencepiece.bpe.model\", \"rb\") as f:\n", "    m.ParseFromString(f.read())\n", "\n", "len(m.pieces)" ] }, { "cell_type": "markdown", "metadata": { "id": "elf0xBimspjR" },
"source": [ "Because `m.pieces` is a Protocol Buffers field, we cannot simply rebind it to a new list. Instead, we have to use the field's own methods to modify its contents in place:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "oXfLQmYwsavB" }, "outputs": [], "source": [ "kept_pieces = []\n", "\n", "for p in m.pieces:\n", "    # WRITE YOUR OWN RULE FOR WHAT TOKENS TO KEEP\n", "    # here: keep a piece when it is pure ASCII once any leading \"▁\" is stripped\n", "    if p.piece.lstrip(\"▁\").isascii():\n", "        kept_pieces.append(p)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# names of the pieces that survive the filter above\n", "kept_tokens = {kept.piece for kept in kept_pieces}\n", "\n", "# Delete in place, walking from the last index down to 0, so that removing\n", "# an element never shifts the indexes that are still to be visited.\n", "for idx in reversed(range(len(m.pieces))):\n", "    if m.pieces[idx].piece not in kept_tokens:\n", "        m.pieces.pop(idx)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "27510" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(m.pieces)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The Donut tokenizer doesn't have the \"1\" token\n", "\n", "It has tokens for \" 1\", \"10\", and \"1.1\", but a \"1\" that directly follows another character can end up encoded as the UNK token, as the next two cells show." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3\n" ] }, { "data": { "text/plain": [ "[0, 56881, 3, 2]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(tokenizer.unk_token_id)\n", "\n", "# This results in the token turning into an unknown token (3)\n", "tokenizer(\">1\").input_ids" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 39772, 3, 9447, 3, 54915, 3, 2]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Whenever a
character is before the number 1, there is a decent chance the 1 will turn into UNK (id = 3)\n", "tokenizer(\"10.1 )1 a1\").input_ids" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding \"1\" to the sentencepiece model" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from copy import deepcopy\n", "\n", "# guard so that re-running this cell cannot append a second \"1\" piece\n", "if all(p.piece != \"1\" for p in m.pieces):\n", "    # copy the last piece so that every other field is carried over unchanged\n", "    piece1 = deepcopy(m.pieces[-1])\n", "\n", "    # only the piece string and the score are overwritten\n", "    piece1.piece = \"1\"\n", "    piece1.score = -10\n", "\n", "    # include it in the model's list of pieces\n", "    m.pieces.extend([piece1])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "OrQk2mvZKWg-" }, "outputs": [], "source": [ "# create temporary sentencepiece file from the edited protobuf message\n", "\n", "with open(\"temp_sentencepiece.bpe.model\", 'wb') as f:\n", "    f.write(m.SerializeToString())" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "from transformers import XLMRobertaTokenizer\n", "\n", "# build a tokenizer directly from the edited sentencepiece file\n", "new_tokenizer = XLMRobertaTokenizer(vocab_file=\"temp_sentencepiece.bpe.model\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(27513, 57525)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(new_tokenizer), len(tokenizer)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('donut-base-ascii/tokenizer_config.json',\n", " 'donut-base-ascii/special_tokens_map.json',\n", " 'donut-base-ascii/sentencepiece.bpe.model',\n", " 'donut-base-ascii/added_tokens.json')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# the special tokens are in the model, but due to a quirk, they need to be added again\n", "\n", "new_tokenizer.add_special_tokens(new_tokenizer.special_tokens_map)\n", "\n", "new_tokenizer.save_pretrained('donut-base-ascii')" ] }, {
"cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(27513, 57525)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(new_tokenizer), len(tokenizer)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# reload from the saved directory to get all features\n", "\n", "new_tokenizer = AutoTokenizer.from_pretrained(\"donut-base-ascii\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# token -> id dictionaries of the original and the trimmed tokenizer\n", "old_mapping = tokenizer.get_vocab()\n", "\n", "new_mapping = new_tokenizer.get_vocab()\n", "\n", "# (token, id) pairs sorted by id, ascending\n", "sorted_new_mapping = sorted(new_mapping.items(), key=lambda item: item[1])\n", "\n", "# `embed_indexes` will have the old index value stored at the new index\n", "# e.g. embed_indexes[i] = j means the new embedding id at i has the same value\n", "# as the old embedding id of j\n", "# the last two ids are left out here; their embedding rows are appended separately\n", "embed_indexes = [old_mapping[tok] for tok, _ in sorted_new_mapping[:-2]]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('1', 27511), ('', 27512)]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# embed_indexes skips the last two ids because\n", "# the second to last one (\"1\") is brand new and has no entry in the old vocabulary.\n", "\n", "# the embeddings for these two ids get appended later\n", "sorted_new_mapping[-2:]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([27511, 1024])\n" ] } ], "source": [ "from transformers import VisionEncoderDecoderModel\n", "\n", "model_name = \"naver-clova-ix/donut-base\"\n", "model = VisionEncoderDecoderModel.from_pretrained(model_name)\n", "\n", "# token embedding weights of the decoder, indexed below by old token id\n", "old_embeds = model.decoder.model.decoder.embed_tokens.weight.data\n", "\n", "# rows of the kept tokens, ordered by their new ids\n", "new_embeds = old_embeds[embed_indexes, :].clone()\n", "\n", "print(new_embeds.shape)" ] }, { "cell_type": "code",
"execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1024])\n", "torch.Size([1024])\n" ] }, { "data": { "text/plain": [ "torch.Size([27513, 1024])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "\n", "# setting the embedding for the new token to be the same as \" 1\"\n", "# during training, they will differentiate\n", "embed_1 = old_embeds[old_mapping[\"▁1\"]].clone()\n", "print(embed_1.shape)\n", "\n", "# look the mask token up through the tokenizer instead of hard-coding its string\n", "embed_mask = old_embeds[old_mapping[tokenizer.mask_token]].clone()\n", "print(embed_mask.shape)\n", "\n", "# rebuild from `old_embeds` rather than appending to `new_embeds`, so that\n", "# re-running this cell cannot stack the two extra rows a second time\n", "new_embeds = torch.vstack([old_embeds[embed_indexes, :], embed_1.unsqueeze(0), embed_mask.unsqueeze(0)])\n", "\n", "new_embeds.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Put the embeddings back into the model" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# every id of the new tokenizer needs exactly one embedding row\n", "assert new_embeds.shape[0] == len(new_tokenizer), \"embedding rows do not match the tokenizer size\"\n", "\n", "model.decoder.model.decoder.embed_tokens.weight.data = new_embeds\n", "\n", "# keep the decoder config in sync with the new number of embedding rows\n", "model.decoder.config.update({\n", "    \"vocab_size\": new_embeds.shape[0]\n", "})\n", "\n", "model.save_pretrained(\"donut-base-ascii\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Making sure the embeddings are correct" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0, 37199, 35816, 34554, 2]\n", "[0, 14026, 13045, 12147, 2]\n" ] } ], "source": [ "# the same text gets different ids from the two tokenizers\n", "old_ids = tokenizer(\"hello there\").input_ids\n", "print(old_ids)\n", "\n", "new_ids = new_tokenizer(\"hello there\").input_ids\n", "print(new_ids)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(True)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "\n", "# gather the embedding rows of each id sequence with one indexing operation\n", "old_embeddings = old_embeds[old_ids]\n", "new_embeddings = new_embeds[new_ids]\n",
"\n", "# element-wise comparison, reduced to a single boolean tensor\n", "torch.eq(old_embeddings, new_embeddings).all()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add the image processor so that all files are together" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n" ] }, { "data": { "text/plain": [ "['donut-base-ascii/preprocessor_config.json']" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoImageProcessor\n", "\n", "# load the image processor of the original checkpoint and\n", "# save it into the same directory as the new tokenizer and model\n", "proc = AutoImageProcessor.from_pretrained(model_name)\n", "proc.save_pretrained(\"donut-base-ascii\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check that the new token for \"1\" works\n", "\n", "\n", "`unk_token_id` is 3, so 3 should no longer appear in the output. Instead, each \"1\" should be encoded as 27511, the id of the new \"1\" token." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 15793, 27511, 4056, 27511, 26020, 27511, 2]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# same input that produced UNK ids with the original tokenizer\n", "new_tokenizer(\"10.1 )1 a1\").input_ids" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "0199dce34d0b4101ab2da9cd761f17ea": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0",
"_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9ac895ad7a3d4af4b75076fc1e8433ac", "placeholder": "​", "style": "IPY_MODEL_f97ecaf1a1af41029bb2d79334e83b3d", "value": " 4.74k/4.74k [00:00<00:00, 230kB/s]" } }, "01d989190ff34c499ef4eb023a982a13": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3dcafb3def654095a3a02644eb1b79b6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "493c720b2da54790a6b8e3ec0ee44f8d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, 
"grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4aa596c990844b06b1081f373235cbe9": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_493c720b2da54790a6b8e3ec0ee44f8d", "max": 4742, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3dcafb3def654095a3a02644eb1b79b6", "value": 4742 } }, "66b76b6edce045b480ebed513ba1ab6e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, 
"max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b432819bf504227a04a10a749a848e9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6e87b5a1db834af09c4507881ce12fd8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "70ec1c59c5c34b448d91ad895137b7c0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "77506fb9c6404b74a5f8d82fa323a275": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9f316334ef9440a979c332a0ae8e7cd", "placeholder": "​", "style": "IPY_MODEL_01d989190ff34c499ef4eb023a982a13", "value": "Downloading pytorch_model.bin: 100%" } }, "78760960021e43ce85e63f08c55b821d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8dd9d54bf1a3499c9d075d7dd34e8f3a", "IPY_MODEL_4aa596c990844b06b1081f373235cbe9", "IPY_MODEL_0199dce34d0b4101ab2da9cd761f17ea" ], "layout": "IPY_MODEL_66b76b6edce045b480ebed513ba1ab6e" } }, "80ce735e6b314dcb92fab111b26a43d6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, 
"align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "824c138272c94f4086f9035f97b082c3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cd5a87bcb60a44e194b3db834f200061", "max": 809168699, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_70ec1c59c5c34b448d91ad895137b7c0", "value": 809168699 } }, "8dd9d54bf1a3499c9d075d7dd34e8f3a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f98d3c18e9454ed2be3197330e5d84b1", "placeholder": "​", "style": "IPY_MODEL_6e87b5a1db834af09c4507881ce12fd8", 
"value": "Downloading (…)lve/main/config.json: 100%" } }, "9ac895ad7a3d4af4b75076fc1e8433ac": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a9f316334ef9440a979c332a0ae8e7cd": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": 
null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c35a984617774c8fbde92917bcae872e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_77506fb9c6404b74a5f8d82fa323a275", "IPY_MODEL_824c138272c94f4086f9035f97b082c3", "IPY_MODEL_e0dedfb1d27d4b1aa4c090477d985257" ], "layout": "IPY_MODEL_6b432819bf504227a04a10a749a848e9" } }, "c53794a14c3049d193260cffca0a6aaa": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "cd5a87bcb60a44e194b3db834f200061": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e0dedfb1d27d4b1aa4c090477d985257": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_80ce735e6b314dcb92fab111b26a43d6", "placeholder": "​", "style": "IPY_MODEL_c53794a14c3049d193260cffca0a6aaa", "value": " 809M/809M [00:07<00:00, 116MB/s]" } }, "f97ecaf1a1af41029bb2d79334e83b3d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f98d3c18e9454ed2be3197330e5d84b1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } } } }, "nbformat": 4, "nbformat_minor": 0 }