{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "IQxLmB8NW6pf"
},
"outputs": [],
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"model_name = \"naver-clova-ix/donut-base\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"57525\n"
]
},
{
"data": {
"text/plain": [
"XLMRobertaTokenizerFast(name_or_path='naver-clova-ix/donut-base', vocab_size=57522, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'sep_token': '', 'pad_token': '', 'cls_token': '', 'mask_token': AddedToken(\"\", rstrip=False, lstrip=True, single_word=False, normalized=True), 'additional_special_tokens': ['', '']}, clean_up_tokenization_spaces=True)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tokenizer.save_pretrained(\"old_tokenizer\")\n",
"\n",
"print(len(tokenizer))\n",
"tokenizer"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Q8tn9ryurY2L"
},
"source": [
"# Modifying the sentencepiece file\n",
"\n",
"\n",
"Reference: https://blog.ceshine.net/post/trim-down-sentencepiece-vocabulary/"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HDKf6E35pQ8F",
"outputId": "2f399f62-7796-463a-b0e1-59ec14357d2c"
},
"outputs": [
{
"data": {
"text/plain": [
"57520"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers.convert_slow_tokenizer import import_protobuf\n",
"\n",
"model_pb2 = import_protobuf()\n",
"\n",
"m = model_pb2.ModelProto()\n",
"m.ParseFromString(open(\"./old_tokenizer/sentencepiece.bpe.model\", 'rb').read())\n",
"len(m.pieces)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "elf0xBimspjR"
},
"source": [
"Because m.pieces is a Protocol Buffers field, we can not merely point it to a new list. Instead, we need to use the field’s methods to manipulate its content:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "oXfLQmYwsavB"
},
"outputs": [],
"source": [
"kept_pieces = []\n",
"\n",
"\n",
"for p in m.pieces:\n",
"\n",
" # WRITE YOUR OWN RULE FOR WHAT TOKENS TO KEEP\n",
" if p.piece.lstrip(\"▁\").isascii():\n",
" kept_pieces.append(p)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"i = 0\n",
"\n",
"kept_tokens = set([x.piece for x in kept_pieces])\n",
"\n",
"# go backwards from end\n",
"# until at start\n",
"while i < len(m.pieces):\n",
" \n",
" idx = len(m.pieces) - i - 1\n",
"\n",
" if m.pieces[idx].piece not in kept_tokens:\n",
" m.pieces.pop(idx)\n",
" else:\n",
" i += 1\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"27510"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(m.pieces)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# The Donut tokenizer doesn't have the \"1\" token\n",
"\n",
"It has tokens for \" 1\", \"10\", and \"1.1\", but certain scenarios result in the UNK token being used"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n"
]
},
{
"data": {
"text/plain": [
"[0, 56881, 3, 2]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(tokenizer.unk_token_id)\n",
"\n",
"# This results in the token turning into an unknown token (3)\n",
"tokenizer(\">1\").input_ids"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 39772, 3, 9447, 3, 54915, 3, 2]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Whenever a character is before the number 1, there is a decent chance the 1 will turn into UNK (id = 3)\n",
"tokenizer(\"10.1 )1 a1\").input_ids"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding 1 into the sentencepiece model"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from copy import deepcopy\n",
"\n",
"# copy the last piece\n",
"piece1 = deepcopy(m.pieces[-1])\n",
"\n",
"# modify the values of the following variables\n",
"piece1.piece = \"1\"\n",
"piece1.score = -10\n",
"\n",
"# include it in the models list of pieces\n",
"m.pieces.extend([piece1])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "OrQk2mvZKWg-"
},
"outputs": [],
"source": [
"# create temporary sentencepiece file\n",
"\n",
"with open(\"temp_sentencepiece.bpe.model\", 'wb') as f:\n",
" f.write(m.SerializeToString())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from transformers import XLMRobertaTokenizer\n",
"\n",
"new_tokenizer = XLMRobertaTokenizer(vocab_file=\"temp_sentencepiece.bpe.model\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(27513, 57525)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(new_tokenizer), len(tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('donut-base-ascii/tokenizer_config.json',\n",
" 'donut-base-ascii/special_tokens_map.json',\n",
" 'donut-base-ascii/sentencepiece.bpe.model',\n",
" 'donut-base-ascii/added_tokens.json')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the special tokens are in the model, but due to a quirk, they need to be added again\n",
"\n",
"new_tokenizer.add_special_tokens(new_tokenizer.special_tokens_map)\n",
"\n",
"new_tokenizer.save_pretrained('donut-base-ascii')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(27513, 57525)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(new_tokenizer), len(tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# reload to get all features\n",
"\n",
"new_tokenizer = AutoTokenizer.from_pretrained(\"donut-base-ascii\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"old_mapping = tokenizer.vocab\n",
"\n",
"new_mapping = new_tokenizer.vocab\n",
"\n",
"sorted_new_mapping = sorted(new_mapping.items(), key=lambda x: x[1])# sort by id, ascending\n",
"\n",
"# `embed_indexes` will have the old index value stored at the new index\n",
"# e.g. embed_indexes[i] = j means the new embedding id at i has the same value\n",
"# as the old embedding id of j\n",
"embed_indexes = [old_mapping[tok] for tok, _ in sorted_new_mapping[:-2]]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('1', 27511), ('', 27512)]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# embed_indexes ignores the last two because\n",
"# the second to last one is brand new.\n",
"\n",
"# these two embeddings will get added later\n",
"sorted_new_mapping[-2:]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([27511, 1024])\n"
]
}
],
"source": [
"from transformers import VisionEncoderDecoderModel\n",
"\n",
"model_name = \"naver-clova-ix/donut-base\"\n",
"model = VisionEncoderDecoderModel.from_pretrained(model_name)\n",
"\n",
"old_embeds = model.decoder.model.decoder.embed_tokens.weight.data\n",
"old_embeds\n",
"\n",
"new_embeds = old_embeds[embed_indexes, :].clone()\n",
"\n",
"print(new_embeds.shape)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1024])\n",
"torch.Size([1024])\n"
]
},
{
"data": {
"text/plain": [
"torch.Size([27513, 1024])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
"# setting the embedding for the new token to be the same as \" 1\"\n",
"# during training, they will differentiate\n",
"embed_1 = old_embeds[old_mapping[\"▁1\"]].clone()\n",
"print(embed_1.shape)\n",
"\n",
"embed_mask = old_embeds[old_mapping[\"\"]].clone()\n",
"print(embed_mask.shape)\n",
"\n",
"new_embeds = torch.vstack([new_embeds, embed_1.unsqueeze(0), embed_mask.unsqueeze(0)])\n",
"\n",
"new_embeds.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Put embeddings back into model"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"model.decoder.model.decoder.embed_tokens.weight.data = new_embeds\n",
"\n",
"model.decoder.config.update({\n",
" \"vocab_size\": new_embeds.shape[0]\n",
"})\n",
"\n",
"model.save_pretrained(\"donut-base-ascii\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Making sure the embeddings are correct"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 37199, 35816, 34554, 2]\n",
"[0, 14026, 13045, 12147, 2]\n"
]
}
],
"source": [
"old_ids = tokenizer(\"hello there\").input_ids\n",
"print(old_ids)\n",
"\n",
"new_ids = new_tokenizer(\"hello there\").input_ids\n",
"print(new_ids)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(True)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
"old_embeddings = torch.stack([old_embeds[i] for i in old_ids])\n",
"new_embeddings = torch.stack([new_embeds[i] for i in new_ids])\n",
"\n",
"torch.all(torch.eq(old_embeddings, new_embeddings))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add image processor so that all files are together"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n"
]
},
{
"data": {
"text/plain": [
"['donut-base-ascii/preprocessor_config.json']"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoImageProcessor\n",
"\n",
"proc = AutoImageProcessor.from_pretrained(model_name)\n",
"proc.save_pretrained(\"donut-base-ascii\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check that the new token for 1 works\n",
"\n",
"\n",
"unk_token_id = 3, so that shouldn't be present! Instead it should have 27511, the new token for \"1\""
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 15793, 27511, 4056, 27511, 26020, 27511, 2]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_tokenizer(\"10.1 )1 a1\").input_ids"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"0199dce34d0b4101ab2da9cd761f17ea": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9ac895ad7a3d4af4b75076fc1e8433ac",
"placeholder": "",
"style": "IPY_MODEL_f97ecaf1a1af41029bb2d79334e83b3d",
"value": " 4.74k/4.74k [00:00<00:00, 230kB/s]"
}
},
"01d989190ff34c499ef4eb023a982a13": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3dcafb3def654095a3a02644eb1b79b6": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"493c720b2da54790a6b8e3ec0ee44f8d": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4aa596c990844b06b1081f373235cbe9": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_493c720b2da54790a6b8e3ec0ee44f8d",
"max": 4742,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_3dcafb3def654095a3a02644eb1b79b6",
"value": 4742
}
},
"66b76b6edce045b480ebed513ba1ab6e": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6b432819bf504227a04a10a749a848e9": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6e87b5a1db834af09c4507881ce12fd8": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"70ec1c59c5c34b448d91ad895137b7c0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"77506fb9c6404b74a5f8d82fa323a275": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a9f316334ef9440a979c332a0ae8e7cd",
"placeholder": "",
"style": "IPY_MODEL_01d989190ff34c499ef4eb023a982a13",
"value": "Downloading pytorch_model.bin: 100%"
}
},
"78760960021e43ce85e63f08c55b821d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_8dd9d54bf1a3499c9d075d7dd34e8f3a",
"IPY_MODEL_4aa596c990844b06b1081f373235cbe9",
"IPY_MODEL_0199dce34d0b4101ab2da9cd761f17ea"
],
"layout": "IPY_MODEL_66b76b6edce045b480ebed513ba1ab6e"
}
},
"80ce735e6b314dcb92fab111b26a43d6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"824c138272c94f4086f9035f97b082c3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cd5a87bcb60a44e194b3db834f200061",
"max": 809168699,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_70ec1c59c5c34b448d91ad895137b7c0",
"value": 809168699
}
},
"8dd9d54bf1a3499c9d075d7dd34e8f3a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f98d3c18e9454ed2be3197330e5d84b1",
"placeholder": "",
"style": "IPY_MODEL_6e87b5a1db834af09c4507881ce12fd8",
"value": "Downloading (…)lve/main/config.json: 100%"
}
},
"9ac895ad7a3d4af4b75076fc1e8433ac": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a9f316334ef9440a979c332a0ae8e7cd": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c35a984617774c8fbde92917bcae872e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_77506fb9c6404b74a5f8d82fa323a275",
"IPY_MODEL_824c138272c94f4086f9035f97b082c3",
"IPY_MODEL_e0dedfb1d27d4b1aa4c090477d985257"
],
"layout": "IPY_MODEL_6b432819bf504227a04a10a749a848e9"
}
},
"c53794a14c3049d193260cffca0a6aaa": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cd5a87bcb60a44e194b3db834f200061": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e0dedfb1d27d4b1aa4c090477d985257": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_80ce735e6b314dcb92fab111b26a43d6",
"placeholder": "",
"style": "IPY_MODEL_c53794a14c3049d193260cffca0a6aaa",
"value": " 809M/809M [00:07<00:00, 116MB/s]"
}
},
"f97ecaf1a1af41029bb2d79334e83b3d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f98d3c18e9454ed2be3197330e5d84b1": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}