diff --git "a/cli/shroom-data/Baseline_LLMs_SHROOM_SemEval_2024_Task_6.ipynb" "b/cli/shroom-data/Baseline_LLMs_SHROOM_SemEval_2024_Task_6.ipynb" new file mode 100644--- /dev/null +++ "b/cli/shroom-data/Baseline_LLMs_SHROOM_SemEval_2024_Task_6.ipynb" @@ -0,0 +1,9286 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Installing dependencies. You might need to tweak the CMAKE_ARGS for the `llama-cpp-python` pip package." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CKL68Itp9Bm-", + "outputId": "dd33c010-aa3e-4f6a-c763-e30047591c5e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting llama-cpp-python>=0.1.79\n", + " Downloading llama_cpp_python-0.2.24.tar.gz (8.8 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0mm eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25hCollecting typing-extensions>=4.5.0 (from llama-cpp-python>=0.1.79)\n", + " Downloading typing_extensions-4.9.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting numpy>=1.20.0 (from llama-cpp-python>=0.1.79)\n", + " Downloading numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting diskcache>=5.6.1 (from llama-cpp-python>=0.1.79)\n", + " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", + "Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading typing_extensions-4.9.0-py3-none-any.whl (32 kB)\n", + "Building wheels for collected packages: llama-cpp-python\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... 
\u001b[?25lerror\n", + " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", + " \n", + " \u001b[31m×\u001b[0m \u001b[32mBuilding wheel for llama-cpp-python \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mpyproject.toml\u001b[0m\u001b[1;32m)\u001b[0m did not run successfully.\n", + " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", + " \u001b[31m╰─>\u001b[0m \u001b[31m[37 lines of output]\u001b[0m\n", + " \u001b[31m \u001b[0m \u001b[92m***\u001b[0m \u001b[1m\u001b[92mscikit-build-core 0.7.0\u001b[0m using \u001b[94mCMake 3.22.1\u001b[0m \u001b[91m(wheel)\u001b[0m\u001b[0m\n", + " \u001b[31m \u001b[0m \u001b[92m***\u001b[0m \u001b[1mConfiguring CMake...\u001b[0m\n", + " \u001b[31m \u001b[0m loading initial cache file /tmp/tmpmgsfdp15/build/CMakeInit.txt\n", + " \u001b[31m \u001b[0m -- The C compiler identification is GNU 11.4.0\n", + " \u001b[31m \u001b[0m -- The CXX compiler identification is GNU 11.4.0\n", + " \u001b[31m \u001b[0m -- Detecting C compiler ABI info\n", + " \u001b[31m \u001b[0m -- Detecting C compiler ABI info - done\n", + " \u001b[31m \u001b[0m -- Check for working C compiler: /usr/bin/cc - skipped\n", + " \u001b[31m \u001b[0m -- Detecting C compile features\n", + " \u001b[31m \u001b[0m -- Detecting C compile features - done\n", + " \u001b[31m \u001b[0m -- Detecting CXX compiler ABI info\n", + " \u001b[31m \u001b[0m -- Detecting CXX compiler ABI info - done\n", + " \u001b[31m \u001b[0m -- Check for working CXX compiler: /usr/bin/c++ - skipped\n", + " \u001b[31m \u001b[0m -- Detecting CXX compile features\n", + " \u001b[31m \u001b[0m -- Detecting CXX compile features - done\n", + " \u001b[31m \u001b[0m -- Found Git: /usr/bin/git (found version \"2.34.1\")\n", + " \u001b[31m \u001b[0m -- Looking for pthread.h\n", + " \u001b[31m \u001b[0m -- Looking for pthread.h - found\n", + " \u001b[31m \u001b[0m -- Performing Test CMAKE_HAVE_LIBC_PTHREAD\n", + " \u001b[31m \u001b[0m -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - 
Success\n", + " \u001b[31m \u001b[0m -- Found Threads: TRUE\n", + " \u001b[31m \u001b[0m -- Found CUDAToolkit: /usr/local/cuda/include (found version \"12.0.140\")\n", + " \u001b[31m \u001b[0m -- cuBLAS found\n", + " \u001b[31m \u001b[0m -- The CUDA compiler identification is unknown\n", + " \u001b[31m \u001b[0m \u001b[31mCMake Error at vendor/llama.cpp/CMakeLists.txt:267 (enable_language):\n", + " \u001b[31m \u001b[0m No CMAKE_CUDA_COMPILER could be found.\n", + " \u001b[31m \u001b[0m \n", + " \u001b[31m \u001b[0m Tell CMake where to find the compiler by setting either the environment\n", + " \u001b[31m \u001b[0m variable \"CUDACXX\" or the CMake cache entry CMAKE_CUDA_COMPILER to the full\n", + " \u001b[31m \u001b[0m path to the compiler, or to the compiler name if it is in the PATH.\n", + " \u001b[31m \u001b[0m \n", + " \u001b[31m \u001b[0m \u001b[0m\n", + " \u001b[31m \u001b[0m -- Configuring incomplete, errors occurred!\n", + " \u001b[31m \u001b[0m See also \"/tmp/tmpmgsfdp15/build/CMakeFiles/CMakeOutput.log\".\n", + " \u001b[31m \u001b[0m See also \"/tmp/tmpmgsfdp15/build/CMakeFiles/CMakeError.log\".\n", + " \u001b[31m \u001b[0m \n", + " \u001b[31m \u001b[0m \u001b[91m\u001b[1m*** CMake configuration failed\u001b[0m\n", + " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n", + " \n", + " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n", + "\u001b[31m ERROR: Failed building wheel for llama-cpp-python\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[?25hFailed to build llama-cpp-python\n", + "\u001b[31mERROR: Could not build wheels for llama-cpp-python, which is required to install pyproject.toml-based projects\u001b[0m\u001b[31m\n", + "\u001b[0mRequirement already satisfied: huggingface_hub in /home/mickus/shroom/.venv/lib/python3.10/site-packages (0.19.4)\n", + "Requirement already satisfied: filelock in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (3.13.1)\n", + 
"Requirement already satisfied: fsspec>=2023.5.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (2023.10.0)\n", + "Requirement already satisfied: requests in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (4.66.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (4.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface_hub) (23.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2.1.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2023.11.17)\n", + "Requirement already satisfied: datasets in /home/mickus/shroom/.venv/lib/python3.10/site-packages (2.15.0)\n", + "Requirement already satisfied: numpy>=1.17 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (1.26.2)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (14.0.1)\n", + "Requirement already satisfied: pyarrow-hotfix in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) 
(0.6)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (0.3.7)\n", + "Requirement already satisfied: pandas in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (2.1.4)\n", + "Requirement already satisfied: requests>=2.19.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (4.66.1)\n", + "Requirement already satisfied: xxhash in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (3.4.1)\n", + "Requirement already satisfied: multiprocess in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (0.70.15)\n", + "Requirement already satisfied: fsspec<=2023.10.0,>=2023.1.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from fsspec[http]<=2023.10.0,>=2023.1.0->datasets) (2023.10.0)\n", + "Requirement already satisfied: aiohttp in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (3.9.1)\n", + "Requirement already satisfied: huggingface-hub>=0.18.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (0.19.4)\n", + "Requirement already satisfied: packaging in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n", + "Requirement already 
satisfied: frozenlist>=1.1.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: filelock in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets) (3.13.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets) (4.9.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2.1.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.11.17)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from pandas->datasets) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.1 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /home/mickus/shroom/.venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n" + ] + } + ], + 
"source": [ + "# GPU llama-cpp-python; Starting from version llama-cpp-python==0.1.79, it supports GGUF\n", + "!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on \" pip install 'llama-cpp-python>=0.1.79' --force-reinstall --upgrade --no-cache-dir\n", + "# For downloading the models\n", + "!pip install huggingface_hub\n", + "!pip install datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by downloading an instruction-finetuned Mistral model, which we will ask to classify model outputs for us." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 106, + "referenced_widgets": [ + "2ae89d1a8a074a249b750d138587e44d", + "eb30e73c1e824fa8942f0c58104d696f", + "df0a135d8a5b43d5ab94bef15b2db5aa", + "a5e99c0d3739407799fde2f29a301d05", + "fa5555299e2e47ae9d2cc7a7e58415f4", + "c96a1b051a7b4fbfbd873be07cf44cf0", + "fa37a3f2205749468f31309b6061ffef", + "a0ceffacff7f492d87084da291061006", + "af87959da48a436e842f58ac691717df", + "e35a5293e19748679095d1222f1a31e5", + "2abefc6082af406ab1c955a880a2b419" + ] + }, + "id": "uDMqQmBfAhYO", + "outputId": "eacd2078-6e5a-4451-84b4-69c6789cb4d1" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ggml_init_cublas: GGML_CUDA_FORCE_MMQ: no\n", + "ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n", + "ggml_init_cublas: found 1 CUDA devices:\n", + " Device 0: NVIDIA GeForce RTX 3080 Laptop GPU, compute capability 8.6\n", + "llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /home/mickus/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.2-GGUF/snapshots/3a6fbf4a41a1d52e415a4958cde6856d34b2db93/mistral-7b-instruct-v0.2.Q6_K.gguf (version GGUF V3 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q6_K [ 4096, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: blk.0.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 2: 
blk.0.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.1.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.1.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.2.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.2.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.2.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.2.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.2.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.2.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 25: 
blk.2.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.3.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.3.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.3.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.3.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.3.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.3.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.3.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.4.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.4.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.4.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.4.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.4.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.4.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.4.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.5.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.5.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 48: 
blk.5.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.5.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.5.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.5.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.5.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.6.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.6.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.6.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.6.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.6.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.6.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.6.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.7.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.7.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.7.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.7.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.7.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.7.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.7.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - 
tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.8.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.8.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.8.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.8.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.8.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.8.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.8.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.9.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.9.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.9.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.9.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.9.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.9.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.9.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.10.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.10.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.10.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - 
tensor 94: blk.10.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.10.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.10.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.10.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.11.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.11.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.11.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.11.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.11.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.11.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.11.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.12.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.12.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.12.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 112: blk.12.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.12.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.12.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.12.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 
[ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.13.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.13.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.13.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.13.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.13.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.13.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.13.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.14.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.14.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.14.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.14.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.14.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.14.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.14.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.15.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.15.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - 
tensor 139: blk.15.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.16.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.16.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.17.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.17.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 157: blk.17.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 161: 
blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.18.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.18.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.19.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.19.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.20.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.20.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.attn_v.weight q6_K [ 4096, 1024, 1, 1 
]\n", + "llama_model_loader: - tensor 184: blk.20.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.21.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.21.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.22.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.22.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - 
tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.23.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.23.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.24.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.24.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.25.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.25.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.attn_v.weight q6_K [ 4096, 
1024, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.26.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.26.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.27.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.27.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + 
"llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.28.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.28.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.29.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.29.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.30.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.30.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 273: 
blk.30.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 277: blk.30.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.31.attn_q.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.31.attn_k.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.attn_output.weight q6_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.ffn_gate.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.ffn_up.weight q6_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 290: output.weight q6_K [ 4096, 32000, 1, 1 ]\n", + "llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n", + "llama_model_loader: - kv 0: general.architecture str = llama\n", + "llama_model_loader: - kv 1: general.name str = mistralai_mistral-7b-instruct-v0.2\n", + "llama_model_loader: - kv 2: llama.context_length u32 = 32768\n", + "llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n", + "llama_model_loader: - kv 4: llama.block_count u32 = 32\n", + "llama_model_loader: - kv 5: llama.feed_forward_length u32 = 14336\n", + "llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n", + "llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n", + "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 8\n", + "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", + "llama_model_loader: - kv 10: llama.rope.freq_base f32 = 1000000.000000\n", + "llama_model_loader: - kv 11: general.file_type u32 = 18\n", + "llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n", + "llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", + "llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n", + "llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", + "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n", + "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2\n", + "llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 0\n", + "llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 0\n", + "llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true\n", + "llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false\n", + "llama_model_loader: - kv 22: tokenizer.chat_template str = {{ bos_token }}{% for message in mess...\n", + "llama_model_loader: - kv 23: general.quantization_version u32 = 2\n", + "llama_model_loader: 
- type f32: 65 tensors\n", + "llama_model_loader: - type q6_K: 226 tensors\n", + "llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n", + "llm_load_print_meta: format = GGUF V3 (latest)\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32000\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 32768\n", + "llm_load_print_meta: n_embd = 4096\n", + "llm_load_print_meta: n_head = 32\n", + "llm_load_print_meta: n_head_kv = 8\n", + "llm_load_print_meta: n_layer = 32\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: n_gqa = 4\n", + "llm_load_print_meta: f_norm_eps = 0.0e+00\n", + "llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n", + "llm_load_print_meta: f_clamp_kqv = 0.0e+00\n", + "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n", + "llm_load_print_meta: n_ff = 14336\n", + "llm_load_print_meta: n_expert = 0\n", + "llm_load_print_meta: n_expert_used = 0\n", + "llm_load_print_meta: rope scaling = linear\n", + "llm_load_print_meta: freq_base_train = 1000000.0\n", + "llm_load_print_meta: freq_scale_train = 1\n", + "llm_load_print_meta: n_yarn_orig_ctx = 32768\n", + "llm_load_print_meta: rope_finetuned = unknown\n", + "llm_load_print_meta: model type = 7B\n", + "llm_load_print_meta: model ftype = Q6_K\n", + "llm_load_print_meta: model params = 7.24 B\n", + "llm_load_print_meta: model size = 5.53 GiB (6.56 BPW) \n", + "llm_load_print_meta: general.name = mistralai_mistral-7b-instruct-v0.2\n", + "llm_load_print_meta: BOS token = 1 ''\n", + "llm_load_print_meta: EOS token = 2 ''\n", + "llm_load_print_meta: UNK token = 0 ''\n", + "llm_load_print_meta: PAD token = 0 ''\n", + "llm_load_print_meta: LF token = 13 '<0x0A>'\n", + "llm_load_tensors: ggml ctx size = 0.11 MiB\n", + "llm_load_tensors: using CUDA for GPU acceleration\n", + "llm_load_tensors: mem required = 205.20 MiB\n", + "llm_load_tensors: offloading 32 repeating layers to 
GPU\n", + "llm_load_tensors: offloaded 32/33 layers to GPU\n", + "llm_load_tensors: VRAM used: 5461.00 MiB\n", + "...................................................................................................\n", + "llama_new_context_with_model: n_ctx = 8192\n", + "llama_new_context_with_model: freq_base = 1000000.0\n", + "llama_new_context_with_model: freq_scale = 1\n", + "llama_new_context_with_model: KV self size = 1024.00 MiB, K (f16): 512.00 MiB, V (f16): 512.00 MiB\n", + "llama_build_graph: non-view tensors processed: 676/676\n", + "llama_new_context_with_model: compute buffer total size = 8628.25 MiB\n", + "llama_new_context_with_model: VRAM scratch buffer: 8625.06 MiB\n", + "llama_new_context_with_model: total VRAM used: 14086.06 MiB (model: 5461.00 MiB, context: 8625.06 MiB)\n", + "AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | \n" + ] + } + ], + "source": [ + "from huggingface_hub import hf_hub_download\n", + "\n", + "model_name_or_path = \"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\"\n", + "model_basename = \"mistral-7b-instruct-v0.2.Q6_K.gguf\"\n", + "model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)\n", + "\n", + "# This config has been tested on an RTX 3080 (VRAM of 16GB).\n", + "# You might need to tweak it with respect to your hardware.\n", + "from llama_cpp import Llama\n", + "lcpp_llm = Llama(\n", + " model_path=model_path,\n", + " n_threads=16, # CPU cores\n", + " n_batch=8000, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n", + " n_gpu_layers=32, # Change this value based on your model and your GPU VRAM pool.\n", + " n_ctx=8192, # Context window\n", + " logits_all=True\n", + ")\n", + "\n", + "run_on_test = False # whether this baseline system is run on the test splits or the val splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Running on the model-aware track data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UKo1-X5OvT4b", + "outputId": "4eba054f-48c4-4aea-a1de-4c14c9c45fa7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "501\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b64f002577ab4009bd2ee570b4642ea6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/501 [00:00[INST] {message} [/INST]\"\n", + "\n", + " response = lcpp_llm(\n", + " prompt=prompt,\n", + " temperature= 0.0,\n", + " logprobs=1,\n", + " )\n", + " answer = str(response[\"choices\"][0][\"text\"]).strip().lower()\n", + " if answer.startswith(\"yes\"):\n", + " output_label = \"Not Hallucination\"\n", + " prob = 1-float(np.exp(response[\"choices\"][0][\"logprobs\"][\"token_logprobs\"][0]))\n", + " if answer.startswith(\"no\"):\n", + " output_label = \"Hallucination\"\n", + " prob = float(np.exp(response[\"choices\"][0][\"logprobs\"][\"token_logprobs\"][0]))\n", + " if not answer.startswith(\"no\") and not answer.startswith(\"yes\"):\n", + " idx_random = random.randint(0,len(labels)-1)\n", + " output_label = labels[idx_random]\n", + " prob = float(0.5)\n", + "\n", + " item_to_json = {\"label\":output_label, \"p(Hallucination)\":prob}\n", + " if run_on_test:\n", + " item_to_json['id'] = id\n", + " \n", + " output_json.append(item_to_json)\n", + "\n", + "\n", + "f = open(path_val_model_aware_output, 'w', encoding='utf-8')\n", + "json.dump(output_json, f)\n", + "f.close()\n", + "print(\"done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Running on the model-agnostic track data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + 
"aff193ecfc2e4d5a8b3ddd4f63604e63", + "48be64dd9497468f83d73bd119591271", + "04b2b191f387469facbc7e0f63edd957", + "e225b3758fa24df3a0d6f1a039d3220a", + "aeaed97ed3f441e9aa2ce24c87e02d87", + "cebd82bbc195424a908c9527ee1a21d3", + "8665cfefbc984fc4873e73cd96d6c018", + "1c18583fabf94cf88d89e9d0ad83cd46", + "16ceb8ceabea4adeb2ed5d3c62a52e87", + "6c4a2676871e492897d305d6d9a6fac9", + "f432e32a03704652a5bcd21c7ce36abd", + "86da540e05824f2c95b5c8bea9b4581d", + "d1f94d67f08449439e3191bcdf87c6bf", + "cb886b4dac084c0290e1fd1c229b092e", + "8b8fd80c79c54e479b15f798bc545b96", + "3e1566a3d2f64b5fbbaf7cc51b9c9902", + "ac217ebd99d94729ac89ed81fc0a0ab5", + "2b25549d8eac4efd99bf1beb4fb26b0c", + "4facca9ecbd74aa5b4dc474634686064", + "f52b2088b6724e6dad9ee18ba364c009", + "08db236b9ee74ccb9ac456bf09e298e1", + "977e8b1928ec42a285804dcc8fc13cb5", + "a0f2fe09ab0a4a21acda513f96bb7faf", + "4f891d2316604dd08cd5ffd22c8854d9", + "0ea36c0ff6cd4559bf733fb73ff82693", + "de38e0a8f5a24cbdbf755db3cfd399ec", + "9f4e1bc76cfb4643877686a6f0271b52", + "5c70248a7e6e45199ed626fa68037174", + "07bb3c8d23084467b680d0f8be879bcd", + "fca89659d3684477bb46613bbb96383d", + "265b13864e334d2d8875d1de157c428a", + "823cdbf0fa2c43559d01de4664258a86", + "e5ae38c7214c4f05974de99e5d5c3485" + ] + }, + "id": "-2KYuv-H-LYU", + "outputId": "55d8a874-ee9c-4833-f426-279caf6813ec" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DatasetDict({\n", + " val: Dataset({\n", + " features: ['labels', 'src', 'model', 'hyp', 'task', 'ref', 'tgt', 'label', 'p(Hallucination)'],\n", + " num_rows: 499\n", + " })\n", + "})\n", + "499\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "75227940743046f6a311c44651947dd3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/499 [00:00[INST] {message} [/INST]\"\n", + "\n", + " response = lcpp_llm(\n", + " prompt=prompt,\n", + " temperature= 0.0,\n", + " logprobs=1,\n", + " )\n", + " answer = 
str(response[\"choices\"][0][\"text\"]).strip().lower()\n", + " if answer.startswith(\"yes\"):\n", + " output_label = \"Not Hallucination\"\n", + " prob = 1-float(np.exp(response[\"choices\"][0][\"logprobs\"][\"token_logprobs\"][0]))\n", + " if answer.startswith(\"no\"):\n", + " output_label = \"Hallucination\"\n", + " prob = float(np.exp(response[\"choices\"][0][\"logprobs\"][\"token_logprobs\"][0]))\n", + " if not answer.startswith(\"no\") and not answer.startswith(\"yes\"):\n", + " idx_random = random.randint(0,len(labels)-1)\n", + " output_label = labels[idx_random]\n", + " prob = float(0.5)\n", + "\n", + " item_to_json = {\"label\":output_label, \"p(Hallucination)\":prob}\n", + " if run_on_test:\n", + " item_to_json['id'] = id\n", + " output_json.append(item_to_json)\n", + "\n", + "\n", + "f = open(path_val_model_agnostic_output, 'w', encoding='utf-8')\n", + "json.dump(output_json, f)\n", + "f.close()\n", + "print(\"done\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "04b2b191f387469facbc7e0f63edd957": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_1c18583fabf94cf88d89e9d0ad83cd46", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_16ceb8ceabea4adeb2ed5d3c62a52e87", + "value": 1 + } + }, + "07bb3c8d23084467b680d0f8be879bcd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "08db236b9ee74ccb9ac456bf09e298e1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ea36c0ff6cd4559bf733fb73ff82693": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fca89659d3684477bb46613bbb96383d", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_265b13864e334d2d8875d1de157c428a", + "value": 1 + } + }, + "16ceb8ceabea4adeb2ed5d3c62a52e87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1c18583fabf94cf88d89e9d0ad83cd46": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "265b13864e334d2d8875d1de157c428a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2abefc6082af406ab1c955a880a2b419": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2ae89d1a8a074a249b750d138587e44d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_eb30e73c1e824fa8942f0c58104d696f", + "IPY_MODEL_df0a135d8a5b43d5ab94bef15b2db5aa", + "IPY_MODEL_a5e99c0d3739407799fde2f29a301d05" + ], + "layout": "IPY_MODEL_fa5555299e2e47ae9d2cc7a7e58415f4" + } + 
}, + "2b25549d8eac4efd99bf1beb4fb26b0c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3e1566a3d2f64b5fbbaf7cc51b9c9902": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48be64dd9497468f83d73bd119591271": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cebd82bbc195424a908c9527ee1a21d3", + "placeholder": "​", + "style": "IPY_MODEL_8665cfefbc984fc4873e73cd96d6c018", + "value": "Downloading data files: 100%" + } + }, + "4f891d2316604dd08cd5ffd22c8854d9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5c70248a7e6e45199ed626fa68037174", + "placeholder": "​", + "style": "IPY_MODEL_07bb3c8d23084467b680d0f8be879bcd", + "value": "Generating val split: " + } + }, + "4facca9ecbd74aa5b4dc474634686064": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5c70248a7e6e45199ed626fa68037174": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6c4a2676871e492897d305d6d9a6fac9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "823cdbf0fa2c43559d01de4664258a86": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + 
"top": null, + "visibility": null, + "width": null + } + }, + "8665cfefbc984fc4873e73cd96d6c018": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "86da540e05824f2c95b5c8bea9b4581d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d1f94d67f08449439e3191bcdf87c6bf", + "IPY_MODEL_cb886b4dac084c0290e1fd1c229b092e", + "IPY_MODEL_8b8fd80c79c54e479b15f798bc545b96" + ], + "layout": "IPY_MODEL_3e1566a3d2f64b5fbbaf7cc51b9c9902" + } + }, + "8b8fd80c79c54e479b15f798bc545b96": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_08db236b9ee74ccb9ac456bf09e298e1", + "placeholder": "​", + "style": "IPY_MODEL_977e8b1928ec42a285804dcc8fc13cb5", + "value": " 1/1 [00:00<00:00, 1.86it/s]" + } + }, + "977e8b1928ec42a285804dcc8fc13cb5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9f4e1bc76cfb4643877686a6f0271b52": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a0ceffacff7f492d87084da291061006": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a0f2fe09ab0a4a21acda513f96bb7faf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4f891d2316604dd08cd5ffd22c8854d9", + "IPY_MODEL_0ea36c0ff6cd4559bf733fb73ff82693", + "IPY_MODEL_de38e0a8f5a24cbdbf755db3cfd399ec" + ], + "layout": "IPY_MODEL_9f4e1bc76cfb4643877686a6f0271b52" + } + }, + "a5e99c0d3739407799fde2f29a301d05": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_e35a5293e19748679095d1222f1a31e5", + "placeholder": "​", + "style": "IPY_MODEL_2abefc6082af406ab1c955a880a2b419", + "value": " 5.94G/5.94G [00:45<00:00, 157MB/s]" + } + }, + "ac217ebd99d94729ac89ed81fc0a0ab5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aeaed97ed3f441e9aa2ce24c87e02d87": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af87959da48a436e842f58ac691717df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "aff193ecfc2e4d5a8b3ddd4f63604e63": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_48be64dd9497468f83d73bd119591271", + "IPY_MODEL_04b2b191f387469facbc7e0f63edd957", + "IPY_MODEL_e225b3758fa24df3a0d6f1a039d3220a" + ], + "layout": "IPY_MODEL_aeaed97ed3f441e9aa2ce24c87e02d87" + } + }, + "c96a1b051a7b4fbfbd873be07cf44cf0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb886b4dac084c0290e1fd1c229b092e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4facca9ecbd74aa5b4dc474634686064", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f52b2088b6724e6dad9ee18ba364c009", + "value": 1 + } + }, + "cebd82bbc195424a908c9527ee1a21d3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + 
"state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d1f94d67f08449439e3191bcdf87c6bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac217ebd99d94729ac89ed81fc0a0ab5", + "placeholder": "​", + "style": "IPY_MODEL_2b25549d8eac4efd99bf1beb4fb26b0c", + "value": "Extracting data files: 100%" + } + }, + "de38e0a8f5a24cbdbf755db3cfd399ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", 
+ "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_823cdbf0fa2c43559d01de4664258a86", + "placeholder": "​", + "style": "IPY_MODEL_e5ae38c7214c4f05974de99e5d5c3485", + "value": " 499/0 [00:00<00:00, 2393.49 examples/s]" + } + }, + "df0a135d8a5b43d5ab94bef15b2db5aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a0ceffacff7f492d87084da291061006", + "max": 5942065440, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_af87959da48a436e842f58ac691717df", + "value": 5942065440 + } + }, + "e225b3758fa24df3a0d6f1a039d3220a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6c4a2676871e492897d305d6d9a6fac9", + "placeholder": "​", + "style": "IPY_MODEL_f432e32a03704652a5bcd21c7ce36abd", + "value": " 1/1 [00:00<00:00, 29.62it/s]" + } + }, + "e35a5293e19748679095d1222f1a31e5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e5ae38c7214c4f05974de99e5d5c3485": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eb30e73c1e824fa8942f0c58104d696f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c96a1b051a7b4fbfbd873be07cf44cf0", + "placeholder": "��", + "style": "IPY_MODEL_fa37a3f2205749468f31309b6061ffef", + "value": "mistral-7b-instruct-v0.2.Q6_K.gguf: 100%" + } + }, + "f432e32a03704652a5bcd21c7ce36abd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f52b2088b6724e6dad9ee18ba364c009": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fa37a3f2205749468f31309b6061ffef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fa5555299e2e47ae9d2cc7a7e58415f4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fca89659d3684477bb46613bbb96383d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}