Spaces:

SadP0i
/

GGUF-Model-VRAM-Calculator

Running

App Files Files Community

SadP0i committed on Jul 11

Commit

615c77c

•

1 Parent(s): f38793c

Upload index.html

Browse files

Added a sanity check when HF returns an error instead of model info

Files changed (1) hide show

index.html +485 -575

index.html CHANGED Viewed

@@ -1,575 +1,485 @@
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <script>
-      function strToHtml(str) {
-        let parser = new DOMParser();
-        return parser.parseFromString(str, "text/html");
-      }
-      //Short, jQuery-independent function to read html table and write them into an Array.
-      //Kudos to RobG at StackOverflow
-      function tableToObj(table) {
-        var rows = table.rows;
-        var propCells = rows[0].cells;
-        var propNames = [];
-        var results = [];
-        var obj, row, cells;
-        // Use the first row for the property names
-        // Could use a header section but result is the same if
-        // there is only one header row
-        for (var i = 0, iLen = propCells.length; i < iLen; i++) {
-          propNames.push(
-            (propCells[i].textContent || propCells[i].innerText).trim()
-          );
-        }
-        // Use the rows for data
-        // Could use tbody rows here to exclude header & footer
-        // but starting from 1 gives required result
-        for (var j = 1, jLen = rows.length; j < jLen; j++) {
-          cells = rows[j].cells;
-          obj = {};
-          for (var k = 0; k < iLen; k++) {
-            obj[propNames[k]] = (
-              cells[k].textContent || cells[k].innerText
-            ).trim();
-          }
-          results.push(obj);
-        }
-        return results;
-      }
-      function formatGpu(gpus) {
-        return gpus.map(
-          (g) => `${g["Product Name"]} - ${g["Memory"].split(",")[0]}`
-        );
-      }
-      const gguf_quants = {
-        "IQ1_S": 1.56,
-        "IQ1_M": 1.75,
-        "IQ2_XXS": 2.06,
-        "IQ2_XS": 2.31,
-        "IQ2_S": 2.5,
-        "IQ3_XXS": 3.06,
-        "IQ3_XS": 3.3,
-        "IQ3_S": 3.44,
-        "IQ3_M": 3.66,
-        "Q2_K": 3.35,
-        "Q3_K_S": 3.5,
-        "Q3_K_M": 3.91,
-        "Q3_K_L": 4.27,
-        "IQ4_XS": 4.25,
-        "Q4_0": 4.55,
-        "Q4_K_S": 4.58,
-        "Q4_K_M": 4.85,
-        "Q5_0": 5.54,
-        "Q5_K_S": 5.54,
-        "Q5_K_M": 5.69,
-        "Q6_K": 6.59,
-        "Q8_0": 8.5,
-      }
-      async function modelConfig(hf_model) {
-        let config = await fetch(
-          `https://huggingface.co/${hf_model}/raw/main/config.json`
-        ).then(r => r.json())
-        let model_size = 0
-        try {
-          model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
-          if (isNaN(model_size)) {
-            throw new Erorr("no size in safetensors metadata")
-          }
-        } catch (e) {
-          try {
-            model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
-            if (isNaN(model_size)) {
-              throw new Erorr("no size in pytorch metadata")
-            }
-          } catch {
-            let model_page = await fetch(
-                "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
-            ).then(r => r.text())
-            let el = document.createElement( 'html' );
-            el.innerHTML = model_page
-            let params_el = el.querySelector('div[data-target="ModelSafetensorsParams"]')
-            if (params_el !== null) {
-              model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["safetensors"]["total"]
-            } else {
-              params_el = el.querySelector('div[data-target="ModelHeader"]')
-              model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["model"]["safetensors"]["total"]
-            }
-          }
-        }
-        config.parameters = model_size
-        return config
-      }
-      function inputBuffer(context=8192, model_config, bsz=512) {
-        /* Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248
-          ctx->inp_tokens  = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
-          ctx->inp_embd    = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch);
-          ctx->inp_pos     = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
-          ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch);
-          ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx);
-          ctx->inp_sum     = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch);
-          n_embd is hidden size (github:ggeranov/llama.cpp/convert.py:248)
-        */
-        const inp_tokens = bsz
-        const inp_embd = model_config["hidden_size"] * bsz
-        const inp_pos = bsz
-        const inp_KQ_mask = context * bsz
-        const inp_K_shift = context
-        const inp_sum = bsz
-        return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum
-      }
-      function computeBuffer(context=8192, model_config, bsz=512) {
-        if (bsz != 512) {
-          alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result result will be an overestimatition")
-        }
-        return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
-      }
-      function kvCache(context=8192, model_config, cache_bit=16) {
-        const n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
-        const n_embd_gqa = model_config["hidden_size"] / n_gqa
-        const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
-        const size = 2 * n_elements
-        return size * (cache_bit / 8)
-      }
-      function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
-        return Number.parseFloat((inputBuffer(context, model_config, bsz) + kvCache(context, model_config, cache_bit) + computeBuffer(context, model_config, bsz)).toFixed(2))
-      }
-      function modelSize(model_config, bpw=4.5) {
-        return Number.parseFloat((model_config["parameters"] * bpw / 8).toFixed(2))
-      }
-      async function calculateSizes(format, context_loc) {
-        format = "gguf"
-        try {
-          const model_config = await modelConfig(document.getElementById("modelsearch").value)
-          const context = parseInt(document.getElementById("contextsize").value)
-          let bsz = 512
-          let cache_bit = 16
-          let bpw = 0
-          if (format === "gguf") {
-            bsz = parseInt(document.getElementById("batchsize").value)
-            bpw = gguf_quants[document.getElementById("quantsize").innerText]
-          } else if (format == "exl2") {
-            cache_bit = Number.parseInt(document.getElementById("kvCache").value)
-            bpw = Number.parseFloat(document.getElementById("bpw").value)
-          }
-          const model_size = modelSize(model_config, bpw)
-          const context_size = contextSize(context, model_config, bsz, cache_bit)
-          const total_size = ((model_size + context_size) / 2**30)
-          document.getElementById("resultmodel").innerText = (model_size / 2**30).toFixed(2)
-          document.getElementById("resultcontext").innerText = (context_size / 2**30).toFixed(2)
-          const result_total_el = document.getElementById("resulttotal");
-          result_total_el.innerText = total_size.toFixed(2)
-          const allocated_vram = Number.parseInt(document.getElementById("maxvram").value);
-          const vram = allocated_vram
-          if (vram - total_size > 0.5) {
-            result_total_el.style.backgroundColor = "#bef264"
-          } else if (vram - total_size > 0) {
-            result_total_el.style.backgroundColor = "#facc15"
-          } else {
-            result_total_el.style.backgroundColor = "#ef4444"
-          }
-          const layer_size = ((model_size / 2**30) / model_config["num_hidden_layers"])
-          const layer_size_el = document.getElementById("layersize");
-          layer_size_el.innerText = layer_size.toFixed(2)
-          const context_dealloc = context_loc === "vram" ? (context_size / 2**30) : 0;
-          const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size)
-          const layers_offload_el = document.getElementById("layersoffload");
-          layers_offload_el.innerText = `${layers_offload > model_config["num_hidden_layers"] ? model_config["num_hidden_layers"] : Math.max(0, layers_offload)}/${model_config["num_hidden_layers"]}`
-        } catch(e) {
-          alert(e);
-        }
-      }
-    </script>
-    <link href="./styles.css" rel="stylesheet">
-    <title>Can I split it? - GGUF VRAM Calculator</title>
-  </head>
-  <body class="p-8">
-    <div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
-      <div style="text-align: center;">
-      <h1 class="text-xl font-semibold leading-6 text-gray-900">
-        GGUF Model, Can I split it?
-      </h1>
-      <h3 class="font-semibold leading-6 text-gray-900">
-        Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM calculator</a>
-      </h3>
-      </div>
-      <div class="flex flex-col gap-10">
-        <div class="w-auto flex flex-col gap-4">
-          <div class="relative">
-            <label
-              for="maxvram"
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Max Allocated VRAM
-            </label>
-            <input
-              value="24"
-              type="number"
-              name="maxvram"
-              id="maxvram"
-              step="1"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            />
-          </div>
-          <!-- Model Selector -->
-          <div class="flex flex-row gap-4 relative">
-            <label
-              for="contextsize"
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Model (unquantized)
-            </label>
-            <div
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-              x-data="{
-                          open: false,
-                          value: 'Nexusflow/Starling-LM-7B-beta',
-                          results: null,
-                          toggle() {
-                            if (this.open) {
-                              return this.close()
-                            }
-                            this.$refs.input.focus()
-                            this.open = true
-                          },
-                          close(focusAfter) {
-                            if (! this.open) return
-                            this.open = false
-                            focusAfter && focusAfter.focus()
-                          }
-                        }"
-              x-on:keydown.escape.prevent.stop="close($refs.input)"
-              x-id="['model-typeahead']"
-              class="relative"
-            >
-              <!-- Input -->
-              <input
-                id="modelsearch"
-                x-ref="input"
-                x-on:click="toggle()"
-                @keypress.debounce.150ms="results = (await
-                    fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
-                    encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
-                :aria-expanded="open"
-                :aria-controls="$id('model-typeahead')"
-                x-model="value"
-                class="flex justify-between items-center gap-2 w-full"
-              />
-              <!-- Panel -->
-              <div
-                x-ref="panel"
-                x-show="open"
-                x-transition.origin.top.left
-                x-on:click.outside="close($refs.input)"
-                :id="$id('model-typeahead')"
-                style="display: none"
-                class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
-              >
-                <template x-for="result in results">
-                  <a
-                    @click="value = result.id; close($refs.input)"
-                    x-text="result.id"
-                    class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
-                  ></a>
-                </template>
-              </div>
-            </div>
-          </div>
-          <!-- Context Size Selector -->
-          <div class="relative">
-            <label
-              for="contextsize"
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Context Size
-            </label>
-            <input
-              value="8192"
-              type="number"
-              name="contextsize"
-              id="contextsize"
-              step="1024"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            />
-          </div>
-          <div class="relative">
-            <label
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-              >Context offloaded to</label
-            >
-            <fieldset
-              x-model="context_loc"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            >
-              <legend class="sr-only">Context location</legend>
-              <div
-                class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0"
-              >
-                <div class="flex items-center">
-                  <input
-                    id="context-vram"
-                    name="context-allocation"
-                    type="radio"
-                    value="vram"
-                    checked
-                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
-                  />
-                  <label
-                    for="context-vram"
-                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
-                    >VRAM</label
-                  >
-                </div>
-                <div class="flex items-center">
-                  <input
-                    id="context-ram"
-                    name="context-allocation"
-                    type="radio"
-                    value="ram"
-                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
-                  />
-                  <label
-                    for="context-ram"
-                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
-                    >RAM</label
-                  >
-                </div>
-              </div>
-            </fieldset>
-          </div>
-          <!-- GGUF Options -->
-          <div x-show="format === 'gguf'" class="relative">
-            <div class="flex flex-row gap-4">
-              <label
-                for="contextsize"
-                class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-              >
-                Quantization Size
-              </label>
-              <div
-                class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-                x-data="{
-                open: false,
-                value: '',
-                toggle() {
-                  if (this.open) {
-                    return this.close()
-                  }
-                  this.$refs.button.focus()
-                  this.open = true
-                },
-                close(focusAfter) {
-                  if (! this.open) return
-                  this.open = false
-                  focusAfter && focusAfter.focus()
-                }
-              }"
-                x-on:keydown.escape.prevent.stop="close($refs.button)"
-                x-id="['dropdown-button']"
-                class="relative"
-              >
-                <!-- Button -->
-                <button
-                  x-ref="button"
-                  x-on:click="toggle()"
-                  :aria-expanded="open"
-                  :aria-controls="$id('dropdown-button')"
-                  type="button"
-                  id="quantsize"
-                  x-text="value.length === 0 ? 'Q4_K_S' : value"
-                  class="flex justify-between items-center gap-2 w-full"
-                >
-                  Q4_K_S
-                  <!-- Heroicon: chevron-down -->
-                  <svg
-                    xmlns="http://www.w3.org/2000/svg"
-                    class="h-5 w-5 text-gray-400"
-                    viewBox="0 0 20 20"
-                    fill="currentColor"
-                  >
-                    <path
-                      fill-rule="evenodd"
-                      d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
-                      clip-rule="evenodd"
-                    />
-                  </svg>
-                </button>
-                <!-- Panel -->
-                <div
-                  x-data="{ quants: [
-                    'IQ1_S',
-                    'IQ1_M',
-                    'IQ2_XXS',
-                    'IQ2_XS',
-                    'IQ2_S',
-                    'IQ3_XXS',
-                    'IQ3_XS',
-                    'IQ3_S',
-                    'IQ3_M',
-                    'Q2_K',
-                    'Q3_K_S',
-                    'Q3_K_M',
-                    'Q3_K_L',
-                    'IQ4_XS',
-                    'Q4_0',
-                    'Q4_K_S',
-                    'Q4_K_M',
-                    'Q5_0',
-                    'Q5_K_S',
-                    'Q5_K_M',
-                    'Q6_K',
-                    'Q8_0'
-                  ]}"
-                  x-ref="panel"
-                  x-show="open"
-                  x-transition.origin.top.left
-                  x-on:click.outside="close($refs.button)"
-                  :id="$id('dropdown-button')"
-                  style="display: none"
-                  class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
-                >
-                  <template x-for="quant in quants">
-                    <a
-                      @click="value = quant; close($refs.button)"
-                      x-text="quant"
-                      class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
-                    ></a>
-                  </template>
-                </div>
-              </div>
-              <div class="relative">
-                <label
-                  for="batchsize"
-                  class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-                >
-                  Batch Size
-                </label>
-                <input
-                  value="512"
-                  type="number"
-                  step="128"
-                  id="batchsize"
-                  class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-                />
-              </div>
-            </div>
-          </div>
-          <button
-            type="button"
-            class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
-            @click="calculateSizes(format, context_loc)"
-          >
-            Submit
-          </button>
-        </div>
-        <div class="w-auto flex flex-col gap-4">
-          <div class="relative">
-            <label
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Model Size (GB)
-            </label>
-            <div
-              id="resultmodel"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            >4.20</div>
-          </div>
-          <div class="relative">
-            <label
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Context Size (GB)
-            </label>
-            <div
-              id="resultcontext"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            >6.90</div>
-          </div>
-          <div class="relative">
-            <label
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Total Size (GB)
-            </label>
-            <div
-              id="resulttotal"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            >420.69</div>
-          </div>
-          <div class="relative">
-            <label
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Layer size (GB)
-            </label>
-            <div
-              id="layersize"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            >42.69</div>
-          </div>
-          <div class="relative">
-            <label
-              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
-            >
-              Layers offloaded to GPU (out of total)
-            </label>
-            <div
-              id="layersoffload"
-              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
-            >42</div>
-          </div>
-        </div>
-      </div>
-    </div>
-    <script
-      src="https://cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js"
-    ></script>
-    <script defer>
-      calculateSizes("gguf", "vram")
-    </script>
-  </body>
-</html>

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <script>
+    function strToHtml(str) {
+      let parser = new DOMParser();
+      return parser.parseFromString(str, "text/html");
+    }
+    //Short, jQuery-independent function to read html table and write them into an Array.
+    //Kudos to RobG at StackOverflow
+    function tableToObj(table) {
+      var rows = table.rows;
+      var propCells = rows[0].cells;
+      var propNames = [];
+      var results = [];
+      var obj, row, cells;
+      // Use the first row for the property names
+      // Could use a header section but result is the same if
+      // there is only one header row
+      for (var i = 0, iLen = propCells.length; i < iLen; i++) {
+        propNames.push(
+          (propCells[i].textContent || propCells[i].innerText).trim()
+        );
+      }
+      // Use the rows for data
+      // Could use tbody rows here to exclude header & footer
+      // but starting from 1 gives required result
+      for (var j = 1, jLen = rows.length; j < jLen; j++) {
+        cells = rows[j].cells;
+        obj = {};
+        for (var k = 0; k < iLen; k++) {
+          obj[propNames[k]] = (
+            cells[k].textContent || cells[k].innerText
+          ).trim();
+        }
+        results.push(obj);
+      }
+      return results;
+    }
+    function formatGpu(gpus) {
+      return gpus.map(
+        (g) => `${g["Product Name"]} - ${g["Memory"].split(",")[0]}`
+      );
+    }
+    const gguf_quants = {
+      "IQ1_S": 1.56,
+      "IQ1_M": 1.75,
+      "IQ2_XXS": 2.06,
+      "IQ2_XS": 2.31,
+      "IQ2_S": 2.5,
+      "IQ3_XXS": 3.06,
+      "IQ3_XS": 3.3,
+      "IQ3_S": 3.44,
+      "IQ3_M": 3.66,
+      "Q2_K": 3.35,
+      "Q3_K_S": 3.5,
+      "Q3_K_M": 3.91,
+      "Q3_K_L": 4.27,
+      "IQ4_XS": 4.25,
+      "Q4_0": 4.55,
+      "Q4_K_S": 4.58,
+      "Q4_K_M": 4.85,
+      "Q5_0": 5.54,
+      "Q5_K_S": 5.54,
+      "Q5_K_M": 5.69,
+      "Q6_K": 6.59,
+      "Q8_0": 8.5,
+    }
+    function sanitize(string) {
+      const map = {
+        '&': '&amp;',
+        '<': '&lt;',
+        '>': '&gt;',
+        '"': '&quot;',
+        "'": '&#x27;',
+        "/": '&#x2F;',
+      };
+      const reg = /[&<>"'/]/ig;
+      return string.replace(reg, (match) => (map[match]));
+    }
+    async function modelConfig(hf_model) {
+      let config = {}
+      try {
+        config = await fetch(
+          `https://huggingface.co/${hf_model}/raw/main/config.json`
+        ).then(r => r.json());
+      } catch (err) {
+        alert(sanitize(err));
+        return config;
+      }
+      let model_size = 0
+      try {
+        model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
+        if (isNaN(model_size)) {
+          throw new Erorr("no size in safetensors metadata")
+        }
+      } catch (e) {
+        try {
+          model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
+          if (isNaN(model_size)) {
+            throw new Erorr("no size in pytorch metadata")
+          }
+        } catch {
+          let model_page = await fetch(
+            "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
+          ).then(r => r.text())
+          let el = document.createElement('html');
+          el.innerHTML = model_page
+          let params_el = el.querySelector('div[data-target="ModelSafetensorsParams"]')
+          if (params_el !== null) {
+            model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["safetensors"]["total"]
+          } else {
+            params_el = el.querySelector('div[data-target="ModelHeader"]')
+            model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["model"]["safetensors"]["total"]
+          }
+        }
+      }
+      config.parameters = model_size
+      return config
+    }
+    function inputBuffer(context = 8192, model_config, bsz = 512) {
+      /* Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248
+        ctx->inp_tokens  = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
+        ctx->inp_embd    = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch);
+        ctx->inp_pos     = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
+        ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch);
+        ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx);
+        ctx->inp_sum     = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch);
+        n_embd is hidden size (github:ggeranov/llama.cpp/convert.py:248)
+      */
+      const inp_tokens = bsz
+      const inp_embd = model_config["hidden_size"] * bsz
+      const inp_pos = bsz
+      const inp_KQ_mask = context * bsz
+      const inp_K_shift = context
+      const inp_sum = bsz
+      return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum
+    }
+    function computeBuffer(context = 8192, model_config, bsz = 512) {
+      if (bsz != 512) {
+        alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result result will be an overestimatition")
+      }
+      return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
+    }
+    function kvCache(context = 8192, model_config, cache_bit = 16) {
+      const n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
+      const n_embd_gqa = model_config["hidden_size"] / n_gqa
+      const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
+      const size = 2 * n_elements
+      return size * (cache_bit / 8)
+    }
+    function contextSize(context = 8192, model_config, bsz = 512, cache_bit = 16) {
+      return Number.parseFloat((inputBuffer(context, model_config, bsz) + kvCache(context, model_config, cache_bit) + computeBuffer(context, model_config, bsz)).toFixed(2))
+    }
+    function modelSize(model_config, bpw = 4.5) {
+      return Number.parseFloat((model_config["parameters"] * bpw / 8).toFixed(2))
+    }
+    async function calculateSizes(format, context_loc) {
+      format = "gguf"
+      try {
+        const model_config = await modelConfig(document.getElementById("modelsearch").value)
+        const context = parseInt(document.getElementById("contextsize").value)
+        let bsz = 512
+        let cache_bit = 16
+        let bpw = 0
+        if (format === "gguf") {
+          bsz = parseInt(document.getElementById("batchsize").value)
+          bpw = gguf_quants[document.getElementById("quantsize").innerText]
+        } else if (format == "exl2") {
+          cache_bit = Number.parseInt(document.getElementById("kvCache").value)
+          bpw = Number.parseFloat(document.getElementById("bpw").value)
+        }
+        const model_size = modelSize(model_config, bpw)
+        const context_size = contextSize(context, model_config, bsz, cache_bit)
+        const total_size = ((model_size + context_size) / 2 ** 30)
+        document.getElementById("resultmodel").innerText = (model_size / 2 ** 30).toFixed(2)
+        document.getElementById("resultcontext").innerText = (context_size / 2 ** 30).toFixed(2)
+        const result_total_el = document.getElementById("resulttotal");
+        result_total_el.innerText = total_size.toFixed(2)
+        const allocated_vram = Number.parseInt(document.getElementById("maxvram").value);
+        const vram = allocated_vram
+        if (vram - total_size > 0.5) {
+          result_total_el.style.backgroundColor = "#bef264"
+        } else if (vram - total_size > 0) {
+          result_total_el.style.backgroundColor = "#facc15"
+        } else {
+          result_total_el.style.backgroundColor = "#ef4444"
+        }
+        const layer_size = ((model_size / 2 ** 30) / model_config["num_hidden_layers"])
+        const layer_size_el = document.getElementById("layersize");
+        layer_size_el.innerText = layer_size.toFixed(2)
+        const context_dealloc = context_loc === "vram" ? (context_size / 2 ** 30) : 0;
+        const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size)
+        const layers_offload_el = document.getElementById("layersoffload");
+        layers_offload_el.innerText = `${layers_offload > model_config["num_hidden_layers"] ? model_config["num_hidden_layers"] : Math.max(0, layers_offload)}/${model_config["num_hidden_layers"]}`
+      } catch (e) {
+        alert(e);
+      }
+    }
+  </script>
+  <link href="./styles.css" rel="stylesheet">
+  <title>Can I split it? - GGUF VRAM Calculator</title>
+</head>
+<body class="p-8">
+  <div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
+    <div style="text-align: center;">
+      <h1 class="text-xl font-semibold leading-6 text-gray-900">
+        GGUF Model, Can I split it?
+      </h1>
+      <h3 class="font-semibold leading-6 text-gray-900">
+        Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a
+          href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM
+          calculator</a>
+      </h3>
+    </div>
+    <div class="flex flex-col gap-10">
+      <div class="w-auto flex flex-col gap-4">
+        <div class="relative">
+          <label for="maxvram"
+            class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Max Allocated VRAM
+          </label>
+          <input value="24" type="number" name="maxvram" id="maxvram" step="1"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
+        </div>
+        <!-- Model Selector -->
+        <div class="flex flex-row gap-4 relative">
+          <label for="modelsearch"
+            class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Model (unquantized)
+          </label>
+          <!-- Model typeahead component. The class list is given once: a repeated class attribute is ignored by the HTML parser. -->
+          <div
+            class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
+            x-data="{
+                          open: false,
+                          value: 'Nexusflow/Starling-LM-7B-beta',
+                          results: null,
+                          toggle() {
+                            if (this.open) {
+                              return this.close()
+                            }
+                            this.$refs.input.focus()
+                            this.open = true
+                          },
+                          close(focusAfter) {
+                            if (! this.open) return
+                            this.open = false
+                            focusAfter && focusAfter.focus()
+                          }
+                        }" x-on:keydown.escape.prevent.stop="close($refs.input)" x-id="['model-typeahead']">
+            <!-- Input -->
+            <!-- Search on "input" rather than the deprecated "keypress" so deletions and pastes also refresh the results -->
+            <input id="modelsearch" x-ref="input" x-on:click="toggle()"
+              @input.debounce.150ms="results = (await
+                    fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
+                    encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
+              :aria-expanded="open" :aria-controls="$id('model-typeahead')" x-model="value"
+              class="flex justify-between items-center gap-2 w-full" />
+            <!-- Panel -->
+            <div x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.input)"
+              :id="$id('model-typeahead')" style="display: none"
+              class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
+              <template x-for="result in results">
+                <a @click="value = result.id; close($refs.input)" x-text="result.id"
+                  class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
+              </template>
+            </div>
+          </div>
+        </div>
+        <!-- Context Size Selector -->
+        <div class="relative">
+          <label for="contextsize"
+            class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Context Size
+          </label>
+          <input value="8192" type="number" name="contextsize" id="contextsize" step="1024"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
+        </div>
+        <div class="relative">
+          <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">Context
+            offloaded to</label>
+          <fieldset x-model="context_loc"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
+            <legend class="sr-only">Context location</legend>
+            <div class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0">
+              <div class="flex items-center">
+                <input id="context-vram" name="context-allocation" type="radio" value="vram" checked
+                  class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
+                <label for="context-vram" class="ml-3 block text-sm font-medium leading-6 text-gray-900">VRAM</label>
+              </div>
+              <div class="flex items-center">
+                <input id="context-ram" name="context-allocation" type="radio" value="ram"
+                  class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
+                <label for="context-ram" class="ml-3 block text-sm font-medium leading-6 text-gray-900">RAM</label>
+              </div>
+            </div>
+          </fieldset>
+        </div>
+        <!-- GGUF Options -->
+        <div x-show="format === 'gguf'" class="relative">
+          <div class="flex flex-row gap-4">
+            <label for="quantsize"
+              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+              Quantization Size
+            </label>
+            <!-- Quantization dropdown component. The class list is given once: a repeated class attribute is ignored by the HTML parser. -->
+            <div
+              class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
+              x-data="{
+                open: false,
+                value: '',
+                toggle() {
+                  if (this.open) {
+                    return this.close()
+                  }
+                  this.$refs.button.focus()
+                  this.open = true
+                },
+                close(focusAfter) {
+                  if (! this.open) return
+                  this.open = false
+                  focusAfter && focusAfter.focus()
+                }
+              }" x-on:keydown.escape.prevent.stop="close($refs.button)" x-id="['dropdown-button']">
+              <!-- Button -->
+              <button x-ref="button" x-on:click="toggle()" :aria-expanded="open" :aria-controls="$id('dropdown-button')"
+                type="button" id="quantsize" x-text="value.length === 0 ? 'Q4_K_S' : value"
+                class="flex justify-between items-center gap-2 w-full">
+                Q4_K_S
+                <!-- Heroicon: chevron-down -->
+                <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-gray-400" viewBox="0 0 20 20"
+                  fill="currentColor">
+                  <path fill-rule="evenodd"
+                    d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
+                    clip-rule="evenodd" />
+                </svg>
+              </button>
+              <!-- Panel -->
+              <div x-data="{ quants: [
+                    'IQ1_S',
+                    'IQ1_M',
+                    'IQ2_XXS',
+                    'IQ2_XS',
+                    'IQ2_S',
+                    'IQ3_XXS',
+                    'IQ3_XS',
+                    'IQ3_S',
+                    'IQ3_M',
+                    'Q2_K',
+                    'Q3_K_S',
+                    'Q3_K_M',
+                    'Q3_K_L',
+                    'IQ4_XS',
+                    'Q4_0',
+                    'Q4_K_S',
+                    'Q4_K_M',
+                    'Q5_0',
+                    'Q5_K_S',
+                    'Q5_K_M',
+                    'Q6_K',
+                    'Q8_0'
+                  ]}" x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.button)"
+                :id="$id('dropdown-button')" style="display: none"
+                class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
+                <template x-for="quant in quants">
+                  <a @click="value = quant; close($refs.button)" x-text="quant"
+                    class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
+                </template>
+              </div>
+            </div>
+            <div class="relative">
+              <label for="batchsize"
+                class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+                Batch Size
+              </label>
+              <input value="512" type="number" step="128" id="batchsize"
+                class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
+            </div>
+          </div>
+        </div>
+        <button type="button"
+          class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
+          @click="calculateSizes(format, context_loc)">
+          Submit
+        </button>
+      </div>
+      <div class="w-auto flex flex-col gap-4">
+        <div class="relative">
+          <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Model Size (GB)
+          </label>
+          <div id="resultmodel"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
+            4.20</div>
+        </div>
+        <div class="relative">
+          <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Context Size (GB)
+          </label>
+          <div id="resultcontext"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
+            6.90</div>
+        </div>
+        <div class="relative">
+          <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Total Size (GB)
+          </label>
+          <div id="resulttotal"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
+            420.69</div>
+        </div>
+        <div class="relative">
+          <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Layer size (GB)
+          </label>
+          <div id="layersize"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
+            42.69</div>
+        </div>
+        <div class="relative">
+          <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
+            Layers offloaded to GPU (out of total)
+          </label>
+          <div id="layersoffload"
+            class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
+            42</div>
+        </div>
+      </div>
+    </div>
+  </div>
+  <!-- NOTE(review): the package@version part of this URL was replaced by an e-mail-obfuscation placeholder in the page this was copied from; restored as alpinejs@3.x.x - confirm the intended version -->
+  <script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
+  <script>
+    // Run one calculation on page load so the result fields are filled from the default inputs.
+    // (No "defer": the attribute has no effect on an inline script without src.)
+    calculateSizes("gguf", "vram")
+  </script>
+</body>
+</html>