|
<!DOCTYPE html>
|
|
<html lang="en">
|
|
|
|
<head>
|
|
<meta charset="UTF-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<script>
|
|
/**
 * Parse a string of HTML markup into a standalone document.
 *
 * @param {string} str - HTML source text.
 * @returns {Document} The document produced by `DOMParser` for the markup.
 */
function strToHtml(str) {
  const parser = new DOMParser();
  return parser.parseFromString(str, "text/html");
}
|
|
|
|
|
|
|
|
/**
 * Convert a table into an array of plain objects, one per body row.
 *
 * The first row supplies the property names; every following row becomes an
 * object keyed by those names. Cell text is trimmed.
 *
 * @param {{rows: ArrayLike<{cells: ArrayLike<{textContent?: string, innerText?: string}>}>}} table
 *   A table element (or any object exposing `rows` / `cells` the same way).
 * @returns {Object<string, string>[]} One object per non-header row; an empty
 *   array when the table has no rows at all.
 */
function tableToObj(table) {
  const rows = Array.from((table && table.rows) || []);
  if (rows.length === 0) {
    return [];
  }

  // A missing cell (row shorter than the header) or a cell without any text
  // yields an empty string instead of throwing.
  const cellText = (cell) => {
    if (!cell) {
      return "";
    }
    return (cell.textContent || cell.innerText || "").trim();
  };

  const [headerRow, ...dataRows] = rows;
  const propNames = Array.from(headerRow.cells, (cell) => cellText(cell));

  return dataRows.map((row) => {
    const obj = {};
    propNames.forEach((name, index) => {
      obj[name] = cellText(row.cells[index]);
    });
    return obj;
  });
}
|
|
|
|
/**
 * Build a display label for each GPU record.
 *
 * @param {Array<Object<string, string>>} gpus - Records with at least a
 *   "Product Name" and a "Memory" string property.
 * @returns {string[]} Labels of the form "<Product Name> - <Memory up to the first comma>".
 */
function formatGpu(gpus) {
  return gpus.map((gpu) => {
    // Only the part of the memory description before the first comma is shown.
    const [memory] = gpu["Memory"].split(",");
    return `${gpu["Product Name"]} - ${memory}`;
  });
}
|
|
|
|
// Lookup table from GGUF quantization name to the `bpw` factor that
// calculateSizes passes to modelSize (presumably average bits per weight).
const gguf_quants = {
  "IQ1_S": 1.56,
  "IQ1_M": 1.75,
  "IQ2_XXS": 2.06,
  "IQ2_XS": 2.31,
  "IQ2_S": 2.5,
  "IQ3_XXS": 3.06,
  "IQ3_XS": 3.3,
  "IQ3_S": 3.44,
  "IQ3_M": 3.66,
  "Q2_K": 3.35,
  "Q3_K_S": 3.5,
  "Q3_K_M": 3.91,
  "Q3_K_L": 4.27,
  "IQ4_XS": 4.25,
  "Q4_0": 4.55,
  "Q4_K_S": 4.58,
  "Q4_K_M": 4.85,
  "Q5_0": 5.54,
  "Q5_K_S": 5.54,
  "Q5_K_M": 5.69,
  "Q6_K": 6.59,
  "Q8_0": 8.5,
};
|
|
|
|
/**
 * Escape the characters that are significant in HTML so the text can be
 * shown without being interpreted as markup.
 *
 * @param {*} string - Value to escape; non-strings (for example an Error)
 *   are converted with `String()` first.
 * @returns {string} The text with `&`, `<`, `>`, `"`, `'` and `/` replaced by
 *   their HTML entities.
 */
function sanitize(string) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#x27;',
    '/': '&#x2F;',
  };
  return String(string).replace(/[&<>"'/]/g, (match) => entities[match]);
}
|
|
|
|
/**
 * Read a weight-index JSON file and derive the parameter count from it.
 *
 * @param {string} indexUrl - URL of a `*.index.json` file.
 * @param {string} label - Short name of the index kind, used in the error text.
 * @returns {Promise<number>} `metadata.total_size / 2` (assumes `total_size`
 *   is a byte count of 2-byte weights - TODO confirm).
 * @throws {Error} When the file cannot be fetched or parsed, or holds no usable size.
 */
async function fetchIndexParameterCount(indexUrl, label) {
  const index = await fetch(indexUrl).then((r) => r.json());
  const parameterCount = index["metadata"]["total_size"] / 2;
  if (Number.isNaN(parameterCount)) {
    throw new Error(`no size in ${label} metadata`);
  }
  return parameterCount;
}

/**
 * Fall back to reading the parameter count from the model's web page,
 * fetched through the corsproxy.io proxy.
 *
 * @param {string} hf_model - Model id, e.g. "org/name".
 * @returns {Promise<number>} The `safetensors.total` value embedded in the page.
 * @throws {Error} When the page contains neither of the expected props elements.
 */
async function scrapeParameterCount(hf_model) {
  const model_page = await fetch(
    "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
  ).then((r) => r.text());

  // DOMParser builds an inert document: unlike assigning to innerHTML on an
  // element of the live page, inline handlers in the fetched markup cannot run.
  const page = new DOMParser().parseFromString(model_page, "text/html");

  const params_el = page.querySelector('div[data-target="ModelSafetensorsParams"]');
  if (params_el !== null) {
    return JSON.parse(params_el.getAttribute("data-props"))["safetensors"]["total"];
  }

  const header_el = page.querySelector('div[data-target="ModelHeader"]');
  if (header_el === null) {
    throw new Error(`could not determine the parameter count of ${hf_model}`);
  }
  return JSON.parse(header_el.getAttribute("data-props"))["model"]["safetensors"]["total"];
}

/**
 * Load a model's `config.json` from huggingface.co and attach its parameter
 * count as `config.parameters`.
 *
 * The parameter count is looked up in this order: the safetensors index, the
 * pytorch index, and finally the model page itself.
 *
 * @param {string} hf_model - Model id, e.g. "org/name".
 * @returns {Promise<Object>} The parsed config with an added `parameters`
 *   property. When the config itself cannot be loaded the error is shown with
 *   `alert` and an empty object (without `parameters`) is returned.
 * @throws {Error} When the config loads but no source yields a parameter count.
 */
async function modelConfig(hf_model) {
  let config = {};

  try {
    config = await fetch(
      `https://huggingface.co/${hf_model}/raw/main/config.json`
    ).then((r) => r.json());
  } catch (err) {
    // sanitize() expects text, so stringify the error object first.
    alert(sanitize(String(err)));
    return config;
  }

  const files = `https://huggingface.co/${hf_model}/resolve/main`;
  let model_size = 0;
  try {
    model_size = await fetchIndexParameterCount(`${files}/model.safetensors.index.json`, "safetensors");
  } catch {
    // No usable safetensors index; try the pytorch one next.
    try {
      model_size = await fetchIndexParameterCount(`${files}/pytorch_model.bin.index.json`, "pytorch");
    } catch {
      // Neither index is usable; last resort is the model page.
      model_size = await scrapeParameterCount(hf_model);
    }
  }

  config.parameters = model_size;
  return config;
}
|
|
|
|
/**
 * Estimate the size of the input buffers for one batch.
 *
 * The result is the sum of six terms: tokens, embeddings, positions, the KQ
 * mask, the K shift and a per-batch sum, each counted once per element
 * (unit not visible here - contextSize adds it to byte-sized values, TODO confirm).
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config; `hidden_size` is read.
 * @param {number} [bsz=512] - Batch size.
 * @returns {number} The summed buffer size.
 */
function inputBuffer(context = 8192, model_config, bsz = 512) {
  const inp_tokens = bsz;
  const inp_embd = model_config["hidden_size"] * bsz;
  const inp_pos = bsz;
  const inp_KQ_mask = context * bsz;
  const inp_K_shift = context;
  const inp_sum = bsz;

  return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum;
}
|
|
|
|
/**
 * Estimate the size of the compute buffer.
 *
 * The formula is fixed to a batch size of 512; any other batch size only
 * triggers a warning and the 512-based value is still returned.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config; `num_attention_heads` is read.
 * @param {number} [bsz=512] - Batch size (only used to decide whether to warn).
 * @returns {number} `(context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024`.
 */
function computeBuffer(context = 8192, model_config, bsz = 512) {
  if (Number(bsz) !== 512) {
    alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result will be an overestimation");
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
|
|
|
|
/**
 * Estimate the size in bytes of the key/value cache.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config; `num_attention_heads`,
 *   `num_key_value_heads`, `hidden_size` and `num_hidden_layers` are read.
 *   A config without `num_key_value_heads` is treated as having one
 *   key/value head per attention head.
 * @param {number} [cache_bit=16] - Bits stored per cache element.
 * @returns {number} Cache size in bytes.
 */
function kvCache(context = 8192, model_config, cache_bit = 16) {
  const attentionHeads = model_config["num_attention_heads"];
  const declaredKvHeads = model_config["num_key_value_heads"];
  // Configs that omit the key/value head count would otherwise produce NaN.
  const kvHeads = declaredKvHeads == null ? attentionHeads : declaredKvHeads;

  const n_gqa = attentionHeads / kvHeads;
  const n_embd_gqa = model_config["hidden_size"] / n_gqa;
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  // Doubled - presumably one set of elements for keys and one for values.
  const size = 2 * n_elements;
  return size * (cache_bit / 8);
}
|
|
|
|
/**
 * Total context-related memory: input buffers + KV cache + compute buffer.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config passed through to the three estimators.
 * @param {number} [bsz=512] - Batch size.
 * @param {number} [cache_bit=16] - Bits per KV-cache element.
 * @returns {number} The sum of the three estimates, rounded to two decimals.
 */
function contextSize(context = 8192, model_config, bsz = 512, cache_bit = 16) {
  const input = inputBuffer(context, model_config, bsz);
  const cache = kvCache(context, model_config, cache_bit);
  const compute = computeBuffer(context, model_config, bsz);
  return Number.parseFloat((input + cache + compute).toFixed(2));
}
|
|
|
|
/**
 * Estimate the size in bytes of the quantized model weights.
 *
 * @param {Object} model_config - Model config; `parameters` (parameter count) is read.
 * @param {number} [bpw=4.5] - Bits per weight of the quantization.
 * @returns {number} `parameters * bpw / 8`, rounded to two decimals.
 */
function modelSize(model_config, bpw = 4.5) {
  const bytes = model_config["parameters"] * bpw / 8;
  return Number.parseFloat(bytes.toFixed(2));
}
|
|
|
|
/**
 * Read the form inputs, estimate model and context memory, and write the
 * results (sizes in GiB, colour-coded total, per-layer size and the number of
 * layers that fit in the allocated VRAM) into the result elements.
 *
 * Any error raised along the way is shown with `alert`.
 *
 * @param {string} format - Requested format; currently overridden to "gguf".
 * @param {string} context_loc - "vram" when the context is kept in VRAM (its
 *   size is then subtracted from the VRAM available for layers); any other
 *   value leaves the full VRAM for layers.
 * @returns {Promise<void>}
 */
async function calculateSizes(format, context_loc) {
  // The format is pinned to GGUF regardless of the argument; the exl2 branch
  // below is kept but is not reachable while this override is in place.
  format = "gguf";

  const GIB = 2 ** 30;

  try {
    const model_config = await modelConfig(document.getElementById("modelsearch").value);
    if (!("parameters" in model_config)) {
      // modelConfig already reported the failure; don't render NaN results.
      return;
    }

    const context = Number.parseInt(document.getElementById("contextsize").value, 10);
    let bsz = 512;
    let cache_bit = 16;
    let bpw = 0;
    if (format === "gguf") {
      bsz = Number.parseInt(document.getElementById("batchsize").value, 10);
      // Trim so surrounding whitespace in the button markup cannot break the lookup.
      const quant = document.getElementById("quantsize").innerText.trim();
      bpw = gguf_quants[quant];
      if (bpw === undefined) {
        throw new Error(`unknown quantization "${quant}"`);
      }
    } else if (format === "exl2") {
      cache_bit = Number.parseInt(document.getElementById("kvCache").value, 10);
      bpw = Number.parseFloat(document.getElementById("bpw").value);
    }

    const model_size = modelSize(model_config, bpw);
    const context_size = contextSize(context, model_config, bsz, cache_bit);
    const total_size = (model_size + context_size) / GIB;

    document.getElementById("resultmodel").innerText = (model_size / GIB).toFixed(2);
    document.getElementById("resultcontext").innerText = (context_size / GIB).toFixed(2);
    const result_total_el = document.getElementById("resulttotal");
    result_total_el.innerText = total_size.toFixed(2);

    // Colour the total by remaining headroom: more than 0.5 left, some left, none left.
    const allocated_vram = Number.parseInt(document.getElementById("maxvram").value, 10);
    const headroom = allocated_vram - total_size;
    if (headroom > 0.5) {
      result_total_el.style.backgroundColor = "#bef264";
    } else if (headroom > 0) {
      result_total_el.style.backgroundColor = "#facc15";
    } else {
      result_total_el.style.backgroundColor = "#ef4444";
    }

    const total_layers = model_config["num_hidden_layers"];
    const layer_size = (model_size / GIB) / total_layers;
    document.getElementById("layersize").innerText = layer_size.toFixed(2);

    // Context kept in VRAM reduces what is left for model layers.
    const context_dealloc = context_loc === "vram" ? context_size / GIB : 0;
    const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size);
    const layers_shown = layers_offload > total_layers ? total_layers : Math.max(0, layers_offload);
    document.getElementById("layersoffload").innerText = `${layers_shown}/${total_layers}`;
  } catch (e) {
    alert(e);
  }
}
|
|
</script>
|
|
<link href="./styles.css" rel="stylesheet">
|
|
<title>Can I split it? - GGUF VRAM Calculator</title>
|
|
</head>
|
|
|
|
<body class="p-8">
|
|
<div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
|
|
<div style="text-align: center;">
|
|
<h1 class="text-xl font-semibold leading-6 text-gray-900">
|
|
GGUF Model, Can I split it?
|
|
</h1>
|
|
<h3 class="font-semibold leading-6 text-gray-900">
|
|
Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a
|
|
href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM
|
|
calculator</a>
|
|
</h3>
|
|
</div>
|
|
<div class="flex flex-col gap-10">
|
|
<div class="w-auto flex flex-col gap-4">
|
|
<div class="relative">
|
|
<label for="maxvram"
|
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Max Allocated VRAM
|
|
</label>
|
|
<input value="24" type="number" name="maxvram" id="maxvram" step="1"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div class="flex flex-row gap-4 relative">
|
|
<label for="modelsearch"
|
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Model (unquantized)
|
|
</label>
|
|
<div
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
|
|
x-data="{
|
|
open: false,
|
|
value: 'Nexusflow/Starling-LM-7B-beta',
|
|
results: null,
|
|
toggle() {
|
|
if (this.open) {
|
|
return this.close()
|
|
}
|
|
|
|
this.$refs.input.focus()
|
|
|
|
this.open = true
|
|
},
|
|
close(focusAfter) {
|
|
if (! this.open) return
|
|
|
|
this.open = false
|
|
|
|
focusAfter && focusAfter.focus()
|
|
}
|
|
}" x-on:keydown.escape.prevent.stop="close($refs.input)" x-id="['model-typeahead']"
|
|
class="relative">
|
|
|
|
<input id="modelsearch" x-ref="input" x-on:click="toggle()"
|
|
@keypress.debounce.150ms="results = (await
|
|
fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
|
|
encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
|
|
:aria-expanded="open" :aria-controls="$id('model-typeahead')" x-model="value"
|
|
class="flex justify-between items-center gap-2 w-full" />
|
|
|
|
|
|
<div x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.input)"
|
|
:id="$id('model-typeahead')" style="display: none"
|
|
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
|
|
<template x-for="result in results">
|
|
<a @click="value = result.id; close($refs.input)" x-text="result.id"
|
|
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
|
|
</template>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="relative">
|
|
<label for="contextsize"
|
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Context Size
|
|
</label>
|
|
<input value="8192" type="number" name="contextsize" id="contextsize" step="1024"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
|
|
</div>
|
|
|
|
<div class="relative">
|
|
<label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">Context
|
|
offloaded to</label>
|
|
<fieldset x-model="context_loc"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
|
<legend class="sr-only">Context location</legend>
|
|
<div class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0">
|
|
<div class="flex items-center">
|
|
<input id="context-vram" name="context-allocation" type="radio" value="vram" checked
|
|
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
|
|
<label for="context-vram" class="ml-3 block text-sm font-medium leading-6 text-gray-900">VRAM</label>
|
|
</div>
|
|
<div class="flex items-center">
|
|
<input id="context-ram" name="context-allocation" type="radio" value="ram"
|
|
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
|
|
<label for="context-ram" class="ml-3 block text-sm font-medium leading-6 text-gray-900">RAM</label>
|
|
</div>
|
|
</div>
|
|
</fieldset>
|
|
</div>
|
|
|
|
|
|
<div x-show="format === 'gguf'" class="relative">
|
|
<div class="flex flex-row gap-4">
|
|
<label for="quantsize"
|
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Quantization Size
|
|
</label>
|
|
<div
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
|
|
x-data="{
|
|
open: false,
|
|
value: '',
|
|
toggle() {
|
|
if (this.open) {
|
|
return this.close()
|
|
}
|
|
|
|
this.$refs.button.focus()
|
|
|
|
this.open = true
|
|
},
|
|
close(focusAfter) {
|
|
if (! this.open) return
|
|
|
|
this.open = false
|
|
|
|
focusAfter && focusAfter.focus()
|
|
}
|
|
}" x-on:keydown.escape.prevent.stop="close($refs.button)" x-id="['dropdown-button']" class="relative">
|
|
|
|
<button x-ref="button" x-on:click="toggle()" :aria-expanded="open" :aria-controls="$id('dropdown-button')"
|
|
type="button" id="quantsize" x-text="value.length === 0 ? 'Q4_K_S' : value"
|
|
class="flex justify-between items-center gap-2 w-full">
|
|
Q4_K_S
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-gray-400" viewBox="0 0 20 20"
|
|
fill="currentColor">
|
|
<path fill-rule="evenodd"
|
|
d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
|
|
clip-rule="evenodd" />
|
|
</svg>
|
|
</button>
|
|
|
|
|
|
<div x-data="{ quants: [
|
|
'IQ1_S',
|
|
'IQ1_M',
|
|
'IQ2_XXS',
|
|
'IQ2_XS',
|
|
'IQ2_S',
|
|
'IQ3_XXS',
|
|
'IQ3_XS',
|
|
'IQ3_S',
|
|
'IQ3_M',
|
|
'Q2_K',
|
|
'Q3_K_S',
|
|
'Q3_K_M',
|
|
'Q3_K_L',
|
|
'IQ4_XS',
|
|
'Q4_0',
|
|
'Q4_K_S',
|
|
'Q4_K_M',
|
|
'Q5_0',
|
|
'Q5_K_S',
|
|
'Q5_K_M',
|
|
'Q6_K',
|
|
'Q8_0'
|
|
]}" x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.button)"
|
|
:id="$id('dropdown-button')" style="display: none"
|
|
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
|
|
<template x-for="quant in quants">
|
|
<a @click="value = quant; close($refs.button)" x-text="quant"
|
|
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
|
|
</template>
|
|
</div>
|
|
</div>
|
|
<div class="relative">
|
|
<label for="batchsize"
|
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Batch Size
|
|
</label>
|
|
<input value="512" type="number" step="128" id="batchsize"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<button type="button"
|
|
class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
|
|
@click="calculateSizes(format, context_loc)">
|
|
Submit
|
|
</button>
|
|
</div>
|
|
<div class="w-auto flex flex-col gap-4">
|
|
<div class="relative">
|
|
<label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Model Size (GB)
|
|
</label>
|
|
<div id="resultmodel"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
|
4.20</div>
|
|
</div>
|
|
<div class="relative">
|
|
<label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Context Size (GB)
|
|
</label>
|
|
<div id="resultcontext"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
|
6.90</div>
|
|
</div>
|
|
<div class="relative">
|
|
<label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Total Size (GB)
|
|
</label>
|
|
<div id="resulttotal"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
|
420.69</div>
|
|
</div>
|
|
<div class="relative">
|
|
<label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Layer size (GB)
|
|
</label>
|
|
<div id="layersize"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
|
42.69</div>
|
|
</div>
|
|
<div class="relative">
|
|
<label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
|
|
Layers offloaded to GPU (out of total)
|
|
</label>
|
|
<div id="layersoffload"
|
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
|
42</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
|
<script defer>
|
|
// Run one calculation as soon as this script executes, using the "gguf"
// format with the context counted against VRAM.
calculateSizes("gguf", "vram")
|
|
</script>
|
|
</body>
|
|
|
|
</html> |