<html> <head> <meta content="text/html;charset=utf-8" http-equiv="Content-Type" /> <title>Candle Llama.c Rust/WASM</title> </head> <body></body> </html> <!doctype html> <html> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <style> @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap"); html, body { font-family: "Source Sans 3", sans-serif; } code, output, select, pre { font-family: "Source Code Pro", monospace; } </style> <script src="https://cdn.tailwindcss.com"></script> <script type="module"> // base url for audio examples const MODELS_BASE_URL = "https://huggingface.co/karpathy/tinyllamas/resolve/main"; // models base url const MODELS = { stories15M: { url: "stories15M.bin", seq_len: 256, }, stories42M: { url: "stories42M.bin", seq_len: 1024, }, stories110M: { url: "stories110M.bin", seq_len: 1024, }, }; const llamaWorker = new Worker("./llama2cWorker.js", { type: "module", }); async function generateSequence(controller) { const getValue = (id) => document.querySelector(`#${id}`).value; const modelID = getValue("model"); const model = MODELS[modelID]; const weightsURL = `${MODELS_BASE_URL}/${model.url}`; const prompt = getValue("prompt"); const temperature = getValue("temperature"); const repeatPenalty = getValue("repeat_penalty"); const seed = getValue("seed"); const maxSeqLen = getValue("max-seq"); function updateStatus(data) { const outStatus = document.querySelector("#output-status"); const outGen = document.querySelector("#output-generation"); const outCounter = document.querySelector("#output-counter"); switch (data.status) { case "loading": outStatus.hidden = false; outStatus.textContent = data.message; outGen.hidden = true; outCounter.hidden = true; break; case "generating": const { message, prompt, sentence, tokensSec, totalTime } = data; outStatus.hidden = true; outCounter.hidden = false; outGen.hidden = false; outGen.innerHTML = `<span class="font-semibold">${prompt}</span>${sentence.replace( /\<s\>|\<\/s\>/g, "" )}`; outCounter.innerHTML = `${(totalTime / 1000).toFixed( 2 )}s (${tokensSec.toFixed(2)} tok/s)`; break; case "complete": outStatus.hidden = true; outGen.hidden = false; break; } } return new Promise((resolve, reject) => { llamaWorker.postMessage({ weightsURL, modelID, tokenizerURL: "tokenizer.json", prompt, temp: temperature, repeatPenalty, seed: BigInt(seed), maxSeqLen, command: "start", }); const handleAbort = () => { llamaWorker.postMessage({ command: "abort" }); }; const handleMessage = (event) => { const { status, error, message, prompt, sentence } = event.data; if (status) updateStatus(event.data); if (error) reject(new Error(error)); if (status === "complete") resolve(event.data); }; controller.signal.addEventListener("abort", handleAbort); llamaWorker.addEventListener("message", handleMessage); }); } const form = document.querySelector("#form"); const prompt = document.querySelector("#prompt"); const clearBtn = document.querySelector("#clear-btn"); const runBtn = document.querySelector("#run"); const modelSelect = document.querySelector("#model"); let runController = new AbortController(); let isRunning = false; modelSelect.addEventListener("change", (e) => { const model = MODELS[e.target.value]; document.querySelector("#max-seq").max = model.seq_len; document.querySelector("#max-seq").nextElementSibling.value = model.seq_len; }); form.addEventListener("submit", async (e) => { e.preventDefault(); if (isRunning) { stopRunning(); } else { startRunning(); await generateSequence(runController); stopRunning(); } }); function startRunning() { isRunning = true; runBtn.textContent = "Stop"; } function stopRunning() { runController.abort(); runController = new AbortController(); runBtn.textContent = "Run"; isRunning = false; } clearBtn.addEventListener("click", (e) => { e.preventDefault(); prompt.value = ""; clearBtn.classList.add("invisible"); runBtn.disabled = true; stopRunning(); }); prompt.addEventListener("input", (e) => { runBtn.disabled = false; if (e.target.value.length > 0) { clearBtn.classList.remove("invisible"); } else { clearBtn.classList.add("invisible"); } }); </script> </head> <body class="container max-w-4xl mx-auto p-4 text-gray-800"> <main class="grid grid-cols-1 gap-8 relative"> <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span> <div> <h1 class="text-5xl font-bold">Candle Llama2.c</h1> <h2 class="text-2xl font-bold">Rust/WASM Demo</h2> <p class="max-w-lg"> <a href="https://github.com/karpathy/llama2.c" target="_blank" class="underline hover:text-blue-500 hover:no-underline" target="_blank" >Llama2.c</a > is Andrey Karpathy's C implementation of the Llama 2 LLM model in C. This demo uses <a href="https://github.com/huggingface/candle/" target="_blank" class="underline hover:text-blue-500 hover:no-underline" >Candle </a> to run Llama2.c in the browser using rust/wasm. </p> </div> <div> <label for="model" class="font-medium">Models Options: </label> <select id="model" class="border-2 border-gray-500 rounded-md font-light" > <option value="stories15M" selected>stories 15M (60.8 MB)</option> <option value="stories42M">stories 42M (167 MB)</option> <option value="stories110M">stories 110M (438 MB)</option> </select> </div> <form id="form" class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center" > <input type="submit" hidden /> <input type="text" id="prompt" class="font-light w-full px-3 py-2 mx-1 resize-none outline-none" placeholder="Add your prompt here..." value="Once upon a time" /> <button id="clear-btn"> <svg fill="none" xmlns="http://www.w3.org/2000/svg" width="40" viewBox="0 0 70 40" > <path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" /> <path d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1" opacity=".5" stroke="#1F2937" stroke-width="2" /> </svg> </button> <button id="run" class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed" > Run </button> </form> <div class="grid grid-cols-3 max-w-md items-center gap-3"> <label class="text-sm font-medium" for="max-seq">Maximum length </label> <input type="range" id="max-seq" name="temperature" min="1" max="256" step="1" value="200" oninput="this.nextElementSibling.value = Number(this.value)" /> <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md" > 200</output > <label class="text-sm font-medium" for="temperature">Temperature</label> <input type="range" id="temperature" name="temperature" min="0" max="2" step="0.01" value="0.50" oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" /> <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md" > 0.50</output > <label class="text-sm font-medium" for="repeat_penalty" >Repeat Penalty</label > <input type="range" id="repeat_penalty" name="repeat_penalty" min="1" max="2" step="0.01" value="1.10" oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" /> <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md" >1.10</output > <label class="text-sm font-medium" for="seed">Seed</label> <input type="number" id="seed" name="seed" value="299792458" class="font-light border border-gray-700 text-right rounded-md p-2" /> <button id="run" onclick="document.querySelector('#seed').value = BigInt(Math.floor(Math.random() * 2**64-1))" class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm" > Rand </button> </div> <div> <h3 class="font-medium">Generation:</h3> <div class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2" > <div id="output-counter" hidden class="ml-auto font-semibold grid-rows-1 text-sm" ></div> <p hidden id="output-generation" class="grid-rows-2"></p> <span id="output-status" class="m-auto font-light" >No output yet</span > </div> </div> </main> </body> </html>