<!doctype html>
<!--
  Single document: the doctype has to be the first thing in the file, otherwise
  the parser ignores it and the page is rendered in quirks mode.
-->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Candle Llama.c Rust/WASM</title>
    <style>
      /* Web fonts used by the two rules below. */
      @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");

      /* Default typeface for the page. */
      html, body {
        font-family: "Source Sans 3", sans-serif;
      }

      /* Monospace typeface for code-like elements and the model picker. */
      code, output, select, pre {
        font-family: "Source Code Pro", monospace;
      }
    </style>
    <script src="https://cdn.tailwindcss.com"></script>
    <script type="module">
      // Base URL that the model weight file names below are appended to
      const MODELS_BASE_URL =
        "https://huggingface.co/karpathy/tinyllamas/resolve/main";

      // Selectable models, keyed by the <option> values of the #model select.
      // `url` is the weights file name relative to MODELS_BASE_URL; `seq_len`
      // is applied as the `max` of the "Maximum length" slider when the model
      // selection changes.
      const MODELS = {
        stories15M: {
          url: "stories15M.bin",
          seq_len: 256,
        },
        stories42M: {
          url: "stories42M.bin",
          seq_len: 1024,
        },
        stories110M: {
          url: "stories110M.bin",
          seq_len: 1024,
        },
      };

      // Module worker that receives the "start" / "abort" commands posted by
      // generateSequence and reports back through `message` events.
      const llamaWorker = new Worker("./llama2cWorker.js", {
        type: "module",
      });
      /**
       * Starts one generation on the llama worker using the current values of
       * the page controls and mirrors the worker's progress messages into the
       * output area.
       *
       * The worker listeners registered for the run are removed as soon as the
       * run settles, so repeated runs do not stack handlers.
       *
       * @param {AbortController} controller - Aborting its signal sends an
       *   "abort" command to the worker and settles the returned promise.
       * @returns {Promise<object|undefined>} Resolves with the worker's
       *   "complete" message, or with `undefined` when the run is aborted.
       *   Rejects with an `Error` when a worker message carries `error`, or
       *   when the seed field cannot be converted to a BigInt.
       */
      async function generateSequence(controller) {
        const getValue = (id) => document.querySelector(`#${id}`).value;
        const modelID = getValue("model");
        const model = MODELS[modelID];
        const weightsURL = `${MODELS_BASE_URL}/${model.url}`;
        const prompt = getValue("prompt");
        const temperature = getValue("temperature");
        const repeatPenalty = getValue("repeat_penalty");
        // Converted up front so an invalid seed rejects before any listener is
        // registered or anything is posted to the worker.
        const seed = BigInt(getValue("seed"));
        const maxSeqLen = getValue("max-seq");

        // Reflects one worker status message in the output area.
        function updateStatus(data) {
          const outStatus = document.querySelector("#output-status");
          const outGen = document.querySelector("#output-generation");
          const outCounter = document.querySelector("#output-counter");

          switch (data.status) {
            case "loading":
              outStatus.hidden = false;
              outStatus.textContent = data.message;
              outGen.hidden = true;
              outCounter.hidden = true;
              break;
            case "generating": {
              const { prompt: promptText, sentence, tokensSec, totalTime } = data;
              outStatus.hidden = true;
              outCounter.hidden = false;
              outGen.hidden = false;
              // Build nodes instead of assigning innerHTML: the prompt is user
              // input and the sentence is model output, so both must be
              // rendered as plain text rather than parsed as markup.
              const promptSpan = document.createElement("span");
              promptSpan.className = "font-semibold";
              promptSpan.textContent = promptText;
              outGen.replaceChildren(
                promptSpan,
                // Drop the <s> / </s> sequence markers from the shown text.
                sentence.replace(/<s>|<\/s>/g, "")
              );
              outCounter.textContent = `${(totalTime / 1000).toFixed(
                2
              )}s (${tokensSec.toFixed(2)} tok/s)`;
              break;
            }
            case "complete":
              outStatus.hidden = true;
              outGen.hidden = false;
              break;
          }
        }

        return new Promise((resolve, reject) => {
          // Detach both listeners of this run once it has settled.
          const cleanup = () => {
            llamaWorker.removeEventListener("message", handleMessage);
            controller.signal.removeEventListener("abort", handleAbort);
          };
          const handleAbort = () => {
            llamaWorker.postMessage({ command: "abort" });
            // How the worker answers an abort is not visible from this file,
            // so the run is settled here instead of waiting for a reply.
            cleanup();
            resolve();
          };
          const handleMessage = (event) => {
            const { status, error } = event.data;
            if (status) updateStatus(event.data);
            if (error) {
              cleanup();
              reject(new Error(error));
            } else if (status === "complete") {
              cleanup();
              resolve(event.data);
            }
          };

          controller.signal.addEventListener("abort", handleAbort);
          llamaWorker.addEventListener("message", handleMessage);

          llamaWorker.postMessage({
            weightsURL,
            modelID,
            tokenizerURL: "tokenizer.json",
            prompt,
            temp: temperature,
            repeatPenalty,
            seed,
            maxSeqLen,
            command: "start",
          });
        });
      }

      // Page controls wired up by the listeners below.
      const form = document.getElementById("form");
      const prompt = document.getElementById("prompt");
      const clearBtn = document.getElementById("clear-btn");
      const runBtn = document.getElementById("run");
      const modelSelect = document.getElementById("model");

      // Run state: the controller handed to the generation in progress and a
      // flag telling whether one is currently running.
      let runController = new AbortController();
      let isRunning = false;

      // When another model is picked, cap the "Maximum length" slider at that
      // model's sequence length and refresh the number shown next to it.
      modelSelect.addEventListener("change", (e) => {
        const model = MODELS[e.target.value];
        const maxSeqInput = document.querySelector("#max-seq");
        maxSeqInput.max = model.seq_len;
        // Show the slider's actual value (which the browser clamps to the new
        // max) rather than the max itself, so the displayed number is always
        // the one that is read when a run starts.
        maxSeqInput.nextElementSibling.value = maxSeqInput.value;
      });

      // Submitting the form toggles the run: it stops the generation in
      // progress, or starts a new one and waits for it to settle.
      form.addEventListener("submit", async (e) => {
        e.preventDefault();
        if (isRunning) {
          stopRunning();
          return;
        }
        startRunning();
        try {
          await generateSequence(runController);
        } catch (err) {
          console.error(err);
        } finally {
          // Also reached when the run rejects, so the button never stays
          // stuck on "Stop" after a failure.
          stopRunning();
        }
      });

      // Flags a generation as in progress and turns the run button into "Stop".
      function startRunning() {
        runBtn.textContent = "Stop";
        isRunning = true;
      }

      // Aborts the current controller, swaps in a fresh one for the next run,
      // and puts the run button and the running flag back to idle.
      function stopRunning() {
        runController.abort();
        isRunning = false;
        runBtn.textContent = "Run";
        runController = new AbortController();
      }
      // Clear button: empties the prompt, hides itself, disables the run
      // button and stops whatever is running.
      clearBtn.addEventListener("click", (event) => {
        event.preventDefault();
        prompt.value = "";
        runBtn.disabled = true;
        clearBtn.classList.add("invisible");
        stopRunning();
      });
      // Typing in the prompt re-enables the run button and shows the clear
      // button only while the field has content.
      prompt.addEventListener("input", ({ target }) => {
        runBtn.disabled = false;
        clearBtn.classList.toggle("invisible", target.value.length === 0);
      });
    </script>
  </head>
  <body class="container max-w-4xl mx-auto p-4 text-gray-800">
    <main class="grid grid-cols-1 gap-8 relative">
      <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
      <!-- Page heading and a short description of the demo. -->
      <div>
        <h1 class="text-5xl font-bold">Candle Llama2.c</h1>
        <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
        <p class="max-w-lg">
          <a
            href="https://github.com/karpathy/llama2.c"
            target="_blank"
            rel="noopener"
            class="underline hover:text-blue-500 hover:no-underline"
            >Llama2.c</a
          >
          is Andrej Karpathy's C implementation of the Llama 2 LLM.
          This demo uses
          <a
            href="https://github.com/huggingface/candle/"
            target="_blank"
            rel="noopener"
            class="underline hover:text-blue-500 hover:no-underline"
            >Candle
          </a>
          to run Llama2.c in the browser using rust/wasm.
        </p>
      </div>

      <!-- Model picker: each option value is a key of the MODELS table in the page script. -->
      <div>
        <label class="font-medium" for="model">Models Options: </label>
        <select class="border-2 border-gray-500 rounded-md font-light" id="model">
          <option selected value="stories15M">stories 15M (60.8 MB)</option>
          <option value="stories42M">stories 42M (167 MB)</option>
          <option value="stories110M">stories 110M (438 MB)</option>
        </select>
      </div>
      <!-- Prompt form: submitting it starts a run, or stops the one in progress. -->
      <form
        id="form"
        class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center"
      >
        <input type="submit" hidden />
        <!-- The placeholder is not a label, so the field gets an explicit accessible name. -->
        <input
          type="text"
          id="prompt"
          class="font-light w-full px-3 py-2 mx-1 resize-none outline-none"
          placeholder="Add your prompt here..."
          value="Once upon a time"
          aria-label="Prompt"
        />
        <!-- Icon-only button: named via aria-label; type="button" keeps it from submitting the form. -->
        <button id="clear-btn" type="button" aria-label="Clear prompt">
          <svg
            fill="none"
            xmlns="http://www.w3.org/2000/svg"
            width="40"
            viewBox="0 0 70 40"
            aria-hidden="true"
          >
            <path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" />
            <path
              d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1"
              opacity=".5"
              stroke="#1F2937"
              stroke-width="2"
            />
          </svg>
        </button>
        <button
          id="run"
          type="submit"
          class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed"
        >
          Run
        </button>
      </form>
      <!--
        Generation parameters. Each slider is immediately followed by the
        <output> that shows its value; the inline handlers and the page script
        reach that element through nextElementSibling, so keep the two adjacent.
      -->
      <div class="grid grid-cols-3 max-w-md items-center gap-3">
        <label class="text-sm font-medium" for="max-seq">Maximum length </label>
        <input
          type="range"
          id="max-seq"
          name="max-seq"
          min="1"
          max="256"
          step="1"
          value="200"
          oninput="this.nextElementSibling.value = Number(this.value)"
        />
        <output
          class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
        >
          200</output
        >
        <label class="text-sm font-medium" for="temperature">Temperature</label>
        <input
          type="range"
          id="temperature"
          name="temperature"
          min="0"
          max="2"
          step="0.01"
          value="0.50"
          oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
        />
        <output
          class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
        >
          0.50</output
        >

        <label class="text-sm font-medium" for="repeat_penalty"
          >Repeat Penalty</label
        >

        <input
          type="range"
          id="repeat_penalty"
          name="repeat_penalty"
          min="1"
          max="2"
          step="0.01"
          value="1.10"
          oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
        />
        <output
          class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
          >1.10</output
        >
        <label class="text-sm font-medium" for="seed">Seed</label>
        <input
          type="number"
          id="seed"
          name="seed"
          value="299792458"
          class="font-light border border-gray-700 text-right rounded-md p-2"
        />
        <!-- Own id: "run" already belongs to the form's run button and ids must be unique. -->
        <button
          id="rand-seed"
          type="button"
          onclick="document.querySelector('#seed').value = BigInt(Math.floor(Math.random() * 2**64-1))"
          class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm"
        >
          Rand
        </button>
      </div>
      <!-- Output area: the page script shows and hides the three children by id. -->
      <div>
        <h3 class="font-medium">Generation:</h3>
        <div
          class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
        >
          <!-- Elapsed time and tokens per second. -->
          <div class="ml-auto font-semibold grid-rows-1 text-sm" id="output-counter" hidden></div>
          <!-- Prompt followed by the generated text. -->
          <p class="grid-rows-2" id="output-generation" hidden></p>
          <!-- Placeholder and loading messages. -->
          <span class="m-auto font-light" id="output-status">No output yet</span>
        </div>
      </div>
    </main>
  </body>
</html>