Commit 8fd0c06 (parent: a9adc36)
ztime committed: add llama.cpp server

Files changed:
- Dockerfile +8 -1
- index.html +6 -6
- start_server.sh +5 -1
Dockerfile
CHANGED

@@ -8,11 +8,18 @@ RUN apt-get update && \
     ninja-build \
     build-essential \
     pkg-config \
-    curl
+    curl cmake git
 
 RUN pip install -U pip setuptools wheel && \
     CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose llama-cpp-python[server]
 
+
+RUN git clone https://github.com/ggerganov/llama.cpp.git llamacpp --depth 1 && \
+    cd llamacpp && \
+    cmake -B build && \
+    cmake --build build --config Release --target main server && \
+    cp build/bin/* ~/
+
 # Download model
 RUN mkdir model && \
     curl -L https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf -o model/gguf-model.bin
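The added RUN stage builds standalone llama.cpp binaries (main and server) alongside the existing llama-cpp-python install, so the container can serve the model natively. A local smoke test of the image might look like the sketch below; the image tag openchat-space is a placeholder, not part of the commit:

    # Build the image and run it, publishing the port used in start_server.sh (hypothetical tag)
    docker build -t openchat-space .
    docker run --rm -p 7890:7890 openchat-space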
index.html
CHANGED

@@ -1,10 +1,10 @@
 <!DOCTYPE html>
 <html>
 <head>
-    <title>
+    <title>openchat-3.5-0106-GGUF (Q4_K_M)</title>
 </head>
 <body>
-    <h1>
+    <h1>openchat-3.5-0106-GGUF (Q4_K_M)</h1>
     <p>
       With the utilization of the
       <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
@@ -16,15 +16,15 @@
     <ul>
       <li>
         The API endpoint:
-        <a href="https://
-          >https://
+        <a href="https://ztime-openchat.hf.space/v1"
+          >https://ztime-openchat.hf.space/v1</a
         >
       </li>
       <li>
         The API doc:
         <a
-          href="https://
-          >https://
+          href="https://ztime-openchat.hf.space/docs"
+          >https://ztime-openchat.hf.space/docs</a
         >
       </li>
     </ul>
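The page now advertises the Space's OpenAI-compatible base URL at /v1 and its interactive docs at /docs. Assuming the Space is reachable at the linked host, a chat-completion request could be sketched as follows; the prompt is illustrative, and omitting the model field assumes the server falls back to the single loaded GGUF model:

    # Hypothetical request against the advertised endpoint
    curl https://ztime-openchat.hf.space/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"messages": [{"role": "user", "content": "Say hello"}]}'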
start_server.sh
CHANGED

@@ -3,4 +3,8 @@
 # For mlock support
 ulimit -l unlimited
 
-python3 -B main.py
+
+
+~/server --port 7890 -m model/gguf-model.bin
+./llamacpp/build/bin/server --port 7890 -m model/gguf-model.bin
+# python3 -B main.py
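Note that the script runs its commands sequentially, so the second server invocation only executes if the ~/server process exits, effectively acting as a fallback. The commented-out python3 -B main.py presumably started llama-cpp-python's bundled server before this commit; a minimal sketch of a direct equivalent, assuming that package's llama_cpp.server module and the model path from the Dockerfile, would be:

    # Serve the downloaded GGUF through llama-cpp-python's OpenAI-compatible server (sketch)
    python3 -m llama_cpp.server --model model/gguf-model.bin --host 0.0.0.0 --port 7890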