Commit 8fd0c06 (parent: a9adc36)
ztime committed: add llama.cpp server

Files changed:
- Dockerfile +8 -1
- index.html +6 -6
- start_server.sh +5 -1
Dockerfile
CHANGED

@@ -8,11 +8,18 @@ RUN apt-get update && \
     ninja-build \
     build-essential \
     pkg-config \
-    curl
+    curl cmake git
 
 RUN pip install -U pip setuptools wheel && \
     CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose llama-cpp-python[server]
 
+
+RUN git clone https://github.com/ggerganov/llama.cpp.git llamacpp --depth 1 && \
+    cd llamacpp && \
+    cmake -B build && \
+    cmake --build build --config Release --target main server && \
+    cp build/bin/* ~/
+
 # Download model
 RUN mkdir model && \
     curl -L https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf -o model/gguf-model.bin
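The added RUN stage builds standalone llama.cpp binaries (main and server) alongside the existing llama-cpp-python install, so the container can serve the model natively. A local smoke test of the image might look like the sketch below; the image tag openchat-space is a placeholder, not part of the commit:

    # Build the image and run it, publishing the port used in start_server.sh (hypothetical tag)
    docker build -t openchat-space .
    docker run --rm -p 7890:7890 openchat-space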
index.html
CHANGED

@@ -1,10 +1,10 @@
 <!DOCTYPE html>
 <html>
 <head>
-    <title>
+    <title>openchat-3.5-0106-GGUF (Q4_K_M)</title>
 </head>
 <body>
-    <h1>
+    <h1>openchat-3.5-0106-GGUF (Q4_K_M)</h1>
     <p>
       With the utilization of the
       <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
@@ -16,15 +16,15 @@
     <ul>
       <li>
         The API endpoint:
-        <a href="https://
-          >https://
+        <a href="https://ztime-openchat.hf.space/v1"
+          >https://ztime-openchat.hf.space/v1</a
         >
       </li>
       <li>
         The API doc:
         <a
-          href="https://
-          >https://
+          href="https://ztime-openchat.hf.space/docs"
+          >https://ztime-openchat.hf.space/docs</a
         >
       </li>
     </ul>
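The page now advertises the Space's OpenAI-compatible base URL at /v1 and its interactive docs at /docs. Assuming the Space is reachable at the linked host, a chat-completion request could be sketched as follows; the prompt is illustrative, and omitting the model field assumes the server falls back to the single loaded GGUF model:

    # Hypothetical request against the advertised endpoint
    curl https://ztime-openchat.hf.space/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"messages": [{"role": "user", "content": "Say hello"}]}'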
start_server.sh
CHANGED

@@ -3,4 +3,8 @@
 # For mlock support
 ulimit -l unlimited
 
-python3 -B main.py
+
+
+~/server --port 7890 -m model/gguf-model.bin
+./llamacpp/build/bin/server --port 7890 -m model/gguf-model.bin
+# python3 -B main.py
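Note that the script runs its commands sequentially, so the second server invocation only executes if the ~/server process exits, effectively acting as a fallback. The commented-out python3 -B main.py presumably started llama-cpp-python's bundled server before this commit; a minimal sketch of a direct equivalent, assuming that package's llama_cpp.server module and the model path from the Dockerfile, would be:

    # Serve the downloaded GGUF through llama-cpp-python's OpenAI-compatible server (sketch)
    python3 -m llama_cpp.server --model model/gguf-model.bin --host 0.0.0.0 --port 7890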