Commits:
- try out docker sdk
- tweak dockerfile to use local builder
- install bnb
- add gradio dep
- fix venv path
- use chat.py, not app.py
- explicit gradio app launch

Files changed:
- Dockerfile +68 -0
- README.md +1 -1
- chat.py +1 -1
- entrypoint.sh +6 -0
- requirements.txt +0 -2
Dockerfile
ADDED
@@ -0,0 +1,68 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder-llamacpp
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git vim build-essential python3 python3-pip python3-dev python3-venv libblas-dev liblapack-dev libopenblas-dev cmake && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip3 install scikit-build
+
+RUN git clone --depth 1 --branch v0.1.49 https://github.com/abetlen/llama-cpp-python.git /build
+RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
+
+WORKDIR /build
+
+RUN CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py bdist_wheel
+# dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
+
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+
+LABEL maintainer="Wing Lian <[email protected]>"
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y \
+    build-essential curl libportaudio2 libasound-dev git python3 python3-pip make g++ \
+    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
+    libblas-dev liblapack-dev libopenblas-dev cmake git-lfs && \
+    git lfs install && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN groupadd -g 1000 appuser && \
+    useradd -r -u 1000 -g appuser appuser -m -d /home/appuser
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
+RUN mkdir /app
+RUN mkdir -p /opt/venv
+RUN chown -R appuser:appuser /app
+
+WORKDIR /app
+
+RUN virtualenv /opt/venv
+RUN . /opt/venv/bin/activate && \
+    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip3 install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "bitsandbytes" "gradio" && \
+    pip3 install --no-cache-dir torch torchvision torchaudio
+
+COPY --from=builder-llamacpp /build/dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl /app
+RUN . /opt/venv/bin/activate && \
+    pip3 uninstall -y llama_cpp_python && \
+    pip3 install /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl && \
+    rm /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
+
+COPY requirements.txt /app/requirements.txt
+RUN . /opt/venv/bin/activate && \
+    pip3 install --no-cache-dir -r requirements.txt
+
+RUN cp /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
+
+COPY . /app/
+
+RUN mkdir -p /opt/cache/huggingface/hub
+RUN chown -R appuser:appuser /app && find /app -type d -exec chmod 0755 {} \;
+RUN chown -R appuser:appuser /home/appuser
+RUN chmod +x /app/entrypoint.sh && \
+    chmod +x /app/chat.py
+
+ENV TRANSFORMERS_CACHE=/opt/cache/huggingface/hub
+
+USER appuser
+
+ENTRYPOINT ["/app/entrypoint.sh"]
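The Dockerfile is a two-stage build: the `builder-llamacpp` stage compiles llama-cpp-python v0.1.49 from source against OpenBLAS and emits a cp310 wheel, which the CUDA runtime stage then installs into a virtualenv in place of any prebuilt release wheel. The `cp` of `libbitsandbytes_cuda118.so` over `libbitsandbytes_cpu.so` is the usual workaround for bitsandbytes falling back to its CPU stub when it fails to auto-detect the CUDA runtime inside a container. A minimal smoke test of the installed wheel might look like the sketch below; the model path is a placeholder, not a file shipped in this repo:

```python
# Sketch: verify the locally built llama-cpp-python wheel loads and generates.
# /data/model.bin is a hypothetical GGML model file, not part of this repo.
from llama_cpp import Llama

llm = Llama(model_path="/data/model.bin", n_ctx=2048)  # load GGML weights
out = llm("Q: Name the planets in the solar system. A:", max_tokens=48, stop=["Q:"])
print(out["choices"][0]["text"])
```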
README.md
CHANGED
@@ -3,7 +3,7 @@ title: Ggml Ui
 emoji: π
 colorFrom: blue
 colorTo: gray
-sdk:
+sdk: docker
 sdk_version: 3.29.0
 app_file: chat.py
 pinned: false
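Switching to `sdk: docker` hands the Space build over to the repo's Dockerfile instead of the managed Gradio runtime. Docker Spaces are expected to serve HTTP on port 7860 by default, which is why chat.py below binds to 0.0.0.0:7860 explicitly.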
chat.py
CHANGED
@@ -87,4 +87,4 @@ with blocks:
     submit.click(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
     message.submit(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
 
-blocks.queue(concurrency_count=
+blocks.queue(max_size=32, concurrency_count=3).launch(debug=True, server_name="0.0.0.0", server_port=7860)
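Because the managed Gradio runtime no longer calls launch() on the app's behalf, chat.py has to do it itself (the "explicit gradio app launch" commit). A stripped-down sketch of the same queue-then-launch pattern, with a stub handler standing in for the real llama.cpp-backed `chat` function; note that `concurrency_count` is Gradio 3.x API, matching the pinned 3.29.0, and was removed in Gradio 4:

```python
import gradio as gr

def chat(message, history):
    # Stub standing in for the real llama.cpp-backed handler.
    history = (history or []) + [(message, f"echo: {message}")]
    # Update the chatbot and the state, and clear the textbox.
    return history, history, ""

with gr.Blocks() as blocks:
    chatbot = gr.Chatbot()
    chat_history_state = gr.State([])
    message = gr.Textbox(label="Message")
    message.submit(chat, inputs=[message, chat_history_state],
                   outputs=[chatbot, chat_history_state, message])

# queue() bounds concurrent workers; an explicit launch() on 0.0.0.0:7860
# is what makes the Docker Space reachable.
blocks.queue(max_size=32, concurrency_count=3).launch(server_name="0.0.0.0", server_port=7860)
```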
entrypoint.sh
ADDED
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+source /opt/venv/bin/activate
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+python3 chat.py
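The entrypoint activates the /opt/venv virtualenv baked into the image (the "fix venv path" commit) before starting the app, and `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` forces protobuf's pure-Python backend, a common workaround for C-extension incompatibilities with the pinned `protobuf<4`. Running the app as `exec python3 chat.py` would additionally let it receive container stop signals directly, though the script works as written.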
requirements.txt
CHANGED
@@ -1,3 +1 @@
-llama-cpp-python @ https://github.com/OpenAccess-AI-Collective/ggml-webui/releases/download/v0.1.49-rc6/llama_cpp_python-gpu-0.1.49-cp38-cp38-linux_x86_64.whl
 pyyaml
-requests
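With the wheel now built inside the image, the pinned GPU wheel URL comes out of requirements.txt; it was a cp38 build that could not have installed on this image's Python 3.10 anyway. The `requests` pin, presumably unused, is dropped as well, leaving only `pyyaml` for the requirements install step.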