winglian committed
Commit 6f7e42d · 1 Parent(s): 29b3b7f

try out docker sdk

tweak dockerfile to use local builder
install bnb
add gradio dep
fix venv path
use chat.py, not app.py
explicit gradio app launch

Files changed (5)
  1. Dockerfile +68 -0
  2. README.md +1 -1
  3. chat.py +1 -1
  4. entrypoint.sh +6 -0
  5. requirements.txt +0 -2
Dockerfile ADDED
@@ -0,0 +1,68 @@
+ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder-llamacpp
+
+ RUN apt-get update && \
+     apt-get install --no-install-recommends -y git vim build-essential python3 python3-pip python3-dev python3-venv libblas-dev liblapack-dev libopenblas-dev cmake && \
+     rm -rf /var/lib/apt/lists/* && \
+     pip3 install scikit-build
+
+ RUN git clone --depth 1 --branch v0.1.49 https://github.com/abetlen/llama-cpp-python.git /build
+ RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
+
+ WORKDIR /build
+
+ RUN CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py bdist_wheel
+ # dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
+
+ FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+
+ LABEL maintainer="Wing Lian <[email protected]>"
+
+ RUN apt-get update && \
+     apt-get install --no-install-recommends -y \
+     build-essential curl libportaudio2 libasound-dev git python3 python3-pip make g++ \
+     libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
+     libblas-dev liblapack-dev libopenblas-dev cmake git-lfs && \
+     git lfs install && \
+     rm -rf /var/lib/apt/lists/*
+
+ RUN groupadd -g 1000 appuser && \
+     useradd -r -u 1000 -g appuser appuser -m -d /home/appuser
+
+ RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
+ RUN mkdir /app
+ RUN mkdir -p /opt/venv
+ RUN chown -R appuser:appuser /app
+
+ WORKDIR /app
+
+ RUN virtualenv /opt/venv
+ RUN . /opt/venv/bin/activate && \
+     pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
+     pip3 install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "bitsandbytes" "gradio" && \
+     pip3 install --no-cache-dir torch torchvision torchaudio
+
+ COPY --from=builder-llamacpp /build/dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl /app
+ RUN . /opt/venv/bin/activate && \
+     pip3 uninstall llama_cpp_python && \
+     pip3 install /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl && \
+     rm /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
+
+ COPY requirements.txt /app/requirements.txt
+ RUN . /opt/venv/bin/activate && \
+     pip3 install --no-cache-dir -r requirements.txt
+
+ RUN cp /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
+
+ COPY . /app/
+
+ RUN mkdir -p /opt/cache/huggingface/hub
+ RUN chown -R appuser:appuser /app && find /app -type d -exec chmod 0755 {} \;
+ RUN chown -R appuser:appuser /home/appuser
+ RUN chmod +x /app/entrypoint.sh && \
+     chmod +x /app/app.py
+
+ ENV TRANSFORMERS_CACHE=/opt/cache/huggingface/hub
+
+ USER appuser
+
+ ENTRYPOINT ["/app/entrypoint.sh"]
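
The build is a two-stage affair: a CUDA devel image compiles the llama-cpp-python v0.1.49 wheel against OpenBLAS, and the slimmer runtime image installs that wheel into a virtualenv at /opt/venv. For local testing (not part of this commit), the image could be built and run along these lines; the ggml-ui tag is hypothetical, and BuildKit is required because of the RUN --mount=type=cache instruction:

    # Build with BuildKit enabled (needed for RUN --mount=type=cache).
    DOCKER_BUILDKIT=1 docker build -t ggml-ui .

    # Publish port 7860, the port chat.py binds below. --gpus all assumes the
    # NVIDIA Container Toolkit is installed; note the llama.cpp wheel itself is
    # built with OpenBLAS (CPU) rather than cuBLAS.
    docker run --rm --gpus all -p 7860:7860 ggml-ui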
README.md CHANGED
@@ -3,7 +3,7 @@ title: Ggml Ui
  emoji: 🍃
  colorFrom: blue
  colorTo: gray
- sdk: gradio
+ sdk: docker
  sdk_version: 3.29.0
  app_file: chat.py
  pinned: false
chat.py CHANGED
@@ -87,4 +87,4 @@ with blocks:
  submit.click(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
  message.submit(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])

- blocks.queue(concurrency_count=10).launch(debug=True)
+ blocks.queue(max_size=32, concurrency_count=3).launch(debug=True, server_name="0.0.0.0", server_port=7860)
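
Besides the queue sizing, two things change here: the server now binds 0.0.0.0 (Gradio's default of 127.0.0.1 would be unreachable through Docker's port mapping), and it pins port 7860, the default port a Hugging Face Docker Space is expected to serve on. A quick reachability check from the host, assuming the hypothetical run command sketched above:

    # Expect a 200 once the Gradio app is up.
    curl -s -o /dev/null -w "%{http_code}\n" http://localhost:7860/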
entrypoint.sh ADDED
@@ -0,0 +1,6 @@
+ #!/bin/bash
+
+ source /opt/venv/bin/activate
+ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+ python3 chat.py
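
Since everything is installed into /opt/venv rather than the system Python, the entrypoint has to activate the venv before launching chat.py (the "fix venv path" item in the commit message); PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python forces protobuf's pure-Python backend, which pairs with the "protobuf<4" pin in the Dockerfile. For interactive debugging, the entrypoint can be bypassed, e.g. (again with the hypothetical ggml-ui tag):

    # Drop into a shell instead of starting the app.
    docker run --rm -it --entrypoint /bin/bash ggml-ui
    source /opt/venv/bin/activate   # then launch or inspect pieces by hand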
requirements.txt CHANGED
@@ -1,3 +1 @@
- llama-cpp-python @ https://github.com/OpenAccess-AI-Collective/ggml-webui/releases/download/v0.1.49-rc6/llama_cpp_python-gpu-0.1.49-cp38-cp38-linux_x86_64.whl
  pyyaml
- requests
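
Dropping the pinned wheel follows from the base-image change: it was tagged cp38, while the Ubuntu 22.04 images ship Python 3.10, so llama-cpp-python is now supplied by the cp310 wheel compiled in the Dockerfile's builder stage instead. To confirm which copy is active inside the container, something like:

    . /opt/venv/bin/activate
    pip3 show llama-cpp-python   # should report version 0.1.49 from the locally built wheel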