limcheekin committed on
Commit
73cc25e
1 Parent(s): 9fcab8c

feat: first import

Browse files
Files changed (2) hide show
  1. Dockerfile +32 -0
  2. start_server.sh +6 -0
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1

# Grab a fresh copy of the Python image
FROM python:3.10-slim

# Keep everything under /app instead of the image root.
WORKDIR /app

# Install build and runtime dependencies.
# --no-install-recommends and the apt-list cleanup happen in the same
# layer so the package cache never persists into the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libopenblas-dev \
        ninja-build \
    && rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python against OpenBLAS for faster CPU inference.
# The extra is quoted so the shell cannot glob the [server] brackets.
RUN pip install --no-cache-dir -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 \
    pip install --no-cache-dir "llama-cpp-python[server]"

# Download model.
# -f makes curl fail the build on an HTTP error (e.g. 404) instead of
# silently saving an HTML error page as the model file.
RUN mkdir model && \
    curl -fL https://huggingface.co/TheBloke/orca_mini_v2_7B-GGML/resolve/main/orca-mini-v2_7b.ggmlv3.q4_0.bin \
        -o model/ggml-model-q4_0.bin

COPY ./start_server.sh ./start_server.sh

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Host/port are read from the environment by llama_cpp.server at runtime.
ENV HOST=0.0.0.0 \
    PORT=7860

# Expose a port for the server (documentation only; does not publish it)
EXPOSE ${PORT}

# Run the server start script.
# NOTE(review): intentionally left running as root — start_server.sh calls
# `ulimit -l unlimited` for mlock, which needs root or CAP_IPC_LOCK.
CMD ["/bin/sh", "./start_server.sh"]
start_server.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/sh

# Raise the locked-memory limit so llama.cpp can mlock the model into RAM.
# Needs root (or CAP_IPC_LOCK); on failure sh just prints a warning and
# continues, so the server still starts without mlock support.
ulimit -l unlimited

# exec replaces this shell so the Python server becomes PID 1 and receives
# SIGTERM directly from `docker stop` (without exec, sh stays PID 1 and the
# signal never reaches the server, forcing the 10 s kill timeout).
# -B suppresses .pyc generation; HOST/PORT come from the Dockerfile ENV.
exec python3 -B -m llama_cpp.server --model model/ggml-model-q4_0.bin