# Image that builds the llama.cpp `main` and `server` binaries with OpenBLAS,
# bundles a quantized OpenChat 3.5 GGUF model, and runs ./start_server.sh.

# Grab a fresh copy of the Python image
FROM python:3.11-slim

# Install build and runtime dependencies.
# --no-install-recommends keeps optional packages out of the image, and the
# apt package lists are removed in the same layer so they never add to its size.
# ca-certificates is listed explicitly because the HTTPS git clone and the curl
# download below need a CA bundle.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev \
        ninja-build \
        build-essential \
        pkg-config \
        ca-certificates \
        curl cmake git && \
    rm -rf /var/lib/apt/lists/*

# Disabled alternative, kept for reference: install llama-cpp-python's server via pip.
# RUN pip install -U pip setuptools wheel && \
#     CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose llama-cpp-python[server]

# Shallow-clone llama.cpp, build the `main` and `server` targets against
# OpenBLAS, and copy everything from build/bin into the build user's home
# directory.
# NOTE(review): the clone is not pinned to a tag or commit, so this step tracks
# upstream HEAD; an upstream rename of the CMake options or targets used here
# would break the build. Consider pinning to a known-good revision.
RUN git clone https://github.com/ggerganov/llama.cpp.git llamacpp --depth 1 && \
    cd llamacpp && \
    cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release --target main server && \
    cp build/bin/* ~/

# Download model
# --fail makes curl exit non-zero on an HTTP error, so a failed download aborts
# the build instead of saving the error response as model/gguf-model.bin.
RUN mkdir model && \
    curl --fail -L https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf -o model/gguf-model.bin

# Copy the start script and the application files into the working directory
# (no WORKDIR is set in this file, so the base image's default applies).
COPY ./start_server.sh ./main.py ./index.html ./

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Set environment variables for the host and port
# (presumably read by start_server.sh / main.py — confirm against those files).
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose a port for the server
EXPOSE ${PORT}

# Run the server start script
CMD ["/bin/sh", "./start_server.sh"]