# Source: Hugging Face repo remyxai/SpaceLLaVA — SpaceLLaVA/docker/Dockerfile
# Tags: Text Generation, Transformers, Safetensors, GGUF, llava, remyx,
#       Inference Endpoints
# Commit: "Add SpaceLLaVA Triton Server" (0ea9ef5, verified, by salma-remyx)
# Triton Inference Server base (CUDA-enabled), pinned to a specific release tag.
FROM nvcr.io/nvidia/tritonserver:22.11-py3

WORKDIR /workspace

# Build prerequisites for the TensorRT OSS plugin build below.
# update+install combined in one layer (avoids stale apt cache), recommends
# skipped and apt lists removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends cmake \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps the pip wheel cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade tensorrt

# TensorRT OSS sources (needed to build the plugin library).
# NOTE(review): cloning the moving `main` branch is not reproducible — pin a
# release tag or commit SHA that matches the 22.11 base once validated.
RUN git clone https://github.com/NVIDIA/TensorRT.git -b main --single-branch \
    && cd TensorRT \
    && git submodule update --init --recursive

ENV TRT_OSSPATH=/workspace/TensorRT
WORKDIR ${TRT_OSSPATH}

# Build only the plugin target (libnvinfer_plugin.so) out of the OSS tree.
RUN mkdir -p build \
    && cd build \
    && cmake .. -DTRT_OUT_DIR=$PWD/out \
    && cd plugin \
    && make -j$(nproc)

# Runtime hook: path of the freshly built plugin library so the server (or an
# entrypoint script) can preload it.
ENV PLUGIN_LIBS="${TRT_OSSPATH}/build/out/libnvinfer_plugin.so"

# SpaceLLaVA weights: q4_0-quantized language model + f16 multimodal projector.
# Fetched in a single layer — one logical step, and a failed second download
# cannot leave a half-populated cached layer behind.
WORKDIR /weights
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/ggml-model-q4_0.gguf \
    && wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/mmproj-model-f16.gguf

# PyTorch stack pinned to CUDA 11.8 wheels matching the base image's CUDA.
RUN python3 -m pip install --no-cache-dir torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118

# llama-cpp-python compiled with cuBLAS so GGUF inference can offload to GPU;
# --force-reinstall ensures the CUDA-enabled build replaces any CPU wheel.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.45 --force-reinstall --no-cache-dir

# Triton model repository (config.pbtxt + python backends) baked into the image.
WORKDIR /models
COPY ./models/ .

WORKDIR /workspace

# NOTE(review): no USER directive — the server runs as root. Consider adding a
# non-root user if the deployment environment permits it.
CMD ["tritonserver", "--model-store=/models"]