# Source: Hugging Face repo remyxai/SpaceLLaVA — SpaceLLaVA/docker/Dockerfile
# Tags: Text Generation, Transformers, Safetensors, GGUF, llava, remyx,
#       Inference Endpoints
# Commit: "Add SpaceLLaVA Triton Server" (0ea9ef5, verified, by salma-remyx)
# Triton Inference Server base (CUDA-enabled), pinned to a specific release tag.
FROM nvcr.io/nvidia/tritonserver:22.11-py3

WORKDIR /workspace

# Build prerequisites for the TensorRT OSS plugin build below.
# update+install combined in one layer (avoids stale apt cache), recommends
# skipped and apt lists removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends cmake \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps the pip wheel cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade tensorrt

# TensorRT OSS sources (needed to build the plugin library).
# NOTE(review): cloning the moving `main` branch is not reproducible — pin a
# release tag or commit SHA that matches the 22.11 base once validated.
RUN git clone https://github.com/NVIDIA/TensorRT.git -b main --single-branch \
    && cd TensorRT \
    && git submodule update --init --recursive

ENV TRT_OSSPATH=/workspace/TensorRT
WORKDIR ${TRT_OSSPATH}

# Build only the plugin target (libnvinfer_plugin.so) out of the OSS tree.
RUN mkdir -p build \
    && cd build \
    && cmake .. -DTRT_OUT_DIR=$PWD/out \
    && cd plugin \
    && make -j$(nproc)

# Runtime hook: path of the freshly built plugin library so the server (or an
# entrypoint script) can preload it.
ENV PLUGIN_LIBS="${TRT_OSSPATH}/build/out/libnvinfer_plugin.so"

# SpaceLLaVA weights: q4_0-quantized language model + f16 multimodal projector.
# Fetched in a single layer — one logical step, and a failed second download
# cannot leave a half-populated cached layer behind.
WORKDIR /weights
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/ggml-model-q4_0.gguf \
    && wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/mmproj-model-f16.gguf

# PyTorch stack pinned to CUDA 11.8 wheels matching the base image's CUDA.
RUN python3 -m pip install --no-cache-dir torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118

# llama-cpp-python compiled with cuBLAS so GGUF inference can offload to GPU;
# --force-reinstall ensures the CUDA-enabled build replaces any CPU wheel.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.45 --force-reinstall --no-cache-dir

# Triton model repository (config.pbtxt + python backends) baked into the image.
WORKDIR /models
COPY ./models/ .

WORKDIR /workspace

# NOTE(review): no USER directive — the server runs as root. Consider adding a
# non-root user if the deployment environment permits it.
CMD ["tritonserver", "--model-store=/models"]