Ved Gupta committed
Commit b528013 · 1 Parent(s): a65ba2f

Update Dockerfile and README.md

Files changed (4):
  1. Dockerfile +3 -3
  2. README.md +4 -0
  3. models/gpt-3.5-turbo.yaml +0 -30
  4. setup.sh +0 -6
Dockerfile CHANGED

```diff
@@ -1,7 +1,7 @@
-FROM quay.io/go-skynet/local-ai:v2.7.0-ffmpeg-core
+FROM python:3.9-alpine
 
+RUN mkdir models
 RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
-COPY models/gpt-3.5-turbo.yaml models/gpt-3.5-turbo.yaml
 
 RUN useradd -m -u 1000 user
 USER user
@@ -12,4 +12,4 @@ RUN cp -R . $HOME
 WORKDIR $HOME
 
 EXPOSE 8080
-CMD ["--models-path", "./models"]
+CMD ["python3", "-m", "llama_cpp.server", "--model", "models/mistral-7b-instruct-v0.2.Q4_0.gguf"]
```
README.md CHANGED

````diff
@@ -10,6 +10,10 @@ app_port: 8080
 
 
 ```bash
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
+wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+
+
 curl https://innovatorved-api.hf.space/v1/models
 
 curl https://innovatorved-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
````
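The added `CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1` line compiles llama-cpp-python with cuBLAS GPU offload; the chat-completions example itself is truncated mid-payload in the diff. For reference, a typical OpenAI-style body that llama-cpp-python's server accepts looks like the sketch below; the message content is illustrative, not recovered from the README:

```bash
# Illustrative request only; the README's original payload is cut off above.
curl https://innovatorved-api.hf.space/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "user", "content": "Hello, who are you?"}
    ]
  }'
```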
models/gpt-3.5-turbo.yaml DELETED

```diff
@@ -1,30 +0,0 @@
-name: gpt-3.5-turbo
-# Default model parameters
-parameters:
-  # Relative to the models path
-  model: mistral-7b-instruct-v0.2.Q4_0.gguf
-  # temperature
-  temperature: 0.3
-  # all the OpenAI request options here..
-
-# Default context size
-context_size: 512
-threads: 10
-
-# Enable prompt caching
-prompt_cache_path: "alpaca-cache"
-prompt_cache_all: true
-
-# stopwords (if supported by the backend)
-stopwords:
-- "HUMAN:"
-- "### Response:"
-# define chat roles
-roles:
-  assistant: '### Response:'
-  system: '### System Instruction:'
-  user: '### Instruction:'
-template:
-  # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
-  completion: completion
-  chat: chat
```
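This YAML was LocalAI's model definition: it mapped the alias `gpt-3.5-turbo` onto the GGUF file and set sampling, context, caching, stopword, and prompt-template options. llama-cpp-python's server takes the model path directly on the command line, so the file becomes dead weight once the base image changes. If clients still expect the old alias, a hedged sketch (the `--model_alias` flag is an assumption about llama_cpp.server's CLI, not something this commit uses):

```bash
# Sketch: keep serving under the old OpenAI-style model name.
# --model_alias is assumed from llama-cpp-python's server settings.
python3 -m llama_cpp.server \
  --model models/mistral-7b-instruct-v0.2.Q4_0.gguf \
  --model_alias gpt-3.5-turbo
```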
setup.sh DELETED

```diff
@@ -1,6 +0,0 @@
-mkdir models
-
-wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
-
-docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
-
```
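setup.sh covered the old LocalAI workflow: create `models/`, fetch the GGUF weights, then run the `quay.io/go-skynet/local-ai` image with the directory mounted. With the download and server now baked into the updated Dockerfile, the local equivalent reduces to building and running the image; a minimal sketch (the tag `api` is illustrative):

```bash
# Build the image from the updated Dockerfile and run it locally.
# The tag "api" is illustrative; any tag works.
docker build -t api .
docker run --rm -p 8080:8080 api
```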