Ved Gupta committed
Commit b528013 · 1 Parent(s): a65ba2f

Update Dockerfile and README.md

Files changed (4):
  1. Dockerfile +3 -3
  2. README.md +4 -0
  3. models/gpt-3.5-turbo.yaml +0 -30
  4. setup.sh +0 -6
Dockerfile CHANGED

```diff
@@ -1,7 +1,7 @@
-FROM quay.io/go-skynet/local-ai:v2.7.0-ffmpeg-core
+FROM python:3.9-alpine
 
+RUN mkdir models
 RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
-COPY models/gpt-3.5-turbo.yaml models/gpt-3.5-turbo.yaml
 
 RUN useradd -m -u 1000 user
 USER user
@@ -12,4 +12,4 @@ RUN cp -R . $HOME
 WORKDIR $HOME
 
 EXPOSE 8080
-CMD ["--models-path", "./models"]
+CMD ["python3", "-m", "llama_cpp.server", "--model", "models/mistral-7b-instruct-v0.2.Q4_0.gguf"]
```
README.md CHANGED

````diff
@@ -10,6 +10,10 @@ app_port: 8080
 
 
 ```bash
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
+wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+
+
 curl https://innovatorved-api.hf.space/v1/models
 
 curl https://innovatorved-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
````
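The added `CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1` line compiles llama-cpp-python with cuBLAS GPU offload; the chat-completions example itself is truncated mid-payload in the diff. For reference, a typical OpenAI-style body that llama-cpp-python's server accepts looks like the sketch below; the message content is illustrative, not recovered from the README:

```bash
# Illustrative request only; the README's original payload is cut off above.
curl https://innovatorved-api.hf.space/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "user", "content": "Hello, who are you?"}
    ]
  }'
```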
models/gpt-3.5-turbo.yaml DELETED

```diff
@@ -1,30 +0,0 @@
-name: gpt-3.5-turbo
-# Default model parameters
-parameters:
-  # Relative to the models path
-  model: mistral-7b-instruct-v0.2.Q4_0.gguf
-  # temperature
-  temperature: 0.3
-  # all the OpenAI request options here..
-
-# Default context size
-context_size: 512
-threads: 10
-
-# Enable prompt caching
-prompt_cache_path: "alpaca-cache"
-prompt_cache_all: true
-
-# stopwords (if supported by the backend)
-stopwords:
-- "HUMAN:"
-- "### Response:"
-# define chat roles
-roles:
-  assistant: '### Response:'
-  system: '### System Instruction:'
-  user: '### Instruction:'
-template:
-  # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
-  completion: completion
-  chat: chat
```
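This YAML was LocalAI's model definition: it mapped the alias `gpt-3.5-turbo` onto the GGUF file and set sampling, context, caching, stopword, and prompt-template options. llama-cpp-python's server takes the model path directly on the command line, so the file becomes dead weight once the base image changes. If clients still expect the old alias, a hedged sketch (the `--model_alias` flag is an assumption about llama_cpp.server's CLI, not something this commit uses):

```bash
# Sketch: keep serving under the old OpenAI-style model name.
# --model_alias is assumed from llama-cpp-python's server settings.
python3 -m llama_cpp.server \
  --model models/mistral-7b-instruct-v0.2.Q4_0.gguf \
  --model_alias gpt-3.5-turbo
```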
setup.sh DELETED

```diff
@@ -1,6 +0,0 @@
-mkdir models
-
-wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
-
-docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
-
```
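setup.sh covered the old LocalAI workflow: create `models/`, fetch the GGUF weights, then run the `quay.io/go-skynet/local-ai` image with the directory mounted. With the download and server now baked into the updated Dockerfile, the local equivalent reduces to building and running the image; a minimal sketch (the tag `api` is illustrative):

```bash
# Build the image from the updated Dockerfile and run it locally.
# The tag "api" is illustrative; any tag works.
docker build -t api .
docker run --rm -p 8080:8080 api
```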