Ved Gupta committed
Commit b528013
1 Parent(s): a65ba2f
Update Dockerfile and README.md

Files changed:
- Dockerfile +3 -3
- README.md +4 -0
- models/gpt-3.5-turbo.yaml +0 -30
- setup.sh +0 -6
Dockerfile
CHANGED
@@ -1,7 +1,7 @@
-FROM …
+FROM python:3.9-alpine
 
+RUN mkdir models
 RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
-COPY models/gpt-3.5-turbo.yaml models/gpt-3.5-turbo.yaml
 
 RUN useradd -m -u 1000 user
 USER user
@@ -12,4 +12,4 @@ RUN cp -R . $HOME
 WORKDIR $HOME
 
 EXPOSE 8080
-CMD ["…
+CMD ["python3", "-m", "llama_cpp.server", "--model", "models/mistral-7b-instruct-v0.2.Q4_0.gguf"]
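The new CMD replaces the LocalAI setup with llama-cpp-python's bundled OpenAI-compatible server, pointed at the GGUF file downloaded during the build. As a rough illustration of the kind of request that server accepts, here is a minimal sketch of a chat-completions call; the localhost URL and port (taken from EXPOSE 8080 rather than llama_cpp.server's own default), the model string, and the prompt are assumptions, not part of this commit.

```python
# Hedged sketch: query the OpenAI-compatible endpoint started by llama_cpp.server.
# Assumes the server is reachable on port 8080 (the port EXPOSEd above); adjust if
# it actually binds to llama-cpp-python's default port instead.
import requests

BASE_URL = "http://localhost:8080/v1"  # assumption: matches EXPOSE 8080

payload = {
    # Single-model server, so the name is informational; reusing the GGUF path here.
    "model": "models/mistral-7b-instruct-v0.2.Q4_0.gguf",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "temperature": 0.3,  # carried over from the old LocalAI default, for illustration
}

resp = requests.post(f"{BASE_URL}/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```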
README.md
CHANGED
@@ -10,6 +10,10 @@ app_port: 8080
 
 
 ```bash
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
+wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+
+
 curl https://innovatorved-api.hf.space/v1/models
 
 curl https://innovatorved-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
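The JSON body of the second curl command is truncated in this view. A typical OpenAI-style payload for that endpoint, sent here through the official openai Python client (v1+) rather than curl, might look like the sketch below; the base_url comes from the README, while the api_key placeholder, model name, and prompt are assumptions.

```python
# Hedged sketch: call the Space's OpenAI-compatible API with the openai client.
from openai import OpenAI

client = OpenAI(
    base_url="https://innovatorved-api.hf.space/v1",
    api_key="not-needed",  # assumption: the server does not enforce an API key
)

completion = client.chat.completions.create(
    model="mistral-7b-instruct-v0.2.Q4_0.gguf",  # informational for a single-model server
    messages=[{"role": "user", "content": "What is a GGUF file?"}],
    temperature=0.3,
)
print(completion.choices[0].message.content)
```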
models/gpt-3.5-turbo.yaml
DELETED
@@ -1,30 +0,0 @@
-name: gpt-3.5-turbo
-# Default model parameters
-parameters:
-  # Relative to the models path
-  model: mistral-7b-instruct-v0.2.Q4_0.gguf
-  # temperature
-  temperature: 0.3
-  # all the OpenAI request options here..
-
-# Default context size
-context_size: 512
-threads: 10
-
-# Enable prompt caching
-prompt_cache_path: "alpaca-cache"
-prompt_cache_all: true
-
-# stopwords (if supported by the backend)
-stopwords:
-- "HUMAN:"
-- "### Response:"
-# define chat roles
-roles:
-  assistant: '### Response:'
-  system: '### System Instruction:'
-  user: '### Instruction:'
-template:
-  # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
-  completion: completion
-  chat: chat
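The deleted file was a LocalAI model definition that exposed the Mistral GGUF under the alias gpt-3.5-turbo and pinned defaults such as context_size: 512, threads: 10, and temperature: 0.3. Under llama-cpp-python, roughly the same knobs become constructor and request arguments; a minimal sketch of that mapping follows, with the prompt and the direct use of the Llama class (rather than the HTTP server) being assumptions for illustration only.

```python
# Hedged sketch: roughly the same defaults as the deleted LocalAI YAML,
# expressed with llama-cpp-python's Llama class instead of a model config file.
from llama_cpp import Llama

llm = Llama(
    model_path="models/mistral-7b-instruct-v0.2.Q4_0.gguf",  # "model:" in the YAML
    n_ctx=512,     # "context_size: 512"
    n_threads=10,  # "threads: 10"
)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Summarize what a context window is."}],
    temperature=0.3,  # "temperature: 0.3"
)
print(out["choices"][0]["message"]["content"])
```

The YAML's prompt-cache, stopword, and role/template settings have no single-line equivalent here and are omitted from the sketch.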
setup.sh
DELETED
@@ -1,6 +0,0 @@
-mkdir models
-
-wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
-
-docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
-
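The deleted setup.sh downloaded the GGUF with wget and ran LocalAI in Docker; after this commit the Dockerfile performs the download at build time instead. For fetching the same file outside Docker, a hedged alternative sketch using huggingface_hub is shown below; the repo id and filename are read off the wget URL above, and local_dir is an assumption.

```python
# Hedged sketch: fetch the same GGUF that setup.sh pulled with wget,
# using huggingface_hub instead of a shell script.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    filename="mistral-7b-instruct-v0.2.Q4_0.gguf",
    local_dir="models",  # assumption: mirrors the models/ directory used above
)
print("model downloaded to", path)
```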