ilhooq commited on
Commit
471608b
1 Parent(s): ef1cfe0

First implementation

Browse files
Files changed (6) hide show
  1. .gitignore +3 -0
  2. Dockerfile +15 -0
  3. main.py +88 -0
  4. requirements.txt +4 -0
  5. test.ipynb +67 -0
  6. vigostral-7b-chat.Q6_K.gguf +1 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .vscode
2
+ .venv
3
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

WORKDIR /code

# Copy and install requirements first so the pip layer is cached across
# code-only changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Ship the GGUF weights next to the app; main.py loads the model by this
# relative filename from the /code working directory.
COPY ./vigostral-7b-chat.Q6_K.gguf /code/vigostral-7b-chat.Q6_K.gguf
COPY ./main.py /code/main.py

# main.py sits at /code/main.py — there is no "app" package in this image,
# so the ASGI target must be "main:app"; the previous "app.main:app" would
# fail at startup with ModuleNotFoundError.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import uvicorn
import json

from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, Form
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import List, Dict
from fastapi.middleware.cors import CORSMiddleware

# Load the GGUF model once at import time so every request reuses the same
# in-memory instance.
# NOTE(review): model_type is 'llama' here while test.ipynb loads the same
# file with model_type='mistral' — confirm which backend is intended.
model = AutoModelForCausalLM.from_pretrained(
    "vigostral-7b-chat.Q6_K.gguf",
    model_type='llama',
    threads=3,
)

# FastAPI application with fully permissive CORS (any origin, method, header).
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
27
+
28
def apply_chat_template(conversation: List):
    """Render a list of chat messages into a single prompt string.

    Each item must expose ``role`` and ``content``. Roles are compared
    case-insensitively: SYSTEM turns are wrapped in ``<s>[INST] <<SYS>> ...``,
    ASSISTANT turns close an instruction with ``[/INST] ... </s>``, and any
    other role (typically the user) becomes ``[INST] ... [/INST]``.

    NOTE(review): a system turn followed by a user turn produces two ``[INST]``
    markers with no closer in between — confirm this matches the template the
    model was trained on before changing it.
    """
    pieces = []
    for message in conversation:
        text = message.content
        normalized = message.role.upper()
        if normalized == "SYSTEM":
            segment = "<s>[INST] <<SYS>>\n" + text + "\n<</SYS>>"
        elif normalized == "ASSISTANT":
            segment = "\n[/INST] " + text + " </s>"
        else:
            segment = "[INST] " + text + " [/INST]"
        pieces.append(segment)
    return "".join(pieces)
42
+
43
# Pydantic request-body models
class Message(BaseModel):
    """One chat turn: a role string and its text content.

    ``role`` is compared case-insensitively against "SYSTEM" / "ASSISTANT"
    by apply_chat_template; anything else is treated as a user turn.
    """
    role: str
    content: str
47
+
48
class Validation(BaseModel):
    """Request body schema for the /chat endpoint."""
    messages: List[Message]
    model: str  # accepted for API compatibility but never read by the endpoint
    temperature: float
    presence_penalty: float
    top_p: float
    frequency_penalty: float
    stream: bool  # NOTE(review): endpoint always streams regardless of this flag — confirm intent
56
+
57
@app.post("/chat")
async def stream(item: Validation):
    """Stream a chat completion for the posted conversation.

    Formats ``item.messages`` into a prompt, runs the local model with the
    request's sampling settings, and streams one JSON object per generated
    text fragment in the ``chat.completion.chunk`` shape.
    """
    prompt = apply_chat_template(item.messages)

    def stream_json():
        # NOTE(review): confirm ctransformers' generate API accepts
        # presence_penalty / frequency_penalty keyword arguments.
        token_iter = model(
            prompt,
            temperature=item.temperature,
            top_p=item.top_p,
            presence_penalty=item.presence_penalty,
            frequency_penalty=item.frequency_penalty,
            stream=True,
        )
        for fragment in token_iter:
            chunk = {
                "object": "chat.completion.chunk",
                "choices": [
                    {
                        "index": 0,
                        "delta": {
                            "content": fragment,
                        },
                    },
                ],
            }
            yield json.dumps(chunk)

    return StreamingResponse(stream_json(), media_type="application/json")
84
+
85
# Local development entry point: serves on port 8000. In the container the
# Dockerfile CMD launches uvicorn itself (on port 7860) and this guard never
# fires.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
87
+
88
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ pydantic
3
+ uvicorn
4
+ ctransformers
test.ipynb ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ilhooq/Dev/Python/IA/vigostral-chat/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from ctransformers import AutoModelForCausalLM\n",
19
+ "\n",
20
+ "#Model loading\n",
21
+ "llm = AutoModelForCausalLM.from_pretrained(\"/home/ilhooq/Stockage/IA-models/vigostral-7b-chat.Q6_K.gguf\",\n",
22
+ " model_type='mistral',\n",
23
+ " max_new_tokens = 1096,\n",
24
+ " threads = 3,\n",
25
+ ")"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 3,
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stdout",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "llama\n"
38
+ ]
39
+ }
40
+ ],
41
+ "source": [
42
+ "print(llm.model_type)"
43
+ ]
44
+ }
45
+ ],
46
+ "metadata": {
47
+ "kernelspec": {
48
+ "display_name": ".venv",
49
+ "language": "python",
50
+ "name": "python3"
51
+ },
52
+ "language_info": {
53
+ "codemirror_mode": {
54
+ "name": "ipython",
55
+ "version": 3
56
+ },
57
+ "file_extension": ".py",
58
+ "mimetype": "text/x-python",
59
+ "name": "python",
60
+ "nbconvert_exporter": "python",
61
+ "pygments_lexer": "ipython3",
62
+ "version": "3.12.3"
63
+ }
64
+ },
65
+ "nbformat": 4,
66
+ "nbformat_minor": 2
67
+ }
vigostral-7b-chat.Q6_K.gguf ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/ilhooq/Stockage/IA-models/vigostral-7b-chat.Q6_K.gguf