Spaces (runtime status: Runtime error)
palondomus committed
Commit · 37cb9fa
Parent(s):
CaesarFrenchLLM first test
Files changed:
- .gitattributes +35 -0
- Dockerfile +14 -0
- README.md +10 -0
- caesarfrenchllm copy.py +65 -0
- caesarfrenchllm.py +64 -0
- main.py +37 -0
- requirements.txt +106 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,14 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.10
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
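The Dockerfile builds a python:3.10 image, installs the pinned requirements, and serves main:app with uvicorn on port 7860. A hypothetical smoke test, assuming the image has been built and started with port 7860 published to localhost (the docker commands themselves are not part of this commit), could look like this:

import httpx  # pinned in requirements.txt (httpx==0.24.1)

# Assumes the container is running and port 7860 is mapped to localhost.
resp = httpx.get("http://localhost:7860/")
print(resp.status_code)  # 200 if the FastAPI app from main.py is up
print(resp.json())       # "Welcome to CaesarAI's API's and CaesarAINL."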
README.md
ADDED
@@ -0,0 +1,10 @@
+---
+title: CaesarFrenchLLM
+emoji: 💻
+colorFrom: red
+colorTo: yellow
+sdk: docker
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
caesarfrenchllm copy.py
ADDED
@@ -0,0 +1,65 @@
+import torch
+import os
+os.environ['TRANSFORMERS_CACHE'] = "T:/CaesarLLModel/.cache"
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextStreamer
+from vigogne.preprocess import generate_inference_chat_prompt
+
+if __name__ == "__main__":
+
+    base_model_name_or_path = "bofenghuang/vigogne-2-7b-chat"
+    tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path, padding_side="right", use_fast=False,)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    model = AutoModelForCausalLM.from_pretrained(
+        base_model_name_or_path,
+        torch_dtype=torch.float32,
+        device_map="auto",
+        offload_folder="T:/CaesarLLModel/.cache/offload"
+
+        # load_in_8bit=True,
+        # trust_remote_code=True,
+        # low_cpu_mem_usage=True,
+    )
+
+    # lora_model_name_or_path = ""
+    # model = PeftModel.from_pretrained(model, lora_model_name_or_path)
+
+    model.eval()
+
+    # if torch.__version__ >= "2":
+    #     model = torch.compile(model)
+    streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    def infer(user_query, temperature=0.1, top_p=1.0, top_k=0, max_new_tokens=512, **kwargs,):
+        prompt = generate_inference_chat_prompt(user_query, tokenizer=tokenizer)
+        input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
+        input_length = input_ids.shape[1]
+
+        generated_outputs = model.generate(
+            input_ids=input_ids,
+            generation_config=GenerationConfig(
+                temperature=temperature,
+                do_sample=temperature > 0.0,
+                top_p=top_p,
+                top_k=top_k,
+                max_new_tokens=max_new_tokens,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.pad_token_id,
+                **kwargs,
+            ),
+            streamer=streamer,
+            return_dict_in_generate=True,
+        )
+        generated_tokens = generated_outputs.sequences[0, input_length:]
+        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+        return generated_text
+    def chat(**kwargs):
+        history = []
+        while True:
+            user_input = input(">> <|user|>: ")
+            print(">> <|assistant|>: ", end="")
+            model_response = infer([*history, [user_input, ""]], **kwargs)
+
+            history.append([user_input, model_response])
+            return history[-1][1]
+            # print(f">> <|assistant|>: {history[-1][1]}")
+    chat()
caesarfrenchllm.py
ADDED
@@ -0,0 +1,64 @@
+import torch
+import os
+#os.environ['TRANSFORMERS_CACHE'] = "T:/CaesarLLModel/.cache"
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextStreamer
+from vigogne.preprocess import generate_inference_chat_prompt
+class CaesarFrenchLLM:
+    def __init__(self) -> None:
+        self.history = []
+        base_model_name_or_path = "bofenghuang/vigogne-2-7b-chat"
+        self.tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path, padding_side="right", use_fast=False,)
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+
+        self.model = AutoModelForCausalLM.from_pretrained(
+            base_model_name_or_path,
+            torch_dtype=torch.float32,
+            device_map="auto",
+
+
+            # load_in_8bit=True,
+            # trust_remote_code=True,
+            # low_cpu_mem_usage=True,
+        )
+
+        # lora_model_name_or_path = ""
+        # model = PeftModel.from_pretrained(model, lora_model_name_or_path)
+
+        self.model.eval()
+
+        if torch.__version__ >= "2":
+            self.model = torch.compile(self.model)
+        self.streamer = TextStreamer(self.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    def infer(self, user_query, temperature=0.1, top_p=1.0, top_k=0, max_new_tokens=512, **kwargs,):
+        prompt = generate_inference_chat_prompt(user_query, tokenizer=self.tokenizer)
+        input_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"].to(self.model.device)
+        input_length = input_ids.shape[1]
+
+        generated_outputs = self.model.generate(
+            input_ids=input_ids,
+            generation_config=GenerationConfig(
+                temperature=temperature,
+                do_sample=temperature > 0.0,
+                top_p=top_p,
+                top_k=top_k,
+                max_new_tokens=max_new_tokens,
+                eos_token_id=self.tokenizer.eos_token_id,
+                pad_token_id=self.tokenizer.pad_token_id,
+                **kwargs,
+            ),
+            streamer=self.streamer,
+            return_dict_in_generate=True,
+        )
+        generated_tokens = generated_outputs.sequences[0, input_length:]
+        generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+        return generated_text
+
+    def chat(self, user_input, **kwargs):
+        print(f">> <|user|>: {user_input}")
+        print(">> <|assistant|>: ", end="")
+        model_response = self.infer([*self.history, [user_input, ""]], **kwargs)
+
+        self.history.append([user_input, model_response])
+        return self.history[-1][1]
+        # print(f">> <|assistant|>: {history[-1][1]}")
+
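A minimal usage sketch of the CaesarFrenchLLM class, mirroring how main.py drives it. Note that instantiating the class downloads and loads bofenghuang/vigogne-2-7b-chat in float32, which needs a large amount of memory, so this is illustrative rather than something to run casually:

from caesarfrenchllm import CaesarFrenchLLM

llm = CaesarFrenchLLM()                        # loads tokenizer and model, starts with an empty history
reply = llm.chat("Bonjour, comment vas-tu ?")  # prompts the model and appends [user_input, response] to llm.history
print(reply)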
main.py
ADDED
@@ -0,0 +1,37 @@
+import asyncio
+
+from fastapi import FastAPI, UploadFile, WebSocket, WebSocketDisconnect
+from fastapi.responses import FileResponse
+from caesarfrenchllm import CaesarFrenchLLM
+import uvicorn
+app = FastAPI()
+caesarmodel = CaesarFrenchLLM()
+
+@app.get("/")
+def caesaraihome():
+    return "Welcome to CaesarAI's API's and CaesarAINL."
+
+@app.websocket("/caesarfrenchllmws")
+async def caesarfrenchllmws(websocket: WebSocket):
+    # listen for connections
+    await websocket.accept()
+
+    try:
+        while True:
+            contents = await websocket.receive_json()
+            textresp = caesarmodel.chat(contents["message"])
+            final_response = {"message": textresp}
+            await websocket.send_json(final_response)  # sends the buffer as bytes
+
+
+    except WebSocketDisconnect:
+        print("Client disconnected")
+
+
+async def main():
+    config = uvicorn.Config("main:app", port=7860, log_level="info", host="0.0.0.0", reload=True)
+    server = uvicorn.Server(config)
+    await server.serve()
+
+if __name__ == "__main__":
+    asyncio.run(main())
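A hypothetical client for the /caesarfrenchllmws endpoint, assuming the app is reachable on localhost:7860; the URL and the example message are illustrative and not part of the commit. The endpoint expects a JSON payload with a "message" key and replies with {"message": <model response>}:

import asyncio
import json
import websockets  # pinned in requirements.txt (websockets==11.0.3)

async def ask(message: str) -> str:
    async with websockets.connect("ws://localhost:7860/caesarfrenchllmws") as ws:
        # FastAPI's receive_json()/send_json() exchange JSON as text frames.
        await ws.send(json.dumps({"message": message}))
        reply = json.loads(await ws.recv())
        return reply["message"]

if __name__ == "__main__":
    print(asyncio.run(ask("Bonjour, qui es-tu ?")))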
requirements.txt
ADDED
@@ -0,0 +1,106 @@
+absl-py==1.4.0
+accelerate @ git+https://github.com/huggingface/accelerate.git@7befe580c2393b0a3d75a8aa75af79b8e57f84a4
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.1.1
+annotated-types==0.5.0
+anyio==4.0.0
+appdirs==1.4.4
+async-timeout==4.0.3
+attrs==23.1.0
+bitsandbytes==0.41.1
+certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.7
+colorama==0.4.6
+contourpy==1.1.0
+cycler==0.11.0
+datasets==2.14.4
+dill==0.3.7
+docker-pycreds==0.4.0
+einops==0.6.1
+exceptiongroup==1.1.3
+fastapi==0.103.0
+ffmpy==0.3.1
+filelock==3.12.3
+fire==0.5.0
+fonttools==4.42.1
+frozenlist==1.4.0
+fsspec==2023.6.0
+gitdb==4.0.10
+GitPython==3.1.32
+gradio==3.41.2
+gradio_client==0.5.0
+h11==0.14.0
+httpcore==0.17.3
+httpx==0.24.1
+huggingface-hub==0.16.4
+idna==3.4
+importlib-resources==6.0.1
+Jinja2==3.1.2
+joblib==1.3.2
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+loralib==0.1.2
+MarkupSafe==2.1.3
+matplotlib==3.7.2
+mpmath==1.3.0
+multidict==6.0.4
+multiprocess==0.70.15
+networkx==3.1
+nltk==3.8.1
+numpy==1.25.2
+openai==0.27.10
+orjson==3.9.5
+packaging==23.1
+pandas==2.1.0
+pathtools==0.1.2
+peft @ git+https://github.com/huggingface/peft.git@0c9354bda98eb7f5348699e23ab752e8dca1e60e
+Pillow==10.0.0
+protobuf==4.24.2
+psutil==5.9.5
+pyarrow==13.0.0
+pydantic==2.3.0
+pydantic_core==2.6.3
+pydub==0.25.1
+pyparsing==3.0.9
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3
+PyYAML==6.0.1
+referencing==0.30.2
+regex==2023.8.8
+requests==2.31.0
+rouge-score==0.1.2
+rpds-py==0.10.0
+safetensors==0.3.3
+scipy==1.11.2
+semantic-version==2.10.0
+sentencepiece==0.1.99
+sentry-sdk==1.30.0
+setproctitle==1.3.2
+six==1.16.0
+smmap==5.0.0
+sniffio==1.3.0
+starlette==0.27.0
+sympy==1.12
+tenacity==8.2.3
+tensorboardX==2.6.2.2
+termcolor==2.3.0
+tiktoken==0.4.0
+tokenizers==0.13.3
+toolz==0.12.0
+torch==2.0.1
+tqdm==4.66.1
+transformers @ git+https://github.com/huggingface/transformers.git@3b39b906183ed08d9961908eb73104aeea345d11
+typing_extensions==4.7.1
+tzdata==2023.3
+urllib3==2.0.4
+uvicorn==0.23.2
+vigogne @ git+https://github.com/bofenghuang/vigogne@76e1cd0b35fd4f9e360aecdf7130c998459df0ff
+wandb==0.15.9
+websockets==11.0.3
+xxhash==3.3.0
+yarl==1.9.2