palondomus committed
Commit
37cb9fa
0 Parent(s):

CaesarFrenchLLM first test

Files changed (7)
  1. .gitattributes +35 -0
  2. Dockerfile +14 -0
  3. README.md +10 -0
  4. caesarfrenchllm copy.py +65 -0
  5. caesarfrenchllm.py +64 -0
  6. main.py +37 -0
  7. requirements.txt +106 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,10 @@
+ ---
+ title: CaesarFrenchLLM
+ emoji: 💻
+ colorFrom: red
+ colorTo: yellow
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
caesarfrenchllm copy.py ADDED
@@ -0,0 +1,65 @@
+ import torch
+ import os
+ os.environ['TRANSFORMERS_CACHE'] = "T:/CaesarLLModel/.cache"
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextStreamer
+ from vigogne.preprocess import generate_inference_chat_prompt
+
+ if __name__ == "__main__":
+
+     base_model_name_or_path = "bofenghuang/vigogne-2-7b-chat"
+     tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path, padding_side="right", use_fast=False)
+     tokenizer.pad_token = tokenizer.eos_token
+
+     model = AutoModelForCausalLM.from_pretrained(
+         base_model_name_or_path,
+         torch_dtype=torch.float32,
+         device_map="auto",
+         offload_folder="T:/CaesarLLModel/.cache/offload",
+         # load_in_8bit=True,
+         # trust_remote_code=True,
+         # low_cpu_mem_usage=True,
+     )
+
+     # lora_model_name_or_path = ""
+     # model = PeftModel.from_pretrained(model, lora_model_name_or_path)
+
+     model.eval()
+
+     # if torch.__version__ >= "2":
+     #     model = torch.compile(model)
+
+     streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+
+     def infer(user_query, temperature=0.1, top_p=1.0, top_k=0, max_new_tokens=512, **kwargs):
+         # Build the vigogne chat prompt and generate a reply, streaming tokens to stdout.
+         prompt = generate_inference_chat_prompt(user_query, tokenizer=tokenizer)
+         input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
+         input_length = input_ids.shape[1]
+
+         generated_outputs = model.generate(
+             input_ids=input_ids,
+             generation_config=GenerationConfig(
+                 temperature=temperature,
+                 do_sample=temperature > 0.0,
+                 top_p=top_p,
+                 top_k=top_k,
+                 max_new_tokens=max_new_tokens,
+                 eos_token_id=tokenizer.eos_token_id,
+                 pad_token_id=tokenizer.pad_token_id,
+                 **kwargs,
+             ),
+             streamer=streamer,
+             return_dict_in_generate=True,
+         )
+         generated_tokens = generated_outputs.sequences[0, input_length:]
+         generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+         return generated_text
+
+     def chat(**kwargs):
+         history = []
+         while True:
+             user_input = input(">> <|user|>: ")
+             print(">> <|assistant|>: ", end="")
+             model_response = infer([*history, [user_input, ""]], **kwargs)
+
+             history.append([user_input, model_response])
+             # Returning here ends the loop after the first exchange.
+             return history[-1][1]
+             # print(f">> <|assistant|>: {history[-1][1]}")
+
+     chat()
caesarfrenchllm.py ADDED
@@ -0,0 +1,64 @@
+ import torch
+ import os
+ # os.environ['TRANSFORMERS_CACHE'] = "T:/CaesarLLModel/.cache"
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextStreamer
+ from vigogne.preprocess import generate_inference_chat_prompt
+
+ class CaesarFrenchLLM:
+     def __init__(self) -> None:
+         self.history = []
+         base_model_name_or_path = "bofenghuang/vigogne-2-7b-chat"
+         self.tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path, padding_side="right", use_fast=False)
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             base_model_name_or_path,
+             torch_dtype=torch.float32,
+             device_map="auto",
+             # load_in_8bit=True,
+             # trust_remote_code=True,
+             # low_cpu_mem_usage=True,
+         )
+
+         # lora_model_name_or_path = ""
+         # model = PeftModel.from_pretrained(model, lora_model_name_or_path)
+
+         self.model.eval()
+
+         if torch.__version__ >= "2":
+             self.model = torch.compile(self.model)
+
+         self.streamer = TextStreamer(self.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+
+     def infer(self, user_query, temperature=0.1, top_p=1.0, top_k=0, max_new_tokens=512, **kwargs):
+         # Build the vigogne chat prompt and generate a reply, streaming tokens to stdout.
+         prompt = generate_inference_chat_prompt(user_query, tokenizer=self.tokenizer)
+         input_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"].to(self.model.device)
+         input_length = input_ids.shape[1]
+
+         generated_outputs = self.model.generate(
+             input_ids=input_ids,
+             generation_config=GenerationConfig(
+                 temperature=temperature,
+                 do_sample=temperature > 0.0,
+                 top_p=top_p,
+                 top_k=top_k,
+                 max_new_tokens=max_new_tokens,
+                 eos_token_id=self.tokenizer.eos_token_id,
+                 pad_token_id=self.tokenizer.pad_token_id,
+                 **kwargs,
+             ),
+             streamer=self.streamer,
+             return_dict_in_generate=True,
+         )
+         generated_tokens = generated_outputs.sequences[0, input_length:]
+         generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+         return generated_text
+
+     def chat(self, user_input, **kwargs):
+         # Run one exchange, echoing the prompt and appending it to the running history.
+         print(f">> <|user|>: {user_input}")
+         print(">> <|assistant|>: ", end="")
+         model_response = self.infer([*self.history, [user_input, ""]], **kwargs)
+
+         self.history.append([user_input, model_response])
+         return self.history[-1][1]
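For reference, a minimal sketch of driving the class above directly from Python, assuming the pinned dependencies in requirements.txt are installed (the prompt text is illustrative, not part of the commit):

```python
# Hypothetical usage sketch for CaesarFrenchLLM; the prompt is illustrative.
from caesarfrenchllm import CaesarFrenchLLM

llm = CaesarFrenchLLM()  # downloads and loads bofenghuang/vigogne-2-7b-chat on first use
reply = llm.chat("Bonjour, peux-tu te présenter ?")
print(reply)  # chat() also appends the exchange to llm.history
```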
main.py ADDED
@@ -0,0 +1,37 @@
+ import asyncio
+
+ from fastapi import FastAPI, UploadFile, WebSocket, WebSocketDisconnect
+ from fastapi.responses import FileResponse
+ from caesarfrenchllm import CaesarFrenchLLM
+ import uvicorn
+
+ app = FastAPI()
+ caesarmodel = CaesarFrenchLLM()
+
+ @app.get("/")
+ def caesaraihome():
+     return "Welcome to CaesarAI's APIs and CaesarAINL."
+
+ @app.websocket("/caesarfrenchllmws")
+ async def caesarfrenchllmws(websocket: WebSocket):
+     # Listen for connections.
+     await websocket.accept()
+
+     try:
+         while True:
+             contents = await websocket.receive_json()
+             # chat() is synchronous, so the event loop blocks while the model generates.
+             textresp = caesarmodel.chat(contents["message"])
+             final_response = {"message": textresp}
+             await websocket.send_json(final_response)  # send the reply back as JSON
+
+     except WebSocketDisconnect:
+         print("Client disconnected")
+
+ async def main():
+     config = uvicorn.Config("main:app", port=7860, log_level="info", host="0.0.0.0", reload=True)
+     server = uvicorn.Server(config)
+     await server.serve()
+
+ if __name__ == "__main__":
+     asyncio.run(main())
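For reference, a minimal client sketch for the /caesarfrenchllmws endpoint above, assuming the app is running locally on port 7860; the URL and message are illustrative, and it uses the websockets package already pinned in requirements.txt:

```python
# Hypothetical client sketch for the /caesarfrenchllmws WebSocket endpoint.
import asyncio
import json

import websockets  # websockets==11.0.3 is listed in requirements.txt


async def ask(message: str) -> str:
    uri = "ws://localhost:7860/caesarfrenchllmws"  # illustrative local URL
    async with websockets.connect(uri) as ws:
        # The endpoint expects a JSON object with a "message" key...
        await ws.send(json.dumps({"message": message}))
        # ...and replies with {"message": <generated text>}.
        reply = json.loads(await ws.recv())
        return reply["message"]


if __name__ == "__main__":
    print(asyncio.run(ask("Bonjour, comment vas-tu ?")))
```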
requirements.txt ADDED
@@ -0,0 +1,106 @@
+ absl-py==1.4.0
+ accelerate @ git+https://github.com/huggingface/accelerate.git@7befe580c2393b0a3d75a8aa75af79b8e57f84a4
+ aiofiles==23.2.1
+ aiohttp==3.8.5
+ aiosignal==1.3.1
+ altair==5.1.1
+ annotated-types==0.5.0
+ anyio==4.0.0
+ appdirs==1.4.4
+ async-timeout==4.0.3
+ attrs==23.1.0
+ bitsandbytes==0.41.1
+ certifi==2023.7.22
+ charset-normalizer==3.2.0
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.1.0
+ cycler==0.11.0
+ datasets==2.14.4
+ dill==0.3.7
+ docker-pycreds==0.4.0
+ einops==0.6.1
+ exceptiongroup==1.1.3
+ fastapi==0.103.0
+ ffmpy==0.3.1
+ filelock==3.12.3
+ fire==0.5.0
+ fonttools==4.42.1
+ frozenlist==1.4.0
+ fsspec==2023.6.0
+ gitdb==4.0.10
+ GitPython==3.1.32
+ gradio==3.41.2
+ gradio_client==0.5.0
+ h11==0.14.0
+ httpcore==0.17.3
+ httpx==0.24.1
+ huggingface-hub==0.16.4
+ idna==3.4
+ importlib-resources==6.0.1
+ Jinja2==3.1.2
+ joblib==1.3.2
+ jsonschema==4.19.0
+ jsonschema-specifications==2023.7.1
+ kiwisolver==1.4.5
+ loralib==0.1.2
+ MarkupSafe==2.1.3
+ matplotlib==3.7.2
+ mpmath==1.3.0
+ multidict==6.0.4
+ multiprocess==0.70.15
+ networkx==3.1
+ nltk==3.8.1
+ numpy==1.25.2
+ openai==0.27.10
+ orjson==3.9.5
+ packaging==23.1
+ pandas==2.1.0
+ pathtools==0.1.2
+ peft @ git+https://github.com/huggingface/peft.git@0c9354bda98eb7f5348699e23ab752e8dca1e60e
+ Pillow==10.0.0
+ protobuf==4.24.2
+ psutil==5.9.5
+ pyarrow==13.0.0
+ pydantic==2.3.0
+ pydantic_core==2.6.3
+ pydub==0.25.1
+ pyparsing==3.0.9
+ python-dateutil==2.8.2
+ python-multipart==0.0.6
+ pytz==2023.3
+ PyYAML==6.0.1
+ referencing==0.30.2
+ regex==2023.8.8
+ requests==2.31.0
+ rouge-score==0.1.2
+ rpds-py==0.10.0
+ safetensors==0.3.3
+ scipy==1.11.2
+ semantic-version==2.10.0
+ sentencepiece==0.1.99
+ sentry-sdk==1.30.0
+ setproctitle==1.3.2
+ six==1.16.0
+ smmap==5.0.0
+ sniffio==1.3.0
+ starlette==0.27.0
+ sympy==1.12
+ tenacity==8.2.3
+ tensorboardX==2.6.2.2
+ termcolor==2.3.0
+ tiktoken==0.4.0
+ tokenizers==0.13.3
+ toolz==0.12.0
+ torch==2.0.1
+ tqdm==4.66.1
+ transformers @ git+https://github.com/huggingface/transformers.git@3b39b906183ed08d9961908eb73104aeea345d11
+ typing_extensions==4.7.1
+ tzdata==2023.3
+ urllib3==2.0.4
+ uvicorn==0.23.2
+ vigogne @ git+https://github.com/bofenghuang/vigogne@76e1cd0b35fd4f9e360aecdf7130c998459df0ff
+ wandb==0.15.9
+ websockets==11.0.3
+ xxhash==3.3.0
+ yarl==1.9.2