Spaces:
Running
Running
Commit
·
6d3a2f2
1
Parent(s):
f52ef7c
Attempt FP8 support: pass FineGrainedFP8Config as quantization_config when loading the model
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
import torch
|
4 |
import base64
|
5 |
|
@@ -43,6 +43,8 @@ def load_model():
|
|
43 |
# model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
44 |
# model_id = "deepseek-ai/deepseek-llm-7b-chat"
|
45 |
model_id = "deepseek-ai/DeepSeek-V3-0324"
|
|
|
|
|
46 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
47 |
model = AutoModelForCausalLM.from_pretrained(
|
48 |
model_id,
|
@@ -50,7 +52,8 @@ def load_model():
|
|
50 |
# torch_dtype=torch.float32
|
51 |
device_map="auto",
|
52 |
torch_dtype=torch.float16,
|
53 |
-
trust_remote_code = True
|
|
|
54 |
)
|
55 |
# model.to("cpu")
|
56 |
return tokenizer, model
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, FineGrainedFP8Config
|
3 |
import torch
|
4 |
import base64
|
5 |
|
|
|
43 |
# model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
44 |
# model_id = "deepseek-ai/deepseek-llm-7b-chat"
|
45 |
model_id = "deepseek-ai/DeepSeek-V3-0324"
|
46 |
+
|
47 |
+
quantization_config = FineGrainedFP8Config()
|
48 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
49 |
model = AutoModelForCausalLM.from_pretrained(
|
50 |
model_id,
|
|
|
52 |
# torch_dtype=torch.float32
|
53 |
device_map="auto",
|
54 |
torch_dtype=torch.float16,
|
55 |
+
trust_remote_code = True,
|
56 |
+
quantization_config=quantization_config
|
57 |
)
|
58 |
# model.to("cpu")
|
59 |
return tokenizer, model
|