Spaces:
Running
Running
Commit
·
4c090d8
1
Parent(s):
2190f14
oops
Browse files
app.py
CHANGED
@@ -42,7 +42,9 @@ def load_model():
|
|
42 |
# tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
|
43 |
# model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
44 |
# model_id = "deepseek-ai/deepseek-llm-7b-chat"
|
45 |
-
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-
|
|
|
|
|
46 |
|
47 |
quantization_config = FineGrainedFP8Config()
|
48 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
@@ -53,7 +55,8 @@ def load_model():
|
|
53 |
device_map="auto",
|
54 |
torch_dtype=torch.float16,
|
55 |
# quantization_config=quantization_config,
|
56 |
-
trust_remote_code = True
|
|
|
57 |
)
|
58 |
# model.to("cpu")
|
59 |
return tokenizer, model
|
|
|
42 |
# tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
|
43 |
# model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
44 |
# model_id = "deepseek-ai/deepseek-llm-7b-chat"
|
45 |
+
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
46 |
+
# model_id = "mistralai/Mistral-7B-Instruct-v0.3"
|
47 |
+
|
48 |
|
49 |
quantization_config = FineGrainedFP8Config()
|
50 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
55 |
device_map="auto",
|
56 |
torch_dtype=torch.float16,
|
57 |
# quantization_config=quantization_config,
|
58 |
+
trust_remote_code = True,
|
59 |
+
attn_implementation="flash_attention_2"
|
60 |
)
|
61 |
# model.to("cpu")
|
62 |
return tokenizer, model
|