davidizzle committed on
Commit
4c090d8
·
1 Parent(s): 2190f14
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -42,7 +42,9 @@ def load_model():
42
  # tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
43
  # model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
44
  # model_id = "deepseek-ai/deepseek-llm-7b-chat"
45
- model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
 
 
46
 
47
  quantization_config = FineGrainedFP8Config()
48
  tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -53,7 +55,8 @@ def load_model():
53
  device_map="auto",
54
  torch_dtype=torch.float16,
55
  # quantization_config=quantization_config,
56
- trust_remote_code = True
 
57
  )
58
  # model.to("cpu")
59
  return tokenizer, model
 
42
  # tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
43
  # model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
44
  # model_id = "deepseek-ai/deepseek-llm-7b-chat"
45
+ model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
46
+ # model_id = "mistralai/Mistral-7B-Instruct-v0.3"
47
+
48
 
49
  quantization_config = FineGrainedFP8Config()
50
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
55
  device_map="auto",
56
  torch_dtype=torch.float16,
57
  # quantization_config=quantization_config,
58
+ trust_remote_code = True,
59
+ attn_implementation="flash_attention_2"
60
  )
61
  # model.to("cpu")
62
  return tokenizer, model