IdoMachlev
committed on
Commit
·
a49a698
1
Parent(s):
d62e007
Changed attention implementation from the default "eager" to "sdpa"
Browse files - handler.py +3 -1
handler.py
CHANGED
@@ -13,7 +13,9 @@ class EndpointHandler():
|
|
13 |
model_id = "openai/whisper-large-v3-turbo"
|
14 |
|
15 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
16 |
-
model_id, torch_dtype=torch_dtype,
|
|
|
|
|
17 |
)
|
18 |
model.to(device)
|
19 |
|
|
|
13 |
model_id = "openai/whisper-large-v3-turbo"
|
14 |
|
15 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
16 |
+
model_id, torch_dtype=torch_dtype,
|
17 |
+
low_cpu_mem_usage=True, use_safetensors=True,
|
18 |
+
attn_implementation="sdpa"
|
19 |
)
|
20 |
model.to(device)
|
21 |
|