Spaces:
Sleeping
Sleeping
Sofia Casadei
committed on
Commit
·
9a3524a
1
Parent(s):
91537f8
check cuda version
Browse files- Dockerfile +0 -4
- main.py +3 -2
- utils/device.py +15 -0
Dockerfile
CHANGED
@@ -45,9 +45,5 @@ COPY --chown=user . .
|
|
45 |
# Expose FastRTC port (matches HF Spaces default)
|
46 |
EXPOSE 7860
|
47 |
|
48 |
-
# Check gpu and cuda
|
49 |
-
RUN python -c "import torch; print(torch.cuda.is_available())"
|
50 |
-
RUN nvidia-smi
|
51 |
-
|
52 |
# Start the application using uvicorn (FastAPI)
|
53 |
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
45 |
# Expose FastRTC port (matches HF Spaces default)
|
46 |
EXPOSE 7860
|
47 |
|
|
|
|
|
|
|
|
|
48 |
# Start the application using uvicorn (FastAPI)
|
49 |
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
CHANGED
@@ -23,7 +23,7 @@ from transformers import (
|
|
23 |
from transformers.utils import is_flash_attn_2_available
|
24 |
|
25 |
from utils.logger_config import setup_logging
|
26 |
-
from utils.device import get_device, get_torch_and_np_dtypes
|
27 |
from utils.turn_server import get_rtc_credentials
|
28 |
|
29 |
|
@@ -36,11 +36,12 @@ device = get_device(force_cpu=False)
|
|
36 |
torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
|
37 |
logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
|
38 |
|
|
|
|
|
39 |
|
40 |
attention = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
|
41 |
logger.info(f"Using attention: {attention}")
|
42 |
|
43 |
-
|
44 |
model_id = "openai/whisper-large-v3-turbo"
|
45 |
logger.info(f"Loading Whisper model: {model_id}")
|
46 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
|
|
23 |
from transformers.utils import is_flash_attn_2_available
|
24 |
|
25 |
from utils.logger_config import setup_logging
|
26 |
+
from utils.device import get_device, get_torch_and_np_dtypes, cuda_version_check
|
27 |
from utils.turn_server import get_rtc_credentials
|
28 |
|
29 |
|
|
|
36 |
torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
|
37 |
logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
|
38 |
|
39 |
+
cuda_version, device_name = cuda_version_check()
|
40 |
+
logger.info(f"CUDA Version: {cuda_version}, GPU Device: {device_name}")
|
41 |
|
42 |
attention = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
|
43 |
logger.info(f"Using attention: {attention}")
|
44 |
|
|
|
45 |
model_id = "openai/whisper-large-v3-turbo"
|
46 |
logger.info(f"Loading Whisper model: {model_id}")
|
47 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
utils/device.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import torch
|
2 |
import numpy as np
|
|
|
3 |
|
4 |
def get_device(force_cpu=False):
|
5 |
if force_cpu:
|
@@ -23,3 +24,17 @@ def get_torch_and_np_dtypes(device, use_bfloat16=False):
|
|
23 |
torch_dtype = torch.float32
|
24 |
np_dtype = np.float32
|
25 |
return torch_dtype, np_dtype
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import torch
|
2 |
import numpy as np
|
3 |
+
import subprocess
|
4 |
|
5 |
def get_device(force_cpu=False):
|
6 |
if force_cpu:
|
|
|
24 |
torch_dtype = torch.float32
|
25 |
np_dtype = np.float32
|
26 |
return torch_dtype, np_dtype
|
27 |
+
|
28 |
+
def cuda_version_check():
    """Report the CUDA version and the name of CUDA device 0.

    The version is taken from the ``release X.Y`` token printed by
    ``nvcc --version``. If nvcc is missing, exits with an error, or prints
    output without such a token, the CUDA version PyTorch was built against
    (``torch.version.cuda``) is used instead.

    Returns:
        A ``(cuda_version, device_name)`` tuple, or ``(None, None)`` when
        ``torch.cuda.is_available()`` is false.
    """
    import re  # local import: only this function needs it

    if not torch.cuda.is_available():
        return None, None

    try:
        nvcc_output = subprocess.check_output(["nvcc", "--version"]).decode(
            errors="replace"
        )
    except (OSError, subprocess.SubprocessError):
        # nvcc is not installed / not on PATH, or it exited with an error.
        nvcc_output = ""

    # The version lives in a line such as
    #   "Cuda compilation tools, release 12.2, V12.2.140"
    # Newer toolkits print an extra "Build cuda_..." line afterwards, so
    # picking a token by position from the end of the output is unreliable.
    release = re.search(r"release\s+(\d+(?:\.\d+)*)", nvcc_output)
    if release:
        cuda_version = release.group(1)
    else:
        # Fallback to PyTorch's built-in version if nvcc isn't usable
        cuda_version = torch.version.cuda

    device_name = torch.cuda.get_device_name(0)
    return cuda_version, device_name
|