Sofia Casadei committed
Commit 9a3524a · 1 Parent(s): 91537f8

check cuda version

Files changed (3)
  1. Dockerfile +0 -4
  2. main.py +3 -2
  3. utils/device.py +15 -0
Dockerfile CHANGED
@@ -45,9 +45,5 @@ COPY --chown=user . .
 # Expose FastRTC port (matches HF Spaces default)
 EXPOSE 7860
 
-# Check gpu and cuda
-RUN python -c "import torch; print(torch.cuda.is_available())"
-RUN nvidia-smi
-
 # Start the application using uvicorn (FastAPI)
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -23,7 +23,7 @@ from transformers import (
 from transformers.utils import is_flash_attn_2_available
 
 from utils.logger_config import setup_logging
-from utils.device import get_device, get_torch_and_np_dtypes
+from utils.device import get_device, get_torch_and_np_dtypes, cuda_version_check
 from utils.turn_server import get_rtc_credentials
 
 
@@ -36,11 +36,12 @@ device = get_device(force_cpu=False)
 torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
 logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
 
+cuda_version, device_name = cuda_version_check()
+logger.info(f"CUDA Version: {cuda_version}, GPU Device: {device_name}")
 
 attention = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 logger.info(f"Using attention: {attention}")
 
-
 model_id = "openai/whisper-large-v3-turbo"
 logger.info(f"Loading Whisper model: {model_id}")
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
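For context, the two added lines degrade gracefully on CPU-only hosts: cuda_version_check() (added in utils/device.py below) returns (None, None) when no GPU is visible, so the startup log still renders instead of raising. A minimal standalone sketch of the same pattern, assuming only torch is installed (it reads torch.version.cuda directly rather than shelling out to nvcc):

import logging

import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Mirrors the helper added in utils/device.py below: report the CUDA
# version PyTorch was built with and the device name when a GPU is
# visible, otherwise (None, None).
if torch.cuda.is_available():
    cuda_version, device_name = torch.version.cuda, torch.cuda.get_device_name(0)
else:
    cuda_version, device_name = None, None

logger.info(f"CUDA Version: {cuda_version}, GPU Device: {device_name}")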
utils/device.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 import numpy as np
+import subprocess
 
 def get_device(force_cpu=False):
     if force_cpu:
@@ -23,3 +24,17 @@ def get_torch_and_np_dtypes(device, use_bfloat16=False):
     torch_dtype = torch.float32
     np_dtype = np.float32
     return torch_dtype, np_dtype
+
+def cuda_version_check():
+    if torch.cuda.is_available():
+        try:
+            cuda_runtime = subprocess.check_output(["nvcc", "--version"]).decode()
+            cuda_version = cuda_runtime.split()[-2]
+        except Exception:
+            # Fallback to PyTorch's built-in version if nvcc isn't available
+            cuda_version = torch.version.cuda
+
+        device_name = torch.cuda.get_device_name(0)
+        return cuda_version, device_name
+    else:
+        return None, None
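A quick way to exercise the new helper from the repo root (a sketch; printed values depend on the host):

from utils.device import cuda_version_check

cuda_version, device_name = cuda_version_check()
if cuda_version is None:
    print("No CUDA device visible to PyTorch")
else:
    print(f"CUDA {cuda_version} on {device_name}")

One caveat worth noting: cuda_runtime.split()[-2] depends on the exact layout of the nvcc --version output, which varies across CUDA releases, so the parsed token may not always be the version string. The torch.version.cuda fallback is the more stable source, though it reports the CUDA version PyTorch was built against rather than the toolkit installed on the machine.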