Sofia Casadei committed
Commit 9a3524a · 1 Parent(s): 91537f8

check cuda version

Files changed (3)
  1. Dockerfile +0 -4
  2. main.py +3 -2
  3. utils/device.py +15 -0
Dockerfile CHANGED
@@ -45,9 +45,5 @@ COPY --chown=user . .
 # Expose FastRTC port (matches HF Spaces default)
 EXPOSE 7860
 
-# Check gpu and cuda
-RUN python -c "import torch; print(torch.cuda.is_available())"
-RUN nvidia-smi
-
 # Start the application using uvicorn (FastAPI)
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -23,7 +23,7 @@ from transformers import (
 from transformers.utils import is_flash_attn_2_available
 
 from utils.logger_config import setup_logging
-from utils.device import get_device, get_torch_and_np_dtypes
+from utils.device import get_device, get_torch_and_np_dtypes, cuda_version_check
 from utils.turn_server import get_rtc_credentials
 
 
@@ -36,11 +36,12 @@ device = get_device(force_cpu=False)
 torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
 logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
 
+cuda_version, device_name = cuda_version_check()
+logger.info(f"CUDA Version: {cuda_version}, GPU Device: {device_name}")
 
 attention = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 logger.info(f"Using attention: {attention}")
 
-
 model_id = "openai/whisper-large-v3-turbo"
 logger.info(f"Loading Whisper model: {model_id}")
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
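For context, the two added lines degrade gracefully on CPU-only hosts: cuda_version_check() (added in utils/device.py below) returns (None, None) when no GPU is visible, so the startup log still renders instead of raising. A minimal standalone sketch of the same pattern, assuming only torch is installed (it reads torch.version.cuda directly rather than shelling out to nvcc):

import logging

import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Mirrors the helper added in utils/device.py below: report the CUDA
# version PyTorch was built with and the device name when a GPU is
# visible, otherwise (None, None).
if torch.cuda.is_available():
    cuda_version, device_name = torch.version.cuda, torch.cuda.get_device_name(0)
else:
    cuda_version, device_name = None, None

logger.info(f"CUDA Version: {cuda_version}, GPU Device: {device_name}")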
utils/device.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 import numpy as np
+import subprocess
 
 def get_device(force_cpu=False):
     if force_cpu:
@@ -23,3 +24,17 @@ def get_torch_and_np_dtypes(device, use_bfloat16=False):
     torch_dtype = torch.float32
     np_dtype = np.float32
     return torch_dtype, np_dtype
+
+def cuda_version_check():
+    if torch.cuda.is_available():
+        try:
+            cuda_runtime = subprocess.check_output(["nvcc", "--version"]).decode()
+            cuda_version = cuda_runtime.split()[-2]
+        except Exception:
+            # Fallback to PyTorch's built-in version if nvcc isn't available
+            cuda_version = torch.version.cuda
+
+        device_name = torch.cuda.get_device_name(0)
+        return cuda_version, device_name
+    else:
+        return None, None
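A quick way to exercise the new helper from the repo root (a sketch; printed values depend on the host):

from utils.device import cuda_version_check

cuda_version, device_name = cuda_version_check()
if cuda_version is None:
    print("No CUDA device visible to PyTorch")
else:
    print(f"CUDA {cuda_version} on {device_name}")

One caveat worth noting: cuda_runtime.split()[-2] depends on the exact layout of the nvcc --version output, which varies across CUDA releases, so the parsed token may not always be the version string. The torch.version.cuda fallback is the more stable source, though it reports the CUDA version PyTorch was built against rather than the toolkit installed on the machine.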