opex792 committed
Commit a7193be · verified · 1 Parent(s): d049016

Update app.py

Files changed (1)
app.py +53 -41
app.py CHANGED
@@ -4,7 +4,8 @@ import logging
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.responses import JSONResponse
 import torch
-import transformers
+# Import specific classes from transformers
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import librosa
 import soundfile  # Often needed by librosa/transformers for specific formats
 import numpy as np
@@ -14,39 +15,52 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # --- Configuration ---
-# IMPORTANT: whisper-large-v3 is very resource intensive.
-# Consider 'openai/whisper-base' or 'openai/whisper-small' for HF Free Spaces
 MODEL_NAME = "openai/whisper-large-v3"
-# Set device (use GPU if available, otherwise CPU)
-# Note: Free HF Spaces typically don't provide GPUs. CPU will be slow.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
-# Set torch dtype for potential speedup/memory saving (optional)
-# Use float16 on GPU if supported, otherwise float32
-TORCH_DTYPE = torch.float16 if torch.cuda.is_available() and DEVICE != "cpu" else torch.float32  # Ensure float32 on CPU
+# Ensure float32 on CPU, float16 on GPU if available
+TORCH_DTYPE = torch.float16 if torch.cuda.is_available() and DEVICE != "cpu" else torch.float32
 
 logger.info(f"Using device: {DEVICE}")
 logger.info(f"Using dtype: {TORCH_DTYPE}")
-logger.info(f"Loading model: {MODEL_NAME}...")
+logger.info(f"Loading model and processor for: {MODEL_NAME}...")
 
-# --- Load the Model and Processor ---
-# Use try-except for robust model loading
+# --- Load the Model and Processor Explicitly ---
 try:
-    # Load the pipeline
-    # Add model_kwargs to potentially help with loading, especially safetensors
-    pipe = transformers.pipeline(
+    logger.info("Loading model...")
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=TORCH_DTYPE,
+        low_cpu_mem_usage=True,  # Crucial for large models on limited RAM
+        use_safetensors=True  # Explicitly use safetensors
+        # If using a GPU with flash-attn installed, could add:
+        # attn_implementation="flash_attention_2"
+    )
+    # Move model to device *after* loading with low_cpu_mem_usage
+    model.to(DEVICE)
+    logger.info("Model loaded successfully.")
+
+    logger.info("Loading processor...")
+    processor = AutoProcessor.from_pretrained(MODEL_NAME)
+    logger.info("Processor loaded successfully.")
+
+    logger.info("Creating pipeline...")
+    # Create the pipeline from the pre-loaded model and processor components
+    pipe = pipeline(
         "automatic-speech-recognition",
-        model=MODEL_NAME,
+        model=model,
+        tokenizer=processor.tokenizer,
+        feature_extractor=processor.feature_extractor,
         torch_dtype=TORCH_DTYPE,
         device=DEVICE,
-        model_kwargs={"low_cpu_mem_usage": True}  # Helps with large models on CPU
-        # Note: use_safetensors=True is often default now if safetensors lib is installed
-        # If still failing, you could try loading model/processor explicitly first
+        # Note: chunk_length_s and batch_size are inference-time arguments,
+        # better applied when calling pipe(), not during initialization.
     )
-    logger.info("Model loaded successfully.")
+    logger.info("Pipeline created successfully.")
+
 except Exception as e:
-    logger.error(f"Error loading model: {e}", exc_info=True)
-    # Exit if model loading fails, as the app is useless without it
-    raise RuntimeError(f"Failed to load model {MODEL_NAME}") from e
+    logger.error(f"Error during model/processor loading or pipeline creation: {e}", exc_info=True)
+    # Exit if loading fails; the app is useless without the model
+    raise RuntimeError(f"Failed to initialize model pipeline for {MODEL_NAME}") from e
 
 # --- FastAPI App ---
 app = FastAPI()
@@ -81,33 +95,26 @@ async def transcribe_audio(file: UploadFile = File(...)):
     # Process the audio file
     try:
         logger.info("Processing audio...")
-        # Use io.BytesIO to treat the byte content like a file
         audio_stream = io.BytesIO(contents)
-
-        # Load audio using librosa. Whisper expects 16kHz mono.
-        # Librosa automatically resamples and converts to mono by default.
-        # Specify sr=16000 to ensure the correct sample rate.
+        # Load audio using librosa, ensuring 16kHz mono
        audio_input, sample_rate = librosa.load(audio_stream, sr=16000, mono=True)
        logger.info(f"Audio loaded. Sample rate: {sample_rate}, Duration: {len(audio_input)/sample_rate:.2f}s")
 
-        # Ensure audio_input is a NumPy array (librosa usually returns this)
        if not isinstance(audio_input, np.ndarray):
            audio_input = np.array(audio_input)
 
        logger.info("Starting transcription...")
-        # Perform inference
-        # The pipeline can often handle numpy arrays directly
-        # Set chunk_length_s for potentially better handling of long files on limited memory
-        result = pipe(audio_input.copy(),  # Pass a copy to avoid potential issues
+        # Perform inference using the pipeline
+        # Apply chunking and batching here for inference
+        result = pipe(audio_input.copy(),
                      chunk_length_s=30,  # Process in 30-second chunks
-                      batch_size=4,  # Adjust batch size based on memory
-                      # Add generation config for more control if needed
-                      # generate_kwargs={"language": "en"}  # Example: Force language
+                      batch_size=4,  # Adjust based on memory (start low, e.g. 1 or 2, on the free tier)
+                      return_timestamps=False  # Set to True or "word" if needed
+                      # generate_kwargs={"language": "en"}  # Optional: force language
                      )
 
        transcription = result["text"]
        logger.info(f"Transcription successful for {filename}.")
-        # logger.debug(f"Transcription result: {transcription}")  # Optional: log full text
 
        return JSONResponse(content={
            "filename": filename,
@@ -119,18 +126,23 @@ async def transcribe_audio(file: UploadFile = File(...)):
        raise HTTPException(status_code=400, detail=f"Error processing audio file: {e}. Ensure it's a valid audio format.")
    except Exception as e:
        logger.error(f"Transcription failed for {filename}: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"Transcription failed: {e}")
+        # Check for common errors like out-of-memory (OOM)
+        if "out of memory" in str(e).lower():
+            logger.error("Potential out-of-memory error. The model might be too large for the available resources.")
+            raise HTTPException(status_code=507, detail="Transcription failed: insufficient memory. Try a smaller model or shorter audio.")
+        else:
+            raise HTTPException(status_code=500, detail=f"Transcription failed: {e}")
 
 # Optional: Add health check endpoint
 @app.get("/health")
 async def health_check():
     """ Health check endpoint """
     try:
-        if pipe:
+        # Check that the pipeline object exists and its components are loaded
+        if pipe and pipe.model and pipe.feature_extractor:
            return {"status": "ok", "model": MODEL_NAME, "device": DEVICE}
        else:
-            # This state shouldn't be reachable if startup fails
-            return {"status": "error", "detail": "Model pipeline not loaded"}
+            return {"status": "error", "detail": "Model pipeline component missing"}
    except Exception as e:
        logger.error(f"Health check failed: {e}", exc_info=True)
        return {"status": "error", "detail": str(e)}
@@ -139,4 +151,4 @@ async def health_check():
 if __name__ == "__main__":
     import uvicorn
     logger.info("Starting Uvicorn server locally...")
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)  # reload=True for local dev