Spaces:

MalikIbrar
/

whisper-fastapi

Sleeping

App Files Files Community

MalikIbrar committed on Sep 14

Commit

c41e64b

•

1 Parent(s): 1d22b69

Add application file

Browse files

Files changed (2) hide show

main.py +83 -0
requirements.txt +13 -0

main.py ADDED Viewed

	@@ -0,0 +1,83 @@

+from fastapi import FastAPI, File, UploadFile, HTTPException
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import torch
+import uvicorn
+import librosa
+import soundfile as sf
+from fastapi.middleware.cors import CORSMiddleware
+import os
+import tempfile
+# Initialize FastAPI
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=['*'],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Load the model and processor
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model_id = "openai/whisper-large-v3"
+# Check if model exists locally, otherwise download it
+if not os.path.exists(f"./{model_id}"):
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True)
+    processor = AutoProcessor.from_pretrained(model_id)
+else:
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(f"./{model_id}", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True)
+    processor = AutoProcessor.from_pretrained(f"./{model_id}")
+model.to(device)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+# API endpoint to upload audio and get the transcribed text
+@app.post("/transcribe")
+async def transcribe_audio(file: UploadFile = File(...)):
+    try:
+        # Create a temporary file to save the uploaded content
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_file.write(await file.read())
+            temp_path = temp_file.name
+        # Load the audio file using librosa
+        audio, sr = librosa.load(temp_path, sr=16000)
+        # Convert to a format that the model can process (in case the file needs reformatting)
+        processed_path = temp_path  # Reuse temp file if format is already correct
+        sf.write(processed_path, audio, 16000)
+        # Pass the processed audio to the pipeline
+        result = pipe(processed_path)
+        # Remove the temp file after processing
+        os.remove(temp_path)
+        # Return the transcribed text
+        return {"text": result["text"]}
+    except Exception as e:
+        # Clean up temp file in case of error
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+        raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}")
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the speech-to-text API!"}
+# Running FastAPI with Uvicorn
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+accelerate==0.34.2
+aiohttp==3.10.5
+attrs==24.2.0
+fastapi==0.114.1
+librosa==0.10.2.post1
+numpy==1.26.4
+pandas==2.1.4
+requests==2.32.3
+soundfile==0.12.1
+torch==2.4.1
+transformers==4.44.2
+uvicorn==0.30.6
+python-multipart