Spaces:

MalikIbrar
/

whisper-fastapi

Sleeping

App Files Files Community

whisper-fastapi / main.py

MalikIbrar

hello

fe4fe8c 10 months ago

raw

history blame contribute delete

2.08 kB

	import os
	import threading

	import torch
	import uvicorn
	from fastapi import FastAPI, File, UploadFile, HTTPException
	from fastapi.concurrency import run_in_threadpool
	from fastapi.middleware.cors import CORSMiddleware
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
	# Initialize FastAPI
	app = FastAPI()

	# CORS: every origin, method and header is accepted so browser clients on
	# any site can call the API.
	# Credentials stay disabled: a wildcard origin must not be combined with
	# allow_credentials=True (the CORS spec forbids "*" for credentialed
	# requests, and reflecting arbitrary origins with credentials would let any
	# site act on behalf of a logged-in user). No endpoint in this file reads
	# cookies or auth headers, so nothing here needs credentialed requests.
	# If that changes, list the trusted origins explicitly before re-enabling.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Pick the compute device and matching precision: half precision on the
	# first CUDA device when one is available, full precision on CPU otherwise.
	if torch.cuda.is_available():
	    device, torch_dtype = "cuda:0", torch.float16
	else:
	    device, torch_dtype = "cpu", torch.float32

	model_id = "openai/whisper-large-v3"

	# Use the copy under ./<model_id> when that path exists; otherwise hand the
	# bare model id to from_pretrained and let it resolve the weights.
	local_model_path = f"./{model_id}"
	model_source = local_model_path if os.path.exists(local_model_path) else model_id

	model = AutoModelForSpeechSeq2Seq.from_pretrained(
	    model_source,
	    torch_dtype=torch_dtype,
	    low_cpu_mem_usage=True,
	    use_safetensors=True,
	)
	processor = AutoProcessor.from_pretrained(model_source)

	model.to(device)

	# Speech-recognition pipeline shared by all requests; it reuses the
	# tokenizer and feature extractor bundled in the processor.
	pipe = pipeline(
	    "automatic-speech-recognition",
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    torch_dtype=torch_dtype,
	    device=device,
	)

	# Inference now runs on worker threads (see transcribe_audio), so several
	# requests could otherwise enter the shared pipeline at the same time.
	# The lock keeps inference strictly one-at-a-time, which is what the
	# original in-event-loop call guaranteed implicitly.
	_pipe_lock = threading.Lock()


	def _run_pipeline(audio_bytes):
	    """Run the shared ASR pipeline on raw audio bytes, one call at a time."""
	    with _pipe_lock:
	        return pipe(audio_bytes)


	# API endpoint to upload audio and get the transcribed text
	@app.post("/transcribe")
	async def transcribe_audio(file: UploadFile = File(...)):
	    """Transcribe an uploaded audio file.

	    Args:
	        file: The uploaded audio file (multipart form field ``file``).

	    Returns:
	        A dict of the form ``{"text": <transcription>}``.

	    Raises:
	        HTTPException: 400 if the upload contains no data; 500 if reading
	            the upload or running the pipeline fails.
	    """
	    try:
	        # Read the audio file bytes directly from the uploaded file
	        audio_bytes = await file.read()

	        # Reject empty uploads up front: they are a client error, not a
	        # server failure, and would otherwise surface as an opaque 500.
	        if not audio_bytes:
	            raise HTTPException(status_code=400, detail="Uploaded file is empty.")

	        # The pipeline call is blocking and can take a long time; run it in
	        # a worker thread so the event loop keeps serving other requests.
	        result = await run_in_threadpool(_run_pipeline, audio_bytes)

	        # Return the transcribed text
	        return {"text": result["text"]}

	    except HTTPException:
	        # Deliberate HTTP errors (e.g. the 400 above) must pass through
	        # unchanged instead of being rewrapped as a 500.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}") from e

	@app.get("/")
	async def root():
	    """Return the static welcome message served at the API root."""
	    welcome = {"message": "Welcome to the speech-to-text API!"}
	    return welcome

	# Script entry point: serve the app with Uvicorn when this file is executed
	# directly (not when it is imported as a module).
	if __name__ == "__main__":
	    # Listen on all network interfaces, port 8000.
	    server_options = {"host": "0.0.0.0", "port": 8000}
	    uvicorn.run(app, **server_options)