# Source: Hugging Face file view (author: Nusri7, commit 8998902, 3.74 kB).
# Commit message: Initial commit with FastAPI + Gradio app
import os
import torchaudio
import gradio as gr
import torch
from fastapi import FastAPI, HTTPException, File, UploadFile
from speechbrain.inference import SpeakerRecognition
from fastapi.responses import JSONResponse
# Load the speaker-verification model once, at import time, so every request
# handler shares the same instance.
_MODEL_SOURCE = "speechbrain/spkrec-ecapa-voxceleb"
_MODEL_SAVEDIR = "tmp_model"

speaker_verification = SpeakerRecognition.from_hparams(
    source=_MODEL_SOURCE,
    savedir=_MODEL_SAVEDIR,
)
# Sample rate (Hz) the verification model is fed; inputs at any other rate
# are resampled to this before scoring.
_MODEL_SAMPLE_RATE = 16000


def _as_model_waveform(audio, sample_rate):
    """Turn raw samples into a float32 tensor shaped ``(batch, time)``.

    Integer PCM is rescaled to the [-1.0, 1.0] float range, any other float
    width is cast to float32, a 1-D signal gains a leading batch dimension,
    and the signal is resampled when ``sample_rate`` differs from
    ``_MODEL_SAMPLE_RATE``.
    """
    signal = torch.as_tensor(audio)
    if signal.is_floating_point():
        signal = signal.to(torch.float32)
    else:
        # Integer PCM (e.g. int16): divide by the dtype's full-scale value.
        full_scale = float(torch.iinfo(signal.dtype).max)
        signal = signal.to(torch.float32) / full_scale
    if signal.ndimension() == 1:
        signal = signal.unsqueeze(0)
    if sample_rate != _MODEL_SAMPLE_RATE:
        signal = torchaudio.functional.resample(
            signal, sample_rate, _MODEL_SAMPLE_RATE
        )
    return signal


# Function to calculate similarity score
def get_similarity(audio1, audio2, sample_rate=16000):
    """Score how likely two recordings come from the same speaker.

    Args:
        audio1: Samples of the first recording (numpy array, tensor or
            sequence), either 1-D ``(time,)`` or 2-D ``(batch, time)``.
        audio2: Samples of the second recording, same conventions.
        sample_rate: Sample rate in Hz shared by both inputs. Inputs that are
            not at 16000 Hz are resampled before scoring.

    Returns:
        ``(score, verdict)`` on success, where ``score`` is a float and
        ``verdict`` is ``"Yes"`` or ``"No"``. On any failure the function
        does not raise; it returns ``(None, error_message)`` instead.
    """
    try:
        signal1 = _as_model_waveform(audio1, sample_rate)
        signal2 = _as_model_waveform(audio2, sample_rate)

        # Get similarity score and prediction
        score, prediction = speaker_verification.verify_batch(signal1, signal2)
        return float(score), "Yes" if bool(prediction) else "No"
    except Exception as e:
        # Callers rely on the (None, message) shape to detect failures.
        return None, str(e)
def _prepare_gradio_audio(audio_input, target_rate=16000):
    """Convert one Gradio ``type="numpy"`` audio value into model-ready samples.

    Gradio delivers ``(sample_rate, data)``; ``data`` is typically integer PCM
    laid out as ``(samples,)`` or ``(samples, channels)``. The result is a 1-D
    float32 numpy array, downmixed to mono and resampled to ``target_rate``.

    Raises:
        ValueError: If the input is ``None`` (no audio was provided).
    """
    if audio_input is None:
        raise ValueError("Both audio inputs are required.")

    sample_rate, samples = audio_input
    waveform = torch.as_tensor(samples)
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        # Integer PCM: rescale to the [-1.0, 1.0] float range.
        full_scale = float(torch.iinfo(waveform.dtype).max)
        waveform = waveform.to(torch.float32) / full_scale
    if waveform.ndimension() == 2:
        # (samples, channels) -> mono by averaging the channel axis.
        waveform = waveform.mean(dim=1)
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)
    return waveform.numpy()


# Gradio callback to compare voices
def compare_voices(file1, file2):
    """Compare two Gradio audio inputs and report whether the speakers match.

    Args:
        file1: First audio value from ``gr.Audio(type="numpy")``, i.e. a
            ``(sample_rate, numpy_array)`` tuple, or ``None`` if missing.
        file2: Second audio value, same format.

    Returns:
        ``{"Similarity Score": "<score to 4 decimals>",
        "Same User Prediction": "Yes" | "No"}`` on success, or
        ``{"error": message}`` when the inputs are missing or invalid or
        scoring fails. This function never raises.
    """
    try:
        # Gradio hands over (sample_rate, data) - note the order.
        audio1 = _prepare_gradio_audio(file1)
        audio2 = _prepare_gradio_audio(file2)

        # Get similarity score
        score, is_same_user = get_similarity(audio1, audio2)
        if score is None:
            # get_similarity signals failure as (None, error_message).
            return {"error": is_same_user}

        return {"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user}
    except Exception as e:
        # Surface any unexpected problem to the UI instead of crashing it.
        return {"error": str(e)}
# FastAPI app
# Module-level application object: the POST route and the startup handler
# defined later in this file register on it, and uvicorn serves it.
app = FastAPI()
def _decode_upload(raw_bytes, target_rate=16000):
    """Decode uploaded audio bytes into a 1-D float32 numpy array.

    The bytes are decoded in memory (nothing is written to disk), downmixed
    to mono and resampled to ``target_rate``.
    """
    import io  # local import: only this helper needs it

    # torchaudio.load returns (waveform, sample_rate), channels first.
    waveform, sample_rate = torchaudio.load(io.BytesIO(raw_bytes))
    waveform = waveform.mean(dim=0)
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)
    return waveform.numpy()


@app.post("/compare_voices/")
async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
    """
    Compare two audio files and return the similarity score and prediction.

    Both uploads are decoded in memory and scored with ``get_similarity``.

    Returns:
        ``{"Similarity Score": "<score to 4 decimals>",
        "Same User Prediction": "Yes" | "No"}``.

    Raises:
        HTTPException: 400 if either upload cannot be read or decoded as
            audio; 500 if the model fails to score the pair.
    """
    try:
        audio1 = _decode_upload(await file1.read())
        audio2 = _decode_upload(await file2.read())
    except Exception as e:
        # Unreadable or undecodable uploads are the client's fault.
        raise HTTPException(status_code=400, detail=str(e)) from e

    # NOTE(review): inference runs synchronously and blocks the event loop
    # for the duration of the call; consider a worker thread under load.
    score, outcome = get_similarity(audio1, audio2)
    if score is None:
        # get_similarity reports failure as (None, error_message).
        raise HTTPException(status_code=500, detail=outcome)

    return {"Similarity Score": f"{score:.4f}", "Same User Prediction": outcome}
# Gradio interface function
def gradio_interface():
    """Build (but do not launch) the Gradio UI around ``compare_voices``.

    The interface has two numpy-typed audio inputs and renders the callback's
    dictionary result as JSON; live updating is disabled.
    """
    audio_inputs = [
        gr.Audio(type="numpy", label=caption)
        for caption in ("First Audio File", "Second Audio File")
    ]
    return gr.Interface(
        fn=compare_voices,
        inputs=audio_inputs,
        outputs="json",
        live=False,
    )
# Launch Gradio as a web interface
@app.on_event("startup")
async def startup():
    """Start the Gradio UI alongside the FastAPI app when the server boots.

    The interface comes from ``gradio_interface()`` so the UI is defined in
    one place. ``prevent_thread_lock=True`` makes ``launch()`` return right
    away; without it the call blocks its caller, which would stall this
    startup handler and keep FastAPI from ever serving requests.
    """
    gradio_interface().launch(share=True, inline=True, prevent_thread_lock=True)
# Script entry point: serve the FastAPI app (and, via its startup hook, Gradio).
if __name__ == "__main__":
    import uvicorn

    bind_host = "0.0.0.0"
    bind_port = 5000
    uvicorn.run(app, host=bind_host, port=bind_port)