|
import gradio as gr |
|
import torch |
|
from nemo.collections.asr.models import EncDecSpeakerLabelModel |
|
import json |
|
|
|
# Minimum rescaled similarity (0-to-1 scale) a stored voice print must reach
# to be reported as a match.
THRESHOLD = 0.60

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Pretrained NeMo speaker model, loaded once at import time and moved to the
# selected device.
model_name = "nvidia/speakerverification_en_titanet_large"
model = EncDecSpeakerLabelModel.from_pretrained(model_name)
model = model.to(device)
|
|
|
def create_voice_print(audio):
    """Build a normalized voice print from an audio file.

    Args:
        audio: Path of the audio file to embed. Any falsy value (``None``,
            empty string) is treated as "no audio supplied".

    Returns:
        A JSON string ``{"error": ...}`` when no audio was supplied;
        otherwise the embedding returned by ``model.get_embedding``,
        squeezed and divided by its ``torch.linalg.norm``.
    """
    # NOTE(review): the success path returns a raw torch.Tensor, whereas
    # find_matches parses voice prints from JSON text -- confirm the tensor
    # is serialized as intended by whatever consumes this return value.
    if audio:
        embedding = model.get_embedding(audio).squeeze()
        norm = torch.linalg.norm(embedding)
        return embedding / norm

    return json.dumps({"error": "no se proporciono un audio"})
|
|
|
def compare_voice_print(X, Y):
    """Score how alike two voice prints are on a 0-to-1 scale.

    Args:
        X: First voice print, a 1-D tensor (required by ``torch.dot``).
        Y: Second voice print, a 1-D tensor with the same length as ``X``.

    Returns:
        The cosine similarity of ``X`` and ``Y`` shifted from ``[-1, 1]``
        to ``[0, 1]``, as a Python float.
    """
    dot_xy = torch.dot(X, Y)
    # Product of the two vector norms, computed from the squared norms.
    norm_product = (torch.dot(X, X) * torch.dot(Y, Y)) ** 0.5
    cosine = dot_xy / norm_product

    # Map -1 (opposite) .. 1 (identical direction) onto 0 .. 1.
    rescaled = (cosine + 1) / 2
    return rescaled.item()
|
|
|
|
|
def find_matches(file, voice_print):
    """Find the stored speakers whose voice print best matches a query.

    Args:
        file: Path to a JSON file whose top-level object holds a ``"data"``
            list. Each entry must carry a ``"voice_print"`` key containing
            a JSON-encoded list of numbers.
        voice_print: JSON-encoded list of numbers for the query voice print.

    Returns:
        On success, a list of at most three dicts with the keys
        ``"speaker"`` (the matching entry) and ``"similarity_score"``,
        sorted by descending score and limited to entries scoring at least
        ``THRESHOLD``. On bad input, a JSON string ``{"error": ...}``.
    """
    if not file:
        return json.dumps({"error": "No se proporcionó un archivo JSON"})

    try:
        # The context manager closes the handle even when parsing fails.
        # JSON is UTF-8 by specification, so do not rely on the locale
        # default encoding.
        with open(file, encoding="utf-8") as handle:
            json_content = json.load(handle)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return json.dumps({"error": "El archivo JSON no es válido"})

    # A top-level list/number/string has no ".get"; report it as an invalid
    # file instead of crashing with AttributeError.
    if not isinstance(json_content, dict):
        return json.dumps({"error": "El archivo JSON no es válido"})

    data = json_content.get("data", [])

    try:
        voice_print = torch.tensor(json.loads(voice_print))
    except (TypeError, ValueError, RuntimeError):
        # Empty or malformed query text: answer with an error payload like
        # the other input checks rather than raising.
        return json.dumps({"error": "La huella de voz no es válida"})

    matches = []
    for speaker in data:
        speaker_voice_print = torch.tensor(json.loads(speaker["voice_print"]))
        similarity_score = compare_voice_print(voice_print, speaker_voice_print)
        print(similarity_score)
        if similarity_score >= THRESHOLD:
            matches.append({"speaker": speaker, "similarity_score": similarity_score})

    # Best matches first; only the top three are reported.
    matches.sort(key=lambda match: match["similarity_score"], reverse=True)
    return matches[:3]
|
|
|
|
|
# Tab 1: takes an audio file path and shows the result of create_voice_print.
voice_print_maker = gr.Interface(
    fn=create_voice_print,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.JSON(),
)

# Tab 2: takes a JSON file plus a voice print pasted as text and shows the
# result of find_matches.
voice_prints_loader = gr.Interface(
    fn=find_matches,
    inputs=[
        gr.File(type="filepath", label="Upload JSON file"),
        gr.TextArea(),
    ],
    outputs=gr.JSON(),
)

# Combine both interfaces into one tabbed app and start it.
demo = gr.TabbedInterface(
    interface_list=[voice_print_maker, voice_prints_loader],
    tab_names=["app", "loader"],
)
demo.launch()
|
|