import pickle
from functools import lru_cache
from pathlib import Path

import gradio as gr
import torch
import torchaudio
from silero_vad import collect_chunks, get_speech_timestamps, load_silero_vad, read_audio
from speechbrain.inference.speaker import EncoderClassifier
# Speaker-embedding extractor (ECAPA-TDNN, trained on VoxCeleb).
classifier = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")


def _load_pickle(path):
    """Deserialize a bundled model file (trusted, locally shipped input only)."""
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Downstream classifiers/regressors operating on the speaker embedding.
gender_clf = _load_pickle("gender_classifier.pickle")
male_clf = _load_pickle("height_estimator_1.pickle")
female_clf = _load_pickle("height_estimator_0.pickle")

# Markdown shown under the demo, and the message returned on bad input.
article_md = Path("Description.md")
error_message = "No speech detected or signal too short!"
def read_markdown_file(file_path):
    """Return the entire contents of a UTF-8 text file as one string."""
    return Path(file_path).read_text(encoding="utf-8")
def metric_to_imperial(height):
    """Convert a height in centimetres to a feet-and-inches string, e.g. 5'11"."""
    total_inches = round(height / 2.54)
    feet, inches = divmod(total_inches, 12)
    return f"{feet}'{inches}\""
@lru_cache(maxsize=1)
def _vad_model():
    """Load the Silero VAD model once; subsequent calls reuse the cached instance."""
    return load_silero_vad()


def get_speech(wav):
    """Return only the voiced portions of *wav*, concatenated.

    Runs Silero VAD over the waveform and stitches the detected speech
    segments together. Previously the VAD model was reloaded on every
    call; it is now loaded lazily once and cached.

    :param wav: 1-D audio tensor as produced by ``read_audio`` —
        assumed 16 kHz mono (Silero's default); TODO confirm upstream.
    :return: tensor containing the concatenated speech chunks (may be
        empty when no speech is detected).
    """
    speech_timestamps = get_speech_timestamps(wav, _vad_model())
    return collect_chunks(speech_timestamps, wav)
def estimate_height(gender, vad, filepath, imperial):
    """Predict a speaker's gender and height from an audio recording.

    :param gender: "Detect", "Male", "Female", or None (None behaves as "Detect").
    :param vad: when truthy, strip non-speech with Silero VAD first.
    :param filepath: path to the uploaded audio file, or None.
    :param imperial: when truthy, format the height as feet'inches".
    :return: a string like ``Male 180 cm`` / ``Female 5'4"``, or the
        module-level ``error_message`` when input is missing or too short.
    """
    # Guard: nothing uploaded.
    if filepath is None:
        return error_message

    waveform = read_audio(filepath)
    if vad:
        waveform = get_speech(waveform)
    # Guard: empty signal (no speech found, or recording too short).
    if len(waveform) < 1:
        return error_message

    # Speaker embedding; squeeze the batch dimension added by encode_batch.
    embedding = torch.squeeze(classifier.encode_batch(waveform), 0)

    if gender == "Detect" or gender is None:
        gender = gender_clf.predict(embedding)
    else:
        gender = 1 if gender == "Male" else 0

    # Separate height regressors per gender.
    estimator = male_clf if gender else female_clf
    height_cm = estimator.predict(embedding)[0]

    label = "Male" if gender else "Female"
    if imperial:
        return f"{label} {metric_to_imperial(height_cm)}"
    return f"{label} {round(height_cm)} cm"
# Glass theme for the Gradio UI.
theme = gr.themes.Glass()

with gr.Blocks(theme=theme) as demo:
    gr.Interface(
        fn=estimate_height,
        inputs=[
            gr.Radio(["Detect", "Male", "Female"], label="Gender of a speaker", value="Detect"),
            gr.Checkbox(label="VAD", info="If there is a lot of silence in your audio, maybe try using VAD"),
            gr.Audio(label="Audio", type="filepath"),
            gr.Checkbox(label="Imperial units"),
        ],
        outputs=[gr.Label(label="Prediction")],
        title="Speaker height estimator",
        description="Demo of estimator trained using [HeightCeleb](https://github.com/stachu86/HeightCeleb) dataset",
        # NOTE(review): allow_flagging is deprecated in Gradio 4.x in favour of
        # flagging_mode — confirm the pinned Gradio version before changing.
        allow_flagging="never",
        article=read_markdown_file(article_md),
    )

# A stray "|" scrape artifact was removed from the end of this line.
# NOTE(review): the first positional argument of launch() is `inline`;
# if `share=False` was intended, pass it by keyword — confirm intent.
demo.launch(False, debug=True)