pheme / app.py
taras-sereda's picture
app with requirements and voice samples
694ecc6
raw
history blame
1.72 kB
"""PUBLIC
Simple Gradio demo app for generating speech with the Pheme model.
Copyright PolyAI Limited.
"""
import time
from pathlib import Path
import gradio as gr
from transformer_infer import PhemeClient, parse_arguments
# TODO(review): the original note here was a bare "TODO" with no description;
# confirm with the author what was intended.
# Voice identifiers offered in the UI dropdown; the selected value is passed
# unchanged to ``model.infer`` as the voice argument.
VOICE_OPTIONS = [
"male_voice",
"POD1000000004_S0000246",
"POD1000000018_S0000253",
"POD1000000048_S0000035",
"YOU1000000006_S0000051",
"YOU1000000044_S0000798",
"empress",
]
# Parse the arguments and build the client once, at import time; the resulting
# ``model`` is the single instance used by every ``inference`` call.
args = parse_arguments()
model = PhemeClient(args)
def inference(
    text,
    voice,
    top_k,
    temperature
):
    """Run the model on ``text`` with the chosen ``voice`` and yield the audio.

    Each request is appended to ``PhemeVoice.log`` before the model runs,
    and the time spent in ``model.infer`` is printed to stdout.

    Args:
        text: Input text; logged and passed to ``model.infer``.
        voice: Voice identifier; logged and passed to ``model.infer``.
        top_k: Forwarded to ``model.infer`` as ``top_k``.
        temperature: Forwarded to ``model.infer`` as ``temperature``.

    Yields:
        A single ``(samplerate, data)`` tuple, where ``samplerate`` is
        16000 and ``data`` is the value returned by ``model.infer``.
    """
    # Explicit UTF-8: the text is arbitrary user input, and the platform's
    # default encoding may be unable to represent it (UnicodeEncodeError).
    with open("PhemeVoice.log", "a", encoding="utf-8") as f:
        f.write(f"{voice}: {text} \n")

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments while the model is running.
    start_time = time.perf_counter()

    data = model.infer(
        text, voice, top_k=top_k, temperature=temperature)
    samplerate = 16_000
    print("Time taken: ", time.perf_counter() - start_time)
    # Kept as a generator (yield, not return) so the callable's shape is
    # unchanged for the interface that consumes it.
    yield (samplerate, data)
def main():
    """Assemble the Gradio interface around ``inference`` and launch it."""
    # Input widgets, constructed in the same order as before.
    text_box = gr.Textbox(
        label="Text",
        value="Our property has several wedding venues.",
        lines=3,
    )
    voice_picker = gr.Dropdown(
        VOICE_OPTIONS,
        type="value",
        label="Select voice:",
        value="empress",
    )
    temperature_slider = gr.Slider(
        value=0.7, minimum=0.3, maximum=1.5, step=0.05
    )
    top_k_slider = gr.Slider(value=210, minimum=10, maximum=250)

    # Output widget for the generated audio.
    audio_out = gr.Audio(autoplay=True, label="audio:")

    # Input order must mirror inference(text, voice, top_k, temperature).
    demo = gr.Interface(
        fn=inference,
        inputs=[text_box, voice_picker, top_k_slider, temperature_slider],
        outputs=[audio_out],
        title="Pheme",
        description="Model can generate a variety of conversational voices.",
    )

    # Enable queuing, then launch whatever queue() hands back with sharing on.
    queued = demo.queue()
    queued.launch(share=True)


if __name__ == "__main__":
    main()