File size: 1,278 Bytes
94fb3e3
87b7198
 
 
 
 
 
 
 
 
 
 
 
d0fed62
87b7198
46c5cff
87b7198
fa37937
d0fed62
87b7198
38d8048
 
 
 
 
d0fed62
38d8048
87b7198
38d8048
46c5cff
 
 
87b7198
46c5cff
38d8048
 
 
46c5cff
 
705fa3c
87b7198
 
 
 
 
 
94fb3e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
<html>
    <head>
        <!-- Gradio-Lite runtime (ES module) and its stylesheet, both fetched from the jsDelivr CDN.
             NOTE(review): neither URL pins an @gradio/lite version; presumably the CDN serves
             the latest published release. Confirm whether a pinned version is wanted. -->
        <script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
    </head>
    <body>
<gradio-lite>

<gradio-requirements>
transformers_js_py
</gradio-requirements>

<gradio-file name="app.py" entrypoint>
from transformers_js_py import import_transformers_js
import gradio as gr
import numpy as np

transformers_js = await import_transformers_js("3.0.2")
pipeline = transformers_js.pipeline

synthesizer = await pipeline(
    'text-to-speech',
    'Xenova/speecht5_tts',
    { "quantized": False }
)
speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';


async def synthesize(text):
    """Turn *text* into speech using the module-level ``synthesizer`` pipeline.

    Args:
        text: The text to be spoken.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``sampling_rate`` is taken
        unchanged from the pipeline output and ``samples`` is a NumPy
        ``int16`` array. This is the value handed to the ``"audio"`` output
        of the interface built from this function.
    """
    out = await synthesizer(text, {"speaker_embeddings": speaker_embeddings})
    audio_data_memory_view = out["audio"]
    sampling_rate = out["sampling_rate"]

    # Reinterpret the returned buffer as 32-bit float samples.
    audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)
    # Clamp to [-1, 1] before scaling: a sample even slightly outside that
    # range would otherwise overflow the int16 cast instead of saturating,
    # which is audible as a loud click. In-range samples are unaffected.
    audio_data_16bit = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

    return sampling_rate, audio_data_16bit


# Wrap `synthesize` in a minimal UI (one text box in, one audio component out)
# and start it.
demo = gr.Interface(fn=synthesize, inputs="textbox", outputs="audio")
demo.launch()
</gradio-file>

</gradio-lite>

    </body>
</html>