Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import spaces
|
2 |
+
from kokoro import KModel, KPipeline
|
3 |
+
import gradio as gr
|
4 |
+
import os
|
5 |
+
import random
|
6 |
+
import torch
|
7 |
+
|
8 |
+
# GPU Kullanımını KAPATIYORUZ
|
9 |
+
# GPU execution is switched off entirely. Nothing in the code shown here
# reads this flag; it only records the intent.
CUDA_AVAILABLE = False
# Maximum number of input characters that will be synthesized (optional cap).
CHAR_LIMIT = 5000

# Load the model once at import time, on the CPU only. The dict holds a
# single entry under the key False, which is the key forward_cpu looks up.
models = {
    False: KModel().to('cpu').eval(),
}

# One pipeline per language code ('a' and 'b'), built with model=False.
# NOTE(review): presumably model=False makes the pipeline do text processing
# only, leaving inference to `models` -- confirm against the kokoro docs.
pipelines = {
    lang_code: KPipeline(lang_code=lang_code, model=False)
    for lang_code in 'ab'
}

# Override the lexicon entry for the word "kokoro" in each pipeline.
pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
|
17 |
+
|
18 |
+
# GPU Fonksiyonunu Kaldırıyoruz
|
19 |
+
def forward_cpu(ps, ref_s, speed):
    """Run the CPU model on a single input.

    Args:
        ps: Sequence produced by the pipeline for one text segment.
        ref_s: Entry of the loaded voice pack selected for ``ps``.
        speed: Speed value forwarded unchanged to the model.

    Returns:
        Whatever ``models[False]`` returns for these arguments.
    """
    cpu_model = models[False]
    return cpu_model(ps, ref_s, speed)
|
21 |
+
|
22 |
+
# Ses Üretme Fonksiyonu (Tamamen CPU Kullanır)
|
23 |
+
def generate_first(text, voice='af_heart', speed=1):
    """Synthesize only the first segment the pipeline yields for ``text``.

    Args:
        text: Input text; it is stripped and cut to ``CHAR_LIMIT`` characters.
        voice: Voice identifier. Its first character selects the pipeline.
        speed: Speed value passed to both the pipeline and the model.

    Returns:
        ``((24000, samples), ps)`` for the first segment, where ``samples``
        is ``audio.numpy()`` and ``ps`` is the segment's second element, or
        ``(None, '')`` when the pipeline yields nothing.

    Raises:
        gr.Error: Wraps any exception raised by ``forward_cpu``.
    """
    text = text.strip()[:CHAR_LIMIT]  # enforce the character cap
    tts = pipelines[voice[0]]
    voice_pack = tts.load_voice(voice)

    # Pull at most one item from the pipeline; the sentinel marks "no output".
    nothing = object()
    first = next(iter(tts(text, voice, speed)), nothing)
    if first is nothing:
        return None, ''

    _, ps, _ = first
    # The voice pack is indexed by the length of ps, minus one.
    style_ref = voice_pack[len(ps) - 1]
    try:
        audio = forward_cpu(ps, style_ref, speed)  # CPU only
    except Exception as e:
        raise gr.Error(e)
    return (24000, audio.numpy()), ps
|
36 |
+
|
37 |
+
# Arena API
|
38 |
+
def predict(text, voice='af_heart', speed=1):
    """Return only the audio part of ``generate_first``'s result.

    Takes the same arguments as ``generate_first`` and discards the second
    element of the pair it returns.
    """
    audio, _ = generate_first(text, voice, speed)
    return audio
|
40 |
+
|
41 |
+
# Tokenizasyon Fonksiyonu
|
42 |
+
def tokenize_first(text, voice='af_heart'):
    """Return the ``ps`` element of the first segment, without synthesizing.

    Args:
        text: Input text, passed to the pipeline as given (no stripping or
            length cap is applied here).
        voice: Voice identifier. Its first character selects the pipeline.

    Returns:
        The second element of the first item the pipeline yields, or ``''``
        when the pipeline yields nothing.
    """
    tts = pipelines[voice[0]]

    # Pull at most one item from the pipeline; the sentinel marks "no output".
    nothing = object()
    first = next(iter(tts(text, voice)), nothing)
    if first is nothing:
        return ''

    _, ps, _ = first
    return ps
|
47 |
+
|
48 |
+
# Akış Modu İçin CPU Kullanımı
|
49 |
+
def generate_all(text, voice='af_heart', speed=1):
    """Yield audio for every segment of ``text``, one chunk per segment.

    Args:
        text: Input text; it is stripped and cut to ``CHAR_LIMIT`` characters.
        voice: Voice identifier. Its first character selects the pipeline.
        speed: Speed value passed to both the pipeline and the model.

    Yields:
        ``(24000, samples)`` tuples, where ``samples`` is ``audio.numpy()``
        for the current segment.

    Raises:
        gr.Error: Wraps any exception raised by ``forward_cpu``.
    """
    clipped = text.strip()[:CHAR_LIMIT]
    tts = pipelines[voice[0]]
    voice_pack = tts.load_voice(voice)

    for _, phonemes, _ in tts(clipped, voice, speed):
        # The voice pack is indexed by the length of the segment, minus one.
        style_ref = voice_pack[len(phonemes) - 1]
        try:
            waveform = forward_cpu(phonemes, style_ref, speed)  # CPU only
        except Exception as exc:
            raise gr.Error(exc)
        yield 24000, waveform.numpy()
|
61 |
+
|
62 |
+
# Rastgele Metin Getirme
|
63 |
+
# Sample sentences offered by the "random text" button, keyed by language.
random_texts = {
    'en': [
        "Hello, this is a test.",
        "How are you?",
        "Welcome to Kokoro!",
    ],
}
|
64 |
+
def get_random_text(voice):
    """Return a randomly chosen English sample sentence.

    Args:
        voice: Accepted so the function can be wired to the voice dropdown;
            it is not used to pick the sentence.

    Returns:
        One entry of ``random_texts['en']``.
    """
    samples = random_texts['en']
    return random.choice(samples)
|
66 |
+
|
67 |
+
# Ses Seçenekleri
|
68 |
+
# Voice options: display label -> voice identifier. The first character of
# each identifier is the key of the pipeline that serves it.
CHOICES = {
    '🇺🇸 🚺 Heart ❤️': 'af_heart',
    '🇺🇸 🚺 Bella 🔥': 'af_bella',
    '🇺🇸 🚺 Nicole 🎧': 'af_nicole',
    '🇺🇸 🚹 Michael': 'am_michael',
    '🇬🇧 🚺 Emma': 'bf_emma',
    '🇬🇧 🚹 George': 'bm_george',
}

# Load every listed voice up front, at import time, on its matching pipeline.
for v in CHOICES.values():
    lang_pipeline = pipelines[v[0]]
    lang_pipeline.load_voice(v)
|
78 |
+
|
79 |
+
# Token Bilgilendirme
|
80 |
+
# Token hint text (Turkish: "You can use /slashes/ for customized
# pronunciation."). Not referenced by the code shown here.
# The literal holds exactly one line of text between the opening and closing
# newlines; stray diff-gutter markers ("|", "81 |", "+") that had ended up
# inside the triple-quoted string are removed so they cannot reach the UI.
TOKEN_NOTE = '''
💡 Özelleştirilmiş telaffuz için /slashes/ kullanabilirsiniz.
'''
|
83 |
+
|
84 |
+
# Arayüz - Generate Sekmesi
|
85 |
+
# "Generate" tab: one-shot audio output plus the tokens that produced it.
# NOTE(review): nesting is reconstructed (the textbox and Tokenize button are
# assumed to sit inside the accordion) -- confirm against the real file.
with gr.Blocks() as generate_tab:
    out_audio = gr.Audio(
        label='Çıktı Sesi',
        interactive=False,
        autoplay=True,
    )
    generate_btn = gr.Button('Oluştur', variant='primary')
    with gr.Accordion('Çıktı Tokenleri', open=True):
        out_ps = gr.Textbox(
            interactive=False,
            show_label=False,
            info='Ses üretiminde kullanılan tokenler.',
        )
        tokenize_btn = gr.Button('Tokenize', variant='secondary')
|
91 |
+
|
92 |
+
# Arayüz - Akış Sekmesi
|
93 |
+
# "Stream" tab: streaming audio output with start and stop buttons.
# NOTE(review): nesting is reconstructed (both buttons are assumed to share
# the row) -- confirm against the real file.
with gr.Blocks() as stream_tab:
    out_stream = gr.Audio(
        label='Canlı Akış Sesi',
        interactive=False,
        streaming=True,
        autoplay=True,
    )
    with gr.Row():
        stream_btn = gr.Button('Akışı Başlat', variant='primary')
        stop_btn = gr.Button('Durdur', variant='stop')
|
98 |
+
|
99 |
+
# Ana Arayüz
|
100 |
+
# Main interface: banner, input column, tabbed output column, event wiring.
BANNER_TEXT = '***Kokoro*** **82M parametreli açık kaynak TTS modelidir.**'

# NOTE(review): the nesting below is reconstructed. In particular, which
# widgets share the inner row with the voice dropdown is an assumption --
# confirm against the real file.
with gr.Blocks() as app:
    with gr.Row():
        gr.Markdown(BANNER_TEXT, container=True)

    with gr.Row():
        # Left column: text input and synthesis controls.
        with gr.Column():
            text = gr.Textbox(
                label='Metin Girişi',
                info=f"En fazla {CHAR_LIMIT} karakter",
            )
            with gr.Row():
                voice = gr.Dropdown(
                    list(CHOICES.items()),
                    value='af_heart',
                    label='Ses Seçimi',
                )
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2,
                    value=1,
                    step=0.1,
                    label='Konuşma Hızı',
                )
            random_btn = gr.Button('Rastgele Metin', variant='secondary')

        # Right column: the two output tabs defined earlier in the file.
        with gr.Column():
            gr.TabbedInterface(
                [generate_tab, stream_tab],
                ['Oluştur', 'Akış'],
            )

    # Button wiring.
    random_btn.click(fn=get_random_text, inputs=[voice], outputs=[text])
    generate_btn.click(
        fn=generate_first,
        inputs=[text, voice, speed],
        outputs=[out_audio, out_ps],
    )
    tokenize_btn.click(
        fn=tokenize_first,
        inputs=[text, voice],
        outputs=[out_ps],
    )
    stream_event = stream_btn.click(
        fn=generate_all,
        inputs=[text, voice, speed],
        outputs=[out_stream],
    )
    # The stop button runs no function; it only cancels the streaming event.
    stop_btn.click(fn=None, cancels=stream_event)
|
120 |
+
|
121 |
+
# Uygulamayı Başlat
|
122 |
+
# Start the application only when this file is run as a script.
if __name__ == '__main__':
    app.queue().launch(
        show_api=True,
        ssr_mode=True,
    )
|