Commit b517a28 (parent d052ee9): add coqui.ai voices

app.py CHANGED
@@ -6,6 +6,7 @@ import torch
 from itertools import chain
 import asyncio
 from llama_cpp import Llama
+import datetime
 
 from transformers import (
     StoppingCriteriaList,
@@ -16,22 +17,31 @@ from transformers import (
 # https://huggingface.co/gorkemgoknar
 
 #Coqui V1 api render voice, you can also use XTTS
-COQUI_URL="https://app.coqui.ai/api/v2/samples"
+#COQUI_URL="https://app.coqui.ai/api/v2/samples"
+COQUI_URL="https://app.coqui.ai/api/v2/samples/multilingual/render/"
 ### Warning each sample will consume your credits
-COQUI_TOKEN="
+COQUI_TOKEN=os.environ.get("COQUI_TOKEN")
 
-
-
+PER_RUN_MAX_VOICE=int( os.environ.get("PER_RUN_MAX_VOICE") )
+PER_RUN_COUNTER=0
+RUN_START_HOUR=datetime.datetime.now().hour
+
+MAX_NEW_TOKENS = 35
+GPU_LAYERS = 20
 STOP_LIST=["###","##"]
 
+LLAMA_VERBOSE=False
+
+
 #stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=64)])
 
 from huggingface_hub import hf_hub_download
 hf_hub_download(repo_id="gorkemgoknar/llama2-7f-moviechatbot-ggml-q4", local_dir=".", filename="llama2-7f-fp16-ggml-q4.bin")
 model_path="./llama2-7f-fp16-ggml-q4.bin"
 
-llm = Llama(model_path=model_path,n_gpu_layers=0, n_ctx=256,n_batch=256)
+import langid
 
+llm = Llama(model_path=model_path,n_gpu_layers=0, n_ctx=256,n_batch=256,verbose=LLAMA_VERBOSE)
 
 # to use with ctransfomers
 #llm = AutoModelForCausalLM.from_pretrained("gorkemgoknar/llama2-7f-moviechatbot-ggml-q4",
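A note on the configuration block above: os.environ.get returns None for an unset variable, so int( os.environ.get("PER_RUN_MAX_VOICE") ) raises a TypeError at import time if the Space is missing that secret. A minimal defensive sketch (the fallback value of 20 is illustrative, not part of the commit):

    import os

    # Hypothetical fallback of 20 voice renders per hour when the secret is absent
    PER_RUN_MAX_VOICE = int(os.environ.get("PER_RUN_MAX_VOICE", "20"))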
@@ -51,20 +61,32 @@ llm = Llama(model_path=model_path,n_gpu_layers=0, n_ctx=256,n_batch=256)
 #first you need to create clone voice for characters
 
 voices = {}
-voices["Gerald"]="
-voices["Vader"]="
-voices["Batman"]="
-voices["Gandalf"]="
-voices["Morpheus"]="
-voices["Neo"]="
-voices["Ig-11"]="
-voices["Tony Stark"]="
-voices["Kirk"]="
-voices["Spock"]="
+voices["Gerald"]=os.environ.get("VOICE_ID_GERALD")
+voices["Vader"]=os.environ.get("VOICE_ID_VADER")
+voices["Batman"]=os.environ.get("VOICE_ID_BATMAN")
+voices["Gandalf"]=os.environ.get("VOICE_ID_GANDALF")
+voices["Morpheus"]=os.environ.get("VOICE_ID_MORPHEUS")
+voices["Neo"]=os.environ.get("VOICE_ID_NEO")
+voices["Ig-11"]=os.environ.get("VOICE_ID_IG11")
+voices["Tony Stark"]=os.environ.get("VOICE_ID_TONY")
+voices["Kirk"]=os.environ.get("VOICE_ID_KIRK")
+voices["Spock"]=os.environ.get("VOICE_ID_SPOCK")
+voices["Don"]=os.environ.get("VOICE_ID_DON")
+voices["Morgan"]=os.environ.get("VOICE_ID_MORGAN")
+voices["Yoda"]=os.environ.get("VOICE_ID_YODA")
+voices["Ian"]=os.environ.get("VOICE_ID_IAN")
+voices["Thanos"]=os.environ.get("VOICE_ID_THANOS")
+
+
 
 def get_audio_url(text,character):
     url = COQUI_URL
-
+    text_language=langid.classify(text)[0]
+
+    supported_languages=["en","de","fr","es","it","pt","pl"]
+    if text_language not in supported_languages:
+        text_language="en"
+
     # voice id of "Baldur Sanjin" from buildin coqui.ai speakers
     # more via https://docs.coqui.ai/reference/speakers_retrieve
     payload = {
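The language fallback added to get_audio_url is what makes the switch to the multilingual render endpoint safe: langid.classify returns a (language, score) pair, and anything outside the supported set is coerced to English. A standalone sketch of the same check:

    import langid

    def detect_tts_language(text, supported=("en", "de", "fr", "es", "it", "pt", "pl")):
        """Return a language code the multilingual endpoint accepts, defaulting to English."""
        lang, _score = langid.classify(text)  # e.g. ("de", -42.1) for German input
        return lang if lang in supported else "en"

    print(detect_tts_language("Wie geht es dir?"))  # "de"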
@@ -72,6 +94,7 @@ def get_audio_url(text,character):
         "text": f"{text}",
         "emotion": "Neutral", ## You can set Angry, Surprise etc on V1 api.. XTTS auto understands it
         "speed": 1,
+        "language": text_language
     }
     headers = {
         "accept": "application/json",
@@ -81,7 +104,8 @@ def get_audio_url(text,character):
 
     response = requests.post(url, json=payload, headers=headers)
     res = json.loads(response.text)
-
+    print("Character:",character, "text:",text,)
+    print("Audio response",res)
     return res["audio_url"]
 
 
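json.loads(response.text) followed by res["audio_url"] assumes the POST succeeded; an invalid token or exhausted credits would surface only as an opaque KeyError. A slightly hardened sketch of the same call (the voice_id key and bearer-token header shape are assumptions, since the commit shows only part of the payload and headers):

    import requests

    def render_sample(text, voice_id, language="en"):
        payload = {"voice_id": voice_id, "text": text,
                   "emotion": "Neutral", "speed": 1, "language": language}
        headers = {"accept": "application/json",
                   "authorization": f"Bearer {COQUI_TOKEN}"}  # assumed header shape
        response = requests.post(COQUI_URL, json=payload, headers=headers, timeout=30)
        response.raise_for_status()  # fail loudly on 4xx/5xx instead of a KeyError later
        return response.json()["audio_url"]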
@@ -158,8 +182,12 @@ def get_answer_from_response(text,character):
     response= response.split("sierpien")[0] # weird, sierp
     response= response.split("\n")[0] # cut at end of line
     response= re.split("sierp.+\d+", response)[0] # comes as sierpina 2018 something something
-
+    response= re.split("styczen.+\d+", response)[0] # comes as styczen 2018 something something
+    response= re.split("kwierk.+\d+", response)[0] # comes as kwierk 2018 something something
+
     response= response.split(":")[0]
+    if response.startswith('"'):
+        response= response[1:]
     return response
 
 def run_chatter(num_repeat=2, character="kirk",human_character="Mr. Sulu",context="Captain Kirk from U.S.S. Enterprise",
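A small portability note on the cleanup patterns above: "sierp.+\d+" is a plain string literal, and unrecognized escapes like \d in non-raw literals are deprecated in recent Python versions. The three month-artifact splits could also collapse into one loop; a sketch under those assumptions:

    import re

    # Polish month-like generation noise, e.g. "sierpnia 2018 ..."
    ARTIFACT_PATTERNS = [r"sierp.+\d+", r"styczen.+\d+", r"kwierk.+\d+"]

    def strip_artifacts(response):
        for pattern in ARTIFACT_PATTERNS:
            response = re.split(pattern, response)[0]
        if response.startswith('"'):
            response = response[1:]
        return response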
@@ -220,7 +248,7 @@ def run_chatter(num_repeat=2, character="kirk",human_character="Mr. Sulu",contex
     resp_answer = get_answer_from_response(response,human_character)
 
     if withaudio:
-        #
+        # No use.. running on main
         response_audio_url = get_audio_url(resp_answer)
         audio_urls.append(response_audio_url)
 
@@ -259,9 +287,24 @@ css="""
 
 """
 
-WITH_AUDIO=False
 
+def get_per_run_voice_counter(increase=False):
+    hour_now = datetime.datetime.now().hour
+    global PER_RUN_COUNTER
+
+    print("Per run check: Hour now:", hour_now, " RUN_START_HOUR:",RUN_START_HOUR," PER_RUN_COUNTER",PER_RUN_COUNTER)
+    if hour_now>RUN_START_HOUR:
+        #reset hourly voice calls
+        print("resetting per run voice calls")
+        PER_RUN_COUNTER = 0
+    elif increase:
+        PER_RUN_COUNTER = PER_RUN_COUNTER + 1
+        print("per run voice calls:", PER_RUN_COUNTER)
+    print("Per run check: Hour now:", hour_now, " RUN_START_HOUR:",RUN_START_HOUR," PER_RUN_COUNTER",PER_RUN_COUNTER)
+    return PER_RUN_COUNTER
+
+
-async def add_text(char1,char2,runs,context,initial_question,history):
+async def add_text(WITH_AUDIO,char1,char2,runs,context,initial_question,history,VOICE_COUNTER):
     print(f"{char1} talks to {char2}")
 
     history = None
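One behavioral quirk in get_per_run_voice_counter: RUN_START_HOUR is captured once at process start and never refreshed, so hour_now>RUN_START_HOUR stays true for the rest of the day once the first hour passes (resetting the counter on every call), and is never true again after midnight. A rolling-hour sketch that tracks the hour of the last reset instead (a hypothetical rework, not the committed code):

    import datetime

    _window_hour = datetime.datetime.now().hour
    _window_count = 0

    def allow_voice_call(limit):
        """Return True and count the call if the hourly budget has room."""
        global _window_hour, _window_count
        hour_now = datetime.datetime.now().hour
        if hour_now != _window_hour:  # new clock hour, also covers the midnight wrap
            _window_hour, _window_count = hour_now, 0
        if _window_count >= limit:
            return False
        _window_count += 1
        return True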
@@ -273,7 +316,7 @@ async def add_text(char1,char2,runs,context,initial_question,history):
         initial_question = unnamed_question
     if initial_question=="":
         initial_question = unnamed_question
-    for i in range(int(runs)
+    for i in range(int(runs)):
         print("char1:",char1," :", initial_question)
         returned_history += char1 + " : " + initial_question + "\n"
 
@@ -285,7 +328,7 @@ async def add_text(char1,char2,runs,context,initial_question,history):
             withaudio=False,
             history=history,
             answer=last_question,
-            debug_print=
+            debug_print=False,
             add_answer_to_history=False
         )
 
@@ -293,10 +336,41 @@ async def add_text(char1,char2,runs,context,initial_question,history):
         returned_history += char2 + " : " + last_answer + "\n"
         # add last answer to history
         history = history + "#" +initial_question + "#"+ last_answer
-
-        if WITH_AUDIO:
+        print("WITH_AUDIO",WITH_AUDIO)
+        if int(WITH_AUDIO):
+            use_voice=True
+        else:
+            use_voice=False
+
+        print("Voice Counter:",VOICE_COUNTER)
+        if initial_question=="..." and last_answer=="...":
+            use_voice=False
+
+        global PER_RUN_MAX_VOICE
+        if use_voice:
+            global PER_RUN_MAX_VOICE
+            can_use_voice=get_per_run_voice_counter()<PER_RUN_MAX_VOICE
+            if not can_use_voice:
+                print("Voice limit reached for this hour, try again in an hour")
+                gr.Warning("Hourly overal voice limit reached, try again in an hour... running without voice.")
+                use_voice=False
+
+        if use_voice and (VOICE_COUNTER>VOICE_LIMIT):
+            print("You have reached voiced limit, try with voice later.. running without voice")
+            gr.Warning("You have reached voiced limit.. running without voice")
+            use_voice=False
+
+        if use_voice:
             char1_audio_url= get_audio_url(initial_question,char1)
+            VOICE_COUNTER+=1
+            get_per_run_voice_counter(increase=True)
+
+            char2_audio_url= get_audio_url(last_answer,char2)
+            VOICE_COUNTER+=1
+            get_per_run_voice_counter(increase=True)
 
+        print("Voice Counter:",VOICE_COUNTER)
+        if use_voice:
             audios = (
                 gr.Audio.update() ,
                 gr.Audio.update() ,
@@ -307,34 +381,37 @@ async def add_text(char1,char2,runs,context,initial_question,history):
                 gr.Audio.update() ,
                 gr.Audio.update()
             )
-
-
-
-
-
-
-
+        else:
+            audios = (
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False)
+            )
+        audios = list(audios)
+        #should now do a loop
+        if use_voice:
             audios[i*2] = gr.Audio.update(char1_audio_url, visible=True,label=str(i*2 )+"_"+char1)
             audios[i*2 + 1] = gr.Audio.update(char2_audio_url, visible=True,label=str(i*2 + 1)+"_"+char2)
-
-        audios = tuple(audios)
-
-        #This needs to be last before yield
-        initial_question=last_question
 
-
-
-
-
-
+        audios = tuple(audios)
+
+        #This needs to be last before yield
+        initial_question=last_question
+
+        yield gr.update(value=initial_question, interactive=True),returned_history, *audios, VOICE_COUNTER
 
 
 
 
 history=None
 #some selected ones are in for demo use (there are more, get a copy and try it , just do not expect much with this fast finetuned model)
-CHARACTER_1_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader",
-CHARACTER_2_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader",
+CHARACTER_1_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader","Yoda","Ig-11","Tony Stark","Batman","Thanos"]
+CHARACTER_2_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader","Yoda","Ig-11","Tony Stark","Batman","Thanos"]
 
 
 CONTEXT_CHOICES = ["talks friendly",
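For orientation: the eight gr.Audio slots cover two lines per round at the maximum run count of 4, and each iteration i of the chat loop fills slots i*2 and i*2 + 1 before yielding. A minimal illustration of the indexing:

    # run i fills one slot per character: 0->(0,1), 1->(2,3), 2->(4,5), 3->(6,7)
    for i in range(4):
        print(f"run {i}: slots {i*2} and {i*2 + 1}")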
@@ -350,15 +427,18 @@ EXAMPLE_INITIALS=["I challenge you to battle of words!",
                   "how much would a woodchuck chuck if a woodchuck could chuck wood?",
                   "The world is changing.",
                   "What do you think about AI?",
-                  "Are you real?",
                   "I went to the supermarket yesterday.",
                   "Who are you?",
-                  "I am richer than you!"
-
+                  "I am richer than you!",
+                  "Wie geht es dir?",
+                  "O que você fez ontem?",
+                  "Il fait trop chaud aujourd'hui."]
+VOICE_CHOICES=["With Coqui.ai Voice",
+               "No voice"]
 RUN_COUNT = [2,3,4]
 
-title = "Metayazar - Movie Chatbot Llama Finetuned"
-description = "
+title = "Metayazar - Movie Chatbot Llama Finetuned Voice powered by Coqui.ai"
+description = "Auto-chat your favorite movie characters. Voice via Coqui.ai"
 article = "<p style='text-align: center'><a href='https://www.linkedin.com/pulse/ai-goes-job-interview-g%C3%B6rkem-g%C3%B6knar/' target='_blank'>AI Goes to Job Interview</a> | <a href='https://www.metayazar.com/' target='_blank'>Metayazar AI Writer</a> |<a href='https://www.linkedin.com/in/goknar/' target='_blank'>Görkem Göknar</a></p>"
 
 
@@ -377,15 +457,33 @@ def change_run_count(run_count):
         return_list.append( gr.Audio.update( visible=visible_audios[i]) )
 
     return return_list
-
+
+
+def switch_voice(with_voice, WITH_AUDIO,VOICE_COUNTER):
+    print("update use voice:",with_voice)
+    if (VOICE_COUNTER>VOICE_LIMIT) or (PER_RUN_COUNTER>PER_RUN_MAX_VOICE):
+        gr.Warning("Unfortunately voice limit is reached, try again after another time, or use without voice")
+        WITH_AUDIO=0
+    else:
+        if with_voice==VOICE_CHOICES[0]:
+            WITH_AUDIO=1
+        else:
+            WITH_AUDIO=0
+
+    return with_voice, WITH_AUDIO
 
 with gr.Blocks(css=css) as interface:
-
+    VOICE_COUNTER=gr.State(value=0)
+    WITH_AUDIO=gr.State(value=1)
+    VOICE_LIMIT=os.environ.get("VOICE_LIMIT")
+
     with gr.Row():
         drop_char1 = gr.components.Dropdown(CHARACTER_1_CHOICES,label="Character 1",value=CHARACTER_1_CHOICES[0])
         drop_char2 = gr.components.Dropdown(CHARACTER_2_CHOICES,label="Character 2",value=CHARACTER_2_CHOICES[1])
         run_count = gr.components.Dropdown(RUN_COUNT,label="Line count per character",value="2")
         context_choice = gr.components.Dropdown(CONTEXT_CHOICES, label="Context",value=CONTEXT_CHOICES[0])
+    with gr.Row():
+        with_voice = gr.components.Dropdown(VOICE_CHOICES,label="Voice via Coqui.ai (demo)",value=VOICE_CHOICES[0])
     with gr.Row():
         txt = gr.Textbox(
             show_label=False,
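Two details in this hunk deserve care. VOICE_LIMIT=os.environ.get("VOICE_LIMIT") stores a string (or None), so the later VOICE_COUNTER>VOICE_LIMIT comparisons in switch_voice and add_text compare an int against a str, which raises TypeError on Python 3. Separately, returning WITH_AUDIO through the declared outputs is what persists the new value into the gr.State. A sketch of the parse-once fix (the default of 10 is illustrative, not from the commit):

    import os

    # Hypothetical hardening: parse the limit once, with an assumed default
    VOICE_LIMIT = int(os.environ.get("VOICE_LIMIT", "10"))
    print(11 > VOICE_LIMIT)  # int vs int, no TypeError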
@@ -402,8 +500,8 @@ with gr.Blocks(css=css) as interface:
             label="History",
             placeholder="History",
         ).style(height=50)
-
-
+
+    with gr.Column():
         audio1 = gr.Audio(elem_id="audio1",elem_classes="audio",autoplay=False,visible=False)
         audio2 = gr.Audio(elem_id="audio2",elem_classes="audio",autoplay=False,visible=False)
         audio3 = gr.Audio(elem_id="audio3",elem_classes="audio",autoplay=False,visible=False)
@@ -413,15 +511,13 @@ with gr.Blocks(css=css) as interface:
         audio7 = gr.Audio(elem_id="audio7",elem_classes="audio",autoplay=False,visible=False)
         audio8 = gr.Audio(elem_id="audio8",elem_classes="audio",autoplay=False,visible=False)
 
-
-
-        submit_btn.click(add_text, [drop_char1, drop_char2,run_count, context_choice, txt,history], [txt,history,audio1,audio2,audio3,audio4,audio5,audio6,audio7,audio8], api_name="chat")
-    else:
-        # no audio returned
-        submit_btn.click(add_text, [drop_char1, drop_char2,run_count, context_choice, txt,history], [txt,history], api_name="chat")
+    with_voice.change(switch_voice,[with_voice,WITH_AUDIO,VOICE_COUNTER],[with_voice,WITH_AUDIO])
+
 
-
+    run_count.change(change_run_count,[run_count],[audio1,audio2,audio3,audio4,audio5,audio6,audio7,audio8])
+    submit_btn.click(add_text, [WITH_AUDIO,drop_char1, drop_char2,run_count, context_choice, txt,history,VOICE_COUNTER], [txt,history,audio1,audio2,audio3,audio4,audio5,audio6,audio7,audio8,VOICE_COUNTER], api_name="chat")
+
 
 interface.queue().launch()
 
-
+