Commit b517a28 (parent d052ee9): add coqui.ai voices

app.py CHANGED
@@ -6,6 +6,7 @@ import torch
 from itertools import chain
 import asyncio
 from llama_cpp import Llama
+import datetime
 
 from transformers import (
     StoppingCriteriaList,
@@ -16,22 +17,31 @@ from transformers import (
 # https://huggingface.co/gorkemgoknar
 
 #Coqui V1 api render voice, you can also use XTTS
-COQUI_URL="https://app.coqui.ai/api/v2/samples"
+#COQUI_URL="https://app.coqui.ai/api/v2/samples"
+COQUI_URL="https://app.coqui.ai/api/v2/samples/multilingual/render/"
 ### Warning each sample will consume your credits
-COQUI_TOKEN="
+COQUI_TOKEN=os.environ.get("COQUI_TOKEN")
 
-
-
+PER_RUN_MAX_VOICE=int( os.environ.get("PER_RUN_MAX_VOICE") )
+PER_RUN_COUNTER=0
+RUN_START_HOUR=datetime.datetime.now().hour
+
+MAX_NEW_TOKENS = 35
+GPU_LAYERS = 20
 STOP_LIST=["###","##"]
 
+LLAMA_VERBOSE=False
+
+
 #stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=64)])
 
 from huggingface_hub import hf_hub_download
 hf_hub_download(repo_id="gorkemgoknar/llama2-7f-moviechatbot-ggml-q4", local_dir=".", filename="llama2-7f-fp16-ggml-q4.bin")
 model_path="./llama2-7f-fp16-ggml-q4.bin"
 
-llm = Llama(model_path=model_path,n_gpu_layers=0, n_ctx=256,n_batch=256)
+import langid
 
+llm = Llama(model_path=model_path,n_gpu_layers=0, n_ctx=256,n_batch=256,verbose=LLAMA_VERBOSE)
 
 # to use with ctransfomers
 #llm = AutoModelForCausalLM.from_pretrained("gorkemgoknar/llama2-7f-moviechatbot-ggml-q4",
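A note on the configuration block above: os.environ.get returns None for an unset variable, so int( os.environ.get("PER_RUN_MAX_VOICE") ) raises a TypeError at import time if the Space is missing that secret. A minimal defensive sketch (the fallback value of 20 is illustrative, not part of the commit):

    import os

    # Hypothetical fallback of 20 voice renders per hour when the secret is absent
    PER_RUN_MAX_VOICE = int(os.environ.get("PER_RUN_MAX_VOICE", "20"))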
@@ -51,20 +61,32 @@ llm = Llama(model_path=model_path,n_gpu_layers=0, n_ctx=256,n_batch=256)
 #first you need to create clone voice for characters
 
 voices = {}
-voices["Gerald"]="
-voices["Vader"]="
-voices["Batman"]="
-voices["Gandalf"]="
-voices["Morpheus"]="
-voices["Neo"]="
-voices["Ig-11"]="
-voices["Tony Stark"]="
-voices["Kirk"]="
-voices["Spock"]="
+voices["Gerald"]=os.environ.get("VOICE_ID_GERALD")
+voices["Vader"]=os.environ.get("VOICE_ID_VADER")
+voices["Batman"]=os.environ.get("VOICE_ID_BATMAN")
+voices["Gandalf"]=os.environ.get("VOICE_ID_GANDALF")
+voices["Morpheus"]=os.environ.get("VOICE_ID_MORPHEUS")
+voices["Neo"]=os.environ.get("VOICE_ID_NEO")
+voices["Ig-11"]=os.environ.get("VOICE_ID_IG11")
+voices["Tony Stark"]=os.environ.get("VOICE_ID_TONY")
+voices["Kirk"]=os.environ.get("VOICE_ID_KIRK")
+voices["Spock"]=os.environ.get("VOICE_ID_SPOCK")
+voices["Don"]=os.environ.get("VOICE_ID_DON")
+voices["Morgan"]=os.environ.get("VOICE_ID_MORGAN")
+voices["Yoda"]=os.environ.get("VOICE_ID_YODA")
+voices["Ian"]=os.environ.get("VOICE_ID_IAN")
+voices["Thanos"]=os.environ.get("VOICE_ID_THANOS")
+
+
 
 def get_audio_url(text,character):
     url = COQUI_URL
-
+    text_language=langid.classify(text)[0]
+
+    supported_languages=["en","de","fr","es","it","pt","pl"]
+    if text_language not in supported_languages:
+        text_language="en"
+
     # voice id of "Baldur Sanjin" from buildin coqui.ai speakers
     # more via https://docs.coqui.ai/reference/speakers_retrieve
     payload = {
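The language fallback added to get_audio_url is what makes the switch to the multilingual render endpoint safe: langid.classify returns a (language, score) pair, and anything outside the supported set is coerced to English. A standalone sketch of the same check:

    import langid

    def detect_tts_language(text, supported=("en", "de", "fr", "es", "it", "pt", "pl")):
        """Return a language code the multilingual endpoint accepts, defaulting to English."""
        lang, _score = langid.classify(text)  # e.g. ("de", -42.1) for German input
        return lang if lang in supported else "en"

    print(detect_tts_language("Wie geht es dir?"))  # "de"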
@@ -72,6 +94,7 @@ def get_audio_url(text,character):
         "text": f"{text}",
         "emotion": "Neutral", ## You can set Angry, Surprise etc on V1 api.. XTTS auto understands it
         "speed": 1,
+        "language": text_language
     }
     headers = {
         "accept": "application/json",
@@ -81,7 +104,8 @@ def get_audio_url(text,character):
 
     response = requests.post(url, json=payload, headers=headers)
     res = json.loads(response.text)
-
+    print("Character:",character, "text:",text,)
+    print("Audio response",res)
     return res["audio_url"]
 
 
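json.loads(response.text) followed by res["audio_url"] assumes the POST succeeded; an invalid token or exhausted credits would surface only as an opaque KeyError. A slightly hardened sketch of the same call (the voice_id key and bearer-token header shape are assumptions, since the commit shows only part of the payload and headers):

    import requests

    def render_sample(text, voice_id, language="en"):
        payload = {"voice_id": voice_id, "text": text,
                   "emotion": "Neutral", "speed": 1, "language": language}
        headers = {"accept": "application/json",
                   "authorization": f"Bearer {COQUI_TOKEN}"}  # assumed header shape
        response = requests.post(COQUI_URL, json=payload, headers=headers, timeout=30)
        response.raise_for_status()  # fail loudly on 4xx/5xx instead of a KeyError later
        return response.json()["audio_url"]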
@@ -158,8 +182,12 @@ def get_answer_from_response(text,character):
     response= response.split("sierpien")[0] # weird, sierp
     response= response.split("\n")[0] # cut at end of line
     response= re.split("sierp.+\d+", response)[0] # comes as sierpina 2018 something something
-
+    response= re.split("styczen.+\d+", response)[0] # comes as styczen 2018 something something
+    response= re.split("kwierk.+\d+", response)[0] # comes as kwierk 2018 something something
+
     response= response.split(":")[0]
+    if response.startswith('"'):
+        response= response[1:]
     return response
 
 def run_chatter(num_repeat=2, character="kirk",human_character="Mr. Sulu",context="Captain Kirk from U.S.S. Enterprise",
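A small portability note on the cleanup patterns above: "sierp.+\d+" is a plain string literal, and unrecognized escapes like \d in non-raw literals are deprecated in recent Python versions. The three month-artifact splits could also collapse into one loop; a sketch under those assumptions:

    import re

    # Polish month-like generation noise, e.g. "sierpnia 2018 ..."
    ARTIFACT_PATTERNS = [r"sierp.+\d+", r"styczen.+\d+", r"kwierk.+\d+"]

    def strip_artifacts(response):
        for pattern in ARTIFACT_PATTERNS:
            response = re.split(pattern, response)[0]
        if response.startswith('"'):
            response = response[1:]
        return response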
@@ -220,7 +248,7 @@ def run_chatter(num_repeat=2, character="kirk",human_character="Mr. Sulu",contex
     resp_answer = get_answer_from_response(response,human_character)
 
     if withaudio:
-        #
+        # No use.. running on main
         response_audio_url = get_audio_url(resp_answer)
         audio_urls.append(response_audio_url)
 
@@ -259,9 +287,24 @@ css="""
 
 """
 
-WITH_AUDIO=False
 
+def get_per_run_voice_counter(increase=False):
+    hour_now = datetime.datetime.now().hour
+    global PER_RUN_COUNTER
+
+    print("Per run check: Hour now:", hour_now, " RUN_START_HOUR:",RUN_START_HOUR," PER_RUN_COUNTER",PER_RUN_COUNTER)
+    if hour_now>RUN_START_HOUR:
+        #reset hourly voice calls
+        print("resetting per run voice calls")
+        PER_RUN_COUNTER = 0
+    elif increase:
+        PER_RUN_COUNTER = PER_RUN_COUNTER + 1
+        print("per run voice calls:", PER_RUN_COUNTER)
+    print("Per run check: Hour now:", hour_now, " RUN_START_HOUR:",RUN_START_HOUR," PER_RUN_COUNTER",PER_RUN_COUNTER)
+    return PER_RUN_COUNTER
+
+
-async def add_text(char1,char2,runs,context,initial_question,history):
+async def add_text(WITH_AUDIO,char1,char2,runs,context,initial_question,history,VOICE_COUNTER):
     print(f"{char1} talks to {char2}")
 
     history = None
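One behavioral quirk in get_per_run_voice_counter: RUN_START_HOUR is captured once at process start and never refreshed, so hour_now>RUN_START_HOUR stays true for the rest of the day once the first hour passes (resetting the counter on every call), and is never true again after midnight. A rolling-hour sketch that tracks the hour of the last reset instead (a hypothetical rework, not the committed code):

    import datetime

    _window_hour = datetime.datetime.now().hour
    _window_count = 0

    def allow_voice_call(limit):
        """Return True and count the call if the hourly budget has room."""
        global _window_hour, _window_count
        hour_now = datetime.datetime.now().hour
        if hour_now != _window_hour:  # new clock hour, also covers the midnight wrap
            _window_hour, _window_count = hour_now, 0
        if _window_count >= limit:
            return False
        _window_count += 1
        return True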
@@ -273,7 +316,7 @@ async def add_text(char1,char2,runs,context,initial_question,history):
         initial_question = unnamed_question
     if initial_question=="":
         initial_question = unnamed_question
-    for i in range(int(runs)
+    for i in range(int(runs)):
         print("char1:",char1," :", initial_question)
         returned_history += char1 + " : " + initial_question + "\n"
 
@@ -285,7 +328,7 @@ async def add_text(char1,char2,runs,context,initial_question,history):
             withaudio=False,
             history=history,
             answer=last_question,
-            debug_print=
+            debug_print=False,
             add_answer_to_history=False
         )
 
@@ -293,10 +336,41 @@ async def add_text(char1,char2,runs,context,initial_question,history):
         returned_history += char2 + " : " + last_answer + "\n"
         # add last answer to history
         history = history + "#" +initial_question + "#"+ last_answer
-
-        if WITH_AUDIO:
+        print("WITH_AUDIO",WITH_AUDIO)
+        if int(WITH_AUDIO):
+            use_voice=True
+        else:
+            use_voice=False
+
+        print("Voice Counter:",VOICE_COUNTER)
+        if initial_question=="..." and last_answer=="...":
+            use_voice=False
+
+        global PER_RUN_MAX_VOICE
+        if use_voice:
+            global PER_RUN_MAX_VOICE
+            can_use_voice=get_per_run_voice_counter()<PER_RUN_MAX_VOICE
+            if not can_use_voice:
+                print("Voice limit reached for this hour, try again in an hour")
+                gr.Warning("Hourly overal voice limit reached, try again in an hour... running without voice.")
+                use_voice=False
+
+        if use_voice and (VOICE_COUNTER>VOICE_LIMIT):
+            print("You have reached voiced limit, try with voice later.. running without voice")
+            gr.Warning("You have reached voiced limit.. running without voice")
+            use_voice=False
+
+        if use_voice:
             char1_audio_url= get_audio_url(initial_question,char1)
+            VOICE_COUNTER+=1
+            get_per_run_voice_counter(increase=True)
+
+            char2_audio_url= get_audio_url(last_answer,char2)
+            VOICE_COUNTER+=1
+            get_per_run_voice_counter(increase=True)
 
+        print("Voice Counter:",VOICE_COUNTER)
+        if use_voice:
             audios = (
                 gr.Audio.update() ,
                 gr.Audio.update() ,
@@ -307,34 +381,37 @@ async def add_text(char1,char2,runs,context,initial_question,history):
                 gr.Audio.update() ,
                 gr.Audio.update()
             )
-
-
-
-
-
-
-
+        else:
+            audios = (
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False) ,
+                gr.Audio.update(visible=False)
+            )
+        audios = list(audios)
+        #should now do a loop
+        if use_voice:
             audios[i*2] = gr.Audio.update(char1_audio_url, visible=True,label=str(i*2 )+"_"+char1)
             audios[i*2 + 1] = gr.Audio.update(char2_audio_url, visible=True,label=str(i*2 + 1)+"_"+char2)
-
-        audios = tuple(audios)
-
-        #This needs to be last before yield
-        initial_question=last_question
 
-
-
-
-
-
+        audios = tuple(audios)
+
+        #This needs to be last before yield
+        initial_question=last_question
+
+        yield gr.update(value=initial_question, interactive=True),returned_history, *audios, VOICE_COUNTER
 
 
 
 
 history=None
 #some selected ones are in for demo use (there are more, get a copy and try it , just do not expect much with this fast finetuned model)
-CHARACTER_1_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader",
-CHARACTER_2_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader",
+CHARACTER_1_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader","Yoda","Ig-11","Tony Stark","Batman","Thanos"]
+CHARACTER_2_CHOICES = ["Gandalf","Gerald", "Morpheus", "Neo","Kirk","Spock","Vader","Yoda","Ig-11","Tony Stark","Batman","Thanos"]
 
 
 CONTEXT_CHOICES = ["talks friendly",
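For orientation: the eight gr.Audio slots cover two lines per round at the maximum run count of 4, and each iteration i of the chat loop fills slots i*2 and i*2 + 1 before yielding. A minimal illustration of the indexing:

    # run i fills one slot per character: 0->(0,1), 1->(2,3), 2->(4,5), 3->(6,7)
    for i in range(4):
        print(f"run {i}: slots {i*2} and {i*2 + 1}")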
@@ -350,15 +427,18 @@ EXAMPLE_INITIALS=["I challenge you to battle of words!",
                   "how much would a woodchuck chuck if a woodchuck could chuck wood?",
                   "The world is changing.",
                   "What do you think about AI?",
-                  "Are you real?",
                   "I went to the supermarket yesterday.",
                   "Who are you?",
-                  "I am richer than you!"
-
+                  "I am richer than you!",
+                  "Wie geht es dir?",
+                  "O que você fez ontem?",
+                  "Il fait trop chaud aujourd'hui."]
+VOICE_CHOICES=["With Coqui.ai Voice",
+               "No voice"]
 RUN_COUNT = [2,3,4]
 
-title = "Metayazar - Movie Chatbot Llama Finetuned"
-description = "
+title = "Metayazar - Movie Chatbot Llama Finetuned Voice powered by Coqui.ai"
+description = "Auto-chat your favorite movie characters. Voice via Coqui.ai"
 article = "<p style='text-align: center'><a href='https://www.linkedin.com/pulse/ai-goes-job-interview-g%C3%B6rkem-g%C3%B6knar/' target='_blank'>AI Goes to Job Interview</a> | <a href='https://www.metayazar.com/' target='_blank'>Metayazar AI Writer</a> |<a href='https://www.linkedin.com/in/goknar/' target='_blank'>Görkem Göknar</a></p>"
 
 
@@ -377,15 +457,33 @@ def change_run_count(run_count):
         return_list.append( gr.Audio.update( visible=visible_audios[i]) )
 
     return return_list
-
+
+
+def switch_voice(with_voice, WITH_AUDIO,VOICE_COUNTER):
+    print("update use voice:",with_voice)
+    if (VOICE_COUNTER>VOICE_LIMIT) or (PER_RUN_COUNTER>PER_RUN_MAX_VOICE):
+        gr.Warning("Unfortunately voice limit is reached, try again after another time, or use without voice")
+        WITH_AUDIO=0
+    else:
+        if with_voice==VOICE_CHOICES[0]:
+            WITH_AUDIO=1
+        else:
+            WITH_AUDIO=0
+
+    return with_voice, WITH_AUDIO
 
 with gr.Blocks(css=css) as interface:
-
+    VOICE_COUNTER=gr.State(value=0)
+    WITH_AUDIO=gr.State(value=1)
+    VOICE_LIMIT=os.environ.get("VOICE_LIMIT")
+
     with gr.Row():
         drop_char1 = gr.components.Dropdown(CHARACTER_1_CHOICES,label="Character 1",value=CHARACTER_1_CHOICES[0])
         drop_char2 = gr.components.Dropdown(CHARACTER_2_CHOICES,label="Character 2",value=CHARACTER_2_CHOICES[1])
         run_count = gr.components.Dropdown(RUN_COUNT,label="Line count per character",value="2")
         context_choice = gr.components.Dropdown(CONTEXT_CHOICES, label="Context",value=CONTEXT_CHOICES[0])
+    with gr.Row():
+        with_voice = gr.components.Dropdown(VOICE_CHOICES,label="Voice via Coqui.ai (demo)",value=VOICE_CHOICES[0])
     with gr.Row():
         txt = gr.Textbox(
             show_label=False,
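Two details in this hunk deserve care. VOICE_LIMIT=os.environ.get("VOICE_LIMIT") stores a string (or None), so the later VOICE_COUNTER>VOICE_LIMIT comparisons in switch_voice and add_text compare an int against a str, which raises TypeError on Python 3. Separately, returning WITH_AUDIO through the declared outputs is what persists the new value into the gr.State. A sketch of the parse-once fix (the default of 10 is illustrative, not from the commit):

    import os

    # Hypothetical hardening: parse the limit once, with an assumed default
    VOICE_LIMIT = int(os.environ.get("VOICE_LIMIT", "10"))
    print(11 > VOICE_LIMIT)  # int vs int, no TypeError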
@@ -402,8 +500,8 @@ with gr.Blocks(css=css) as interface:
             label="History",
             placeholder="History",
         ).style(height=50)
-
-
+
+    with gr.Column():
         audio1 = gr.Audio(elem_id="audio1",elem_classes="audio",autoplay=False,visible=False)
         audio2 = gr.Audio(elem_id="audio2",elem_classes="audio",autoplay=False,visible=False)
         audio3 = gr.Audio(elem_id="audio3",elem_classes="audio",autoplay=False,visible=False)
@@ -413,15 +511,13 @@ with gr.Blocks(css=css) as interface:
         audio7 = gr.Audio(elem_id="audio7",elem_classes="audio",autoplay=False,visible=False)
         audio8 = gr.Audio(elem_id="audio8",elem_classes="audio",autoplay=False,visible=False)
 
-
-
-        submit_btn.click(add_text, [drop_char1, drop_char2,run_count, context_choice, txt,history], [txt,history,audio1,audio2,audio3,audio4,audio5,audio6,audio7,audio8], api_name="chat")
-    else:
-        # no audio returned
-        submit_btn.click(add_text, [drop_char1, drop_char2,run_count, context_choice, txt,history], [txt,history], api_name="chat")
+    with_voice.change(switch_voice,[with_voice,WITH_AUDIO,VOICE_COUNTER],[with_voice,WITH_AUDIO])
+
 
-
+    run_count.change(change_run_count,[run_count],[audio1,audio2,audio3,audio4,audio5,audio6,audio7,audio8])
+    submit_btn.click(add_text, [WITH_AUDIO,drop_char1, drop_char2,run_count, context_choice, txt,history,VOICE_COUNTER], [txt,history,audio1,audio2,audio3,audio4,audio5,audio6,audio7,audio8,VOICE_COUNTER], api_name="chat")
+
 
 interface.queue().launch()
 
-
+