Spaces:
Runtime error
Runtime error
import gradio as gr | |
from dotenv import load_dotenv | |
import gradio as gr | |
import openai | |
import os | |
import soundfile | |
from espnet2.bin.tts_inference import Text2Speech | |
# Pretrained ESPnet model tag used for speech synthesis.
_TTS_MODEL_TAG = "kan-bayashi/ljspeech_vits"
# File name of the synthesized clip, stored next to this script.
_TTS_OUTPUT_NAME = "example_TTS.wav"
# Lazily created Text2Speech instance shared by all calls to tts().
_tts_model = None


def _get_tts_model():
    """Return the shared Text2Speech model, loading it on first use."""
    global _tts_model
    if _tts_model is None:
        _tts_model = Text2Speech.from_pretrained(_TTS_MODEL_TAG)
    return _tts_model


def tts(text):
    """Synthesize ``text`` to a WAV file and return the file's path.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the written WAV file (``example_TTS.wav`` in the directory
        of this script).
    """
    # Reuse one model instance instead of calling from_pretrained() on
    # every request.
    model = _get_tts_model()
    waveform = model(text)["wav"].numpy()

    # Write to the exact path that is returned, so the result is valid even
    # when the working directory differs from the script directory.
    # NOTE(review): every call overwrites the same file; concurrent requests
    # would race on it -- confirm whether that matters for this deployment.
    out_path = os.path.join(os.path.dirname(__file__), _TTS_OUTPUT_NAME)
    soundfile.write(out_path, waveform, 22050, 'PCM_24')
    return out_path
def transcribe(audio):
    """Transcribe a recorded audio file to text via the OpenAI audio API.

    Args:
        audio: Filesystem path of the recording; it is opened in binary mode.

    Returns:
        The ``'text'`` field of the transcription response.
    """
    # The context manager closes the file handle even if the API call raises;
    # previously the handle was never closed.
    with open(audio, "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", file=audio_file)
    return transcription['text']
def conversation(audio):
    """Run one voice turn: transcribe, ask the chat model, speak the answer.

    Only the current utterance is sent to the chat model; no earlier turns
    are included in the request.

    Args:
        audio: Filesystem path of the recorded question.

    Returns:
        A ``(transcript, speech_path)`` tuple: the "You: ... / AI: ..." text
        and the path returned by ``tts`` for the spoken reply.
    """
    user_text = transcribe(audio)

    chat_messages = [{"role": "user", "content": user_text}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    first_choice = completion['choices'][0]
    ai_reply = first_choice['message']['content']

    # The displayed reply is stripped; the text handed to tts() is not.
    transcript = "".join(('You: ', user_text, '\n', 'AI: ', ai_reply.strip()))
    speech_path = tts(ai_reply)
    return transcript, speech_path
def generate_image(audio):
    """Create an image from a spoken prompt and return its URL.

    Args:
        audio: Filesystem path of the recorded prompt.

    Returns:
        The ``'url'`` of the first entry in the image response's ``'data'``.
    """
    # The transcription of the recording is used verbatim as the prompt.
    image_request = {
        "prompt": transcribe(audio),
        "n": 1,
        "size": "1024x1024",
    }
    result = openai.Image.create(**image_request)
    first_image = result['data'][0]
    return first_image['url']
# Read the OpenAI key from the environment (optionally populated from a
# local .env file) before any event handler can run.
load_dotenv()
openai.api_key = os.getenv('api_key')

# Placeholder for multi-turn history; the handlers do not use it.
conv = []

with gr.Blocks() as demo:
    # Tab 1: speak a question, get the transcript plus a spoken answer.
    with gr.Tab("Start a conversation"):
        with gr.Row():
            audio_input_conv = gr.Audio(source="microphone", type="filepath")
            text_output_conv = gr.Textbox(lines=10)
            audio_output_conv = gr.Audio()
        with gr.Row():
            clear_button_conv = gr.Button("Clear")
            submit_button_conv = gr.Button("Submit")

    # Tab 2: speak a prompt, get a generated image.
    with gr.Tab("Generate image"):
        with gr.Row():
            audio_input_img = gr.Audio(source="microphone", type="filepath")
            image_output_img = gr.Image()
        with gr.Row():
            clear_button_img = gr.Button("Clear")
            submit_button_img = gr.Button("Submit")

    with gr.Accordion("How to use"):
        gr.Markdown("Record and submit your voice to talk to AI or to generate an image!!")

    # Event wiring is declared inside the Blocks context.
    submit_button_conv.click(
        conversation,
        inputs=audio_input_conv,
        outputs=[text_output_conv, audio_output_conv],
    )
    # Clear resets the input AND both outputs, matching the image tab;
    # previously the stale transcript and audio stayed on screen.
    clear_button_conv.click(lambda: None, None, audio_input_conv, queue=False)
    clear_button_conv.click(lambda: None, None, text_output_conv, queue=False)
    clear_button_conv.click(lambda: None, None, audio_output_conv, queue=False)

    submit_button_img.click(
        generate_image,
        inputs=audio_input_img,
        outputs=image_output_img,
    )
    clear_button_img.click(lambda: None, None, audio_input_img, queue=False)
    clear_button_img.click(lambda: None, None, image_output_img, queue=False)

# To require a login, pass an ``auth`` callable to launch(), e.g.
# demo.launch(auth=lambda username, password: username == password).
demo.launch()