# app.py -- commit fc6fb92 "Update app.py" by Sreekumar1608 (1.38 kB).
import gradio as gr
import openai
from elevenlabslib import *
from pydub import AudioSegment
from pydub.playback import play
import io
import winsound
import os

from elevenlabslib import ElevenLabsUser

# Credentials are read from the environment so that no secret lives in
# source control. A missing variable fails fast with KeyError at start-up.
# NOTE: any key that was ever committed to this file should be treated as
# compromised and rotated with the provider.
openai.api_key = os.environ["OPENAI_API_KEY"]
api_key = os.environ["ELEVENLABS_API_KEY"]

# ElevenLabs client used by transcribe() to look up a voice and synthesize speech.
user = ElevenLabsUser(api_key)

# Running conversation log; transcribe() appends to it and returns it joined.
messages = ["Respond with voice"]
def transcribe(audio):
    """Transcribe a recording, generate a reply, speak it, and return the chat log.

    The recording is transcribed with the "whisper-1" model, the resulting
    user line is sent to "text-davinci-003", and the completion is
    synthesized with the ElevenLabs voice named "Bella", written to
    ``output.wav`` and played through ``winsound``.

    Args:
        audio: Path of the recorded audio file, or ``None`` when no
            recording was supplied.

    Returns:
        Every entry of the module-level ``messages`` list joined with
        newlines.

    Raises:
        IndexError: If no ElevenLabs voice named "Bella" is found.
    """
    # Nothing was recorded: return the conversation unchanged rather than
    # crashing inside open(None).
    if audio is None:
        return "\n".join(messages)

    # The context manager closes the recording once it has been transcribed.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    messages.append(f"\nUser: {transcript['text']}")

    # NOTE(review): only the newest user line is sent as the prompt; earlier
    # entries of `messages` are not included. Confirm this is intended.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=messages[-1],
        max_tokens=60,
        n=1,
        stop=None,
        temperature=0.5,
    )
    system_message = response["choices"][0]["text"]
    messages.append(f"{system_message}")

    # Synthesize the reply. Distinct local names avoid rebinding the `audio`
    # parameter to values of different types.
    voice = user.get_voices_by_name("Bella")[0]
    speech_bytes = voice.generate_audio_bytes(system_message)
    speech = AudioSegment.from_file(io.BytesIO(speech_bytes), format="mp3")

    # winsound is given a WAV file, so the MP3 bytes are converted first.
    speech.export("output.wav", format="wav")
    winsound.PlaySound("output.wav", winsound.SND_FILENAME)

    return "\n".join(messages)
# Web UI: a microphone recorder whose recording is handed to transcribe()
# as a file path, with the returned chat log shown as plain text.
iface = gr.Interface(
    transcribe,
    gr.Audio(type="filepath", source="microphone"),
    "text",
    title="Voice Assistant",
)

# Start serving the interface.
iface.launch()