File size: 1,491 Bytes
4618ee7
 
979e0e9
4618ee7
 
979e0e9
4618ee7
979e0e9
4618ee7
605353b
 
4618ee7
 
 
979e0e9
4618ee7
 
 
 
 
 
 
 
 
 
 
 
fc6fb92
4618ee7
 
 
 
 
 
 
 
979e0e9
4618ee7
 
 
 
 
979e0e9
4618ee7
 
 
 
 
 
979e0e9
4618ee7
979e0e9
 
4618ee7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
import openai
import winsound
from elevenlabslib import *
from pydub import AudioSegment
from pydub.playback import play
import io
import config

import os

from elevenlabslib import ElevenLabsUser

# Secrets must never live in source control: read both API keys from the
# environment so the file can be shared or published safely. A missing
# variable raises KeyError at startup, naming the variable that is absent.
# NOTE(review): any keys that were previously committed in this file should
# be treated as compromised and revoked with the respective providers.
openai.api_key = os.environ["OPENAI_API_KEY"]
api_key = os.environ["ELEVENLABS_API_KEY"]

# ElevenLabs client used by transcribe() to look up a voice and synthesize
# the spoken reply.
user = ElevenLabsUser(api_key)

# Running conversation log. The first entry is the standing instruction for
# the model; transcribe() appends each user utterance and each model reply.
messages = ["Respond in less than 60 words"]

def transcribe(audio):
    """Answer a spoken question aloud and return the conversation so far.

    The recording is transcribed with OpenAI's ``whisper-1`` model, the
    transcript is sent to ``text-davinci-003`` for a short completion, the
    completion is synthesized to speech with the ElevenLabs "Antoni" voice,
    written to ``output.wav`` in the working directory, and played with
    ``winsound``.

    Args:
        audio: Filesystem path of the recorded audio clip (the Gradio input
            is configured with ``type="filepath"``).

    Returns:
        Every entry of the module-level ``messages`` log joined with
        newlines, including the new user utterance and the model's reply.
    """
    # Close the recording as soon as the upload finishes instead of leaking
    # the file handle on every request.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    messages.append(f"\nUser: {transcript['text']}")

    # Send the standing instruction (messages[0]) together with the latest
    # user utterance. Previously only messages[-1] was sent, so the
    # "Respond in less than 60 words" instruction never reached the model.
    prompt = f"{messages[0]}{messages[-1]}"

    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=60,
        n=1,
        stop=None,
        temperature=0.5,
    )

    system_message = response["choices"][0]["text"]
    messages.append(f"{system_message}")

    # Use distinct names for the synthesized audio rather than rebinding the
    # ``audio`` parameter to values of unrelated types.
    voice = user.get_voices_by_name("Antoni")[0]
    speech_bytes = voice.generate_audio_bytes(system_message)

    # The synthesized bytes are decoded as MP3 and re-exported as WAV because
    # winsound.PlaySound is given a WAV file to play.
    speech = AudioSegment.from_file(io.BytesIO(speech_bytes), format="mp3")
    speech.export("output.wav", format="wav")

    # NOTE(review): winsound is only available on Windows — confirm the
    # deployment target.
    winsound.PlaySound("output.wav", winsound.SND_FILENAME)

    return "\n".join(messages)

# Microphone recorder whose clip is handed to transcribe() as a file path.
# NOTE(review): confirm that the installed Gradio version's Audio component
# accepts the `source` and `placeholder` keyword arguments.
_microphone = gr.Audio(
    source="microphone",
    type="filepath",
    placeholder="Please start speaking...",
)

# Single-function UI: spoken question in, running text transcript out.
iface = gr.Interface(
    title="Voice Assistant ",
    description="Please ask me your question",
    fn=transcribe,
    inputs=_microphone,
    outputs="text",
)

# Start the web server for the interface.
iface.launch()