File size: 2,567 Bytes
ad716e2
 
 
327437d
9093d3d
ad716e2
 
 
 
 
 
02595d3
ad716e2
 
 
a8640fd
 
 
 
 
4c851ba
a8640fd
 
 
 
 
 
 
 
 
 
caace45
 
 
 
 
 
 
a8640fd
 
 
 
 
caace45
a8640fd
 
caace45
a8640fd
ad716e2
 
 
 
 
 
a8640fd
 
 
 
 
 
 
 
ad716e2
a8640fd
 
 
 
ad716e2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import gradio as gr
from fastrtc import (ReplyOnPause, Stream, get_stt_model, get_tts_model)
import numpy as np

# messages = [
#     {"role": "user", "content": "Who are you?"},
# ]
# pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-3B-Instruct")
# pipe(messages)
# Text chatbot backend: a Hugging Face text-generation pipeline.
# DialoGPT-medium is used instead of the (gated) Llama checkpoint above.
# NOTE: this downloads/loads the model at import time, so starting the
# script can take a while on first run.
chatbot = pipeline("text-generation", model="microsoft/DialoGPT-medium")

# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# fastrtc default speech-to-text and text-to-speech models, also loaded
# eagerly at import time.
stt_model = get_stt_model()
tts_model = get_tts_model()



# Alternative wiring: serve `echo` directly over a fastrtc audio stream
# instead of the Gradio interface built at the bottom of the file.
#stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")

def echo(audio):
    """Run one voice-assistant turn: speech -> text -> chatbot -> speech.

    Parameters:
        audio: a ``(sample_rate, numpy array)`` tuple as supplied by
            Gradio / fastrtc.

    Returns:
        A 1-D numpy array containing the full synthesized reply waveform
        (empty array if TTS produced no chunks).
    """
    # Transcribe the user's utterance to text.
    prompt = stt_model.stt(audio)
    # response = sambanova_client.chat.completions.create(
    #     model="Meta-Llama-3.2-3B-Instruct",
    #     messages=[{"role": "user", "content": prompt}],
    #     max_tokens=200,
    # )
    # prompt = response.choices[0].message.content
    bot_response = chat_with_bot(prompt)
    # stream_tts_sync yields the reply audio in chunks. The original code
    # collected every chunk but returned only audio_chunks[0], truncating
    # the spoken response -- concatenate them all instead.
    # NOTE(review): chunks appear to be (sample_rate, np.ndarray) pairs per
    # the fastrtc API -- confirm; handle both tuple and bare-array forms.
    chunks = list(tts_model.stream_tts_sync(bot_response))
    if not chunks:
        return np.array([], dtype=np.float32)
    samples = [c[1] if isinstance(c, tuple) else c for c in chunks]
    return np.concatenate([np.asarray(s).ravel() for s in samples])
def process_audio(audio_input):
    """Adapter between Gradio's audio format and the echo() pipeline.

    Gradio delivers microphone input as a ``(sample_rate, numpy array)``
    tuple; forward it through echo() and pair the processed samples with
    the original sample rate so Gradio can play the result back.
    """
    rate, samples = audio_input
    reply = echo((rate, samples))
    return rate, reply


def chat_with_bot(user_input):
    """Generate a chatbot reply for *user_input* via the DialoGPT pipeline.

    Parameters:
        user_input: the user's message as plain text.

    Returns:
        The model's reply text only. ``return_full_text=False`` strips the
        echoed input prompt that the text-generation pipeline would
        otherwise prepend to ``generated_text`` -- without it the voice
        assistant repeats the user's own words back before its reply.
    """
    outputs = chatbot(user_input, return_full_text=False)
    return outputs[0]['generated_text']

# Earlier text-only UI, kept for reference:
# interface = gr.Interface(
#     fn=chat_with_bot,  # Function to call for processing the input
#     inputs=gr.Textbox(label="Enter your message"),  # User input (text)
#     outputs=gr.Textbox(label="Chatbot Response"),  # Model output (text)
#     title="Chat with DialoGPT",  # Optional: Add a title to your interface
#     description="Chat with an AI model powered by DialoGPT!"  # Optional: Add a description
# )

# Audio round-trip UI: microphone in, synthesized chatbot reply out.
# type="numpy" makes Gradio pass audio as (sample_rate, np.ndarray) tuples,
# which is what process_audio expects and returns.
interface = gr.Interface(
    fn=process_audio,  # The function to process audio
    inputs=gr.Audio(type="numpy"),  # Microphone input (audio)
    outputs=gr.Audio(type="numpy"),  # Audio output (processed)
    live=True  # Make the processing live (if needed)
)

# Start the Gradio web server (blocks until shut down).
interface.launch()