File size: 2,567 Bytes
ad716e2
 
 
327437d
9093d3d
ad716e2
 
 
 
 
 
02595d3
ad716e2
 
 
a8640fd
 
 
 
 
4c851ba
a8640fd
 
 
 
 
 
 
 
 
 
caace45
 
 
 
 
 
 
a8640fd
 
 
 
 
caace45
a8640fd
 
caace45
a8640fd
ad716e2
 
 
 
 
 
a8640fd
 
 
 
 
 
 
 
ad716e2
a8640fd
 
 
 
ad716e2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import gradio as gr
from fastrtc import (ReplyOnPause, Stream, get_stt_model, get_tts_model)
import numpy as np

# messages = [
#     {"role": "user", "content": "Who are you?"},
# ]
# pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-3B-Instruct")
# pipe(messages)
# Text chatbot backend: a Hugging Face text-generation pipeline.
# DialoGPT-medium is used instead of the (gated) Llama checkpoint above.
# NOTE: this downloads/loads the model at import time, so starting the
# script can take a while on first run.
chatbot = pipeline("text-generation", model="microsoft/DialoGPT-medium")

# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# fastrtc default speech-to-text and text-to-speech models, also loaded
# eagerly at import time.
stt_model = get_stt_model()
tts_model = get_tts_model()



# Alternative wiring: serve `echo` directly over a fastrtc audio stream
# instead of the Gradio interface built at the bottom of the file.
#stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")

def echo(audio):
    """Run one voice-assistant turn: speech -> text -> chatbot -> speech.

    Parameters:
        audio: a ``(sample_rate, numpy array)`` tuple as supplied by
            Gradio / fastrtc.

    Returns:
        A 1-D numpy array containing the full synthesized reply waveform
        (empty array if TTS produced no chunks).
    """
    # Transcribe the user's utterance to text.
    prompt = stt_model.stt(audio)
    # response = sambanova_client.chat.completions.create(
    #     model="Meta-Llama-3.2-3B-Instruct",
    #     messages=[{"role": "user", "content": prompt}],
    #     max_tokens=200,
    # )
    # prompt = response.choices[0].message.content
    bot_response = chat_with_bot(prompt)
    # stream_tts_sync yields the reply audio in chunks. The original code
    # collected every chunk but returned only audio_chunks[0], truncating
    # the spoken response -- concatenate them all instead.
    # NOTE(review): chunks appear to be (sample_rate, np.ndarray) pairs per
    # the fastrtc API -- confirm; handle both tuple and bare-array forms.
    chunks = list(tts_model.stream_tts_sync(bot_response))
    if not chunks:
        return np.array([], dtype=np.float32)
    samples = [c[1] if isinstance(c, tuple) else c for c in chunks]
    return np.concatenate([np.asarray(s).ravel() for s in samples])
def process_audio(audio_input):
    """Adapter between Gradio's audio format and the echo() pipeline.

    Gradio delivers microphone input as a ``(sample_rate, numpy array)``
    tuple; forward it through echo() and pair the processed samples with
    the original sample rate so Gradio can play the result back.
    """
    rate, samples = audio_input
    reply = echo((rate, samples))
    return rate, reply


def chat_with_bot(user_input):
    """Generate a chatbot reply for *user_input* via the DialoGPT pipeline.

    Parameters:
        user_input: the user's message as plain text.

    Returns:
        The model's reply text only. ``return_full_text=False`` strips the
        echoed input prompt that the text-generation pipeline would
        otherwise prepend to ``generated_text`` -- without it the voice
        assistant repeats the user's own words back before its reply.
    """
    outputs = chatbot(user_input, return_full_text=False)
    return outputs[0]['generated_text']

# Earlier text-only UI, kept for reference:
# interface = gr.Interface(
#     fn=chat_with_bot,  # Function to call for processing the input
#     inputs=gr.Textbox(label="Enter your message"),  # User input (text)
#     outputs=gr.Textbox(label="Chatbot Response"),  # Model output (text)
#     title="Chat with DialoGPT",  # Optional: Add a title to your interface
#     description="Chat with an AI model powered by DialoGPT!"  # Optional: Add a description
# )

# Audio round-trip UI: microphone in, synthesized chatbot reply out.
# type="numpy" makes Gradio pass audio as (sample_rate, np.ndarray) tuples,
# which is what process_audio expects and returns.
interface = gr.Interface(
    fn=process_audio,  # The function to process audio
    inputs=gr.Audio(type="numpy"),  # Microphone input (audio)
    outputs=gr.Audio(type="numpy"),  # Audio output (processed)
    live=True  # Make the processing live (if needed)
)

# Start the Gradio web server (blocks until shut down).
interface.launch()