"""Voice chatbot: speech-to-text -> DialoGPT -> text-to-speech, served via Gradio."""

from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: F401 (kept: may be used for a local Llama model)
from transformers import pipeline
import gradio as gr
from fastrtc import ReplyOnPause, Stream, get_stt_model, get_tts_model
import numpy as np

# Text-generation model that produces the chat replies.
chatbot = pipeline("text-generation", model="microsoft/DialoGPT-medium")

# Speech-to-text and text-to-speech models from fastrtc.
stt_model = get_stt_model()
tts_model = get_tts_model()


def chat_with_bot(user_input):
    """Return the chatbot's generated reply text for *user_input*."""
    response = chatbot(user_input)
    return response[0]["generated_text"]


def echo(audio):
    """Transcribe *audio*, generate a chat reply, and yield TTS audio chunks.

    Each yielded chunk is whatever ``tts_model.stream_tts_sync`` produces —
    per fastrtc convention a ``(sample_rate, numpy array)`` tuple.
    """
    prompt = stt_model.stt(audio)
    reply = chat_with_bot(prompt)
    for audio_chunk in tts_model.stream_tts_sync(reply):
        yield audio_chunk


# BUG FIX: the original constructed the Stream *before* ``echo`` was defined,
# which raises NameError at import time. It must come after the definition.
stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")


def process_audio(audio_input):
    """Run one round-trip through ``echo`` and return ``(sample_rate, audio)``.

    ``audio_input`` is the Gradio Audio value: a ``(sample_rate, numpy array)``
    tuple.  ``echo`` is a *generator* of ``(sample_rate, chunk)`` pairs, so the
    chunks are collected and concatenated into one array for Gradio's output.
    (The original unpacked the generator object directly as a 2-tuple, which
    fails unless exactly two chunks happen to be yielded.)
    """
    sample_rate, audio_data = audio_input
    chunks = list(echo((sample_rate, audio_data)))
    if not chunks:
        # No speech was produced: return empty audio at the input sample rate.
        return sample_rate, np.zeros(0, dtype=np.int16)
    out_rate = chunks[0][0]
    # NOTE(review): assumes all chunks share a sample rate and concatenate on
    # axis 0 — confirm against fastrtc's TTS chunk shape.
    out_audio = np.concatenate([np.asarray(chunk) for _, chunk in chunks])
    return out_rate, out_audio


interface = gr.Interface(
    fn=process_audio,             # Function that processes one audio clip
    inputs=gr.Audio(type="numpy"),   # Microphone input as (rate, array)
    outputs=gr.Audio(type="numpy"),  # Processed audio back to the browser
    live=True,                    # Process continuously as audio arrives
)

if __name__ == "__main__":
    interface.launch()