# meta-3b / app.py
# krishna-k's picture
# Update app.py
# 4c851ba verified
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import gradio as gr
from fastrtc import (ReplyOnPause, Stream, get_stt_model, get_tts_model)
import numpy as np
# messages = [
# {"role": "user", "content": "Who are you?"},
# ]
# pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-3B-Instruct")
# pipe(messages)
# Text-generation model used to produce chat replies; loaded once at startup.
chatbot = pipeline("text-generation", model="microsoft/DialoGPT-medium")
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# fastrtc default speech-to-text / text-to-speech models (whichever backends
# get_stt_model()/get_tts_model() select — not pinned here).
stt_model = get_stt_model()
tts_model = get_tts_model()
#stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")
def echo(audio):
    """Run the full voice loop: speech -> text -> chat reply -> speech.

    Parameters
    ----------
    audio : tuple
        ``(sample_rate, numpy array)`` pair as produced by Gradio / fastrtc.

    Returns
    -------
    The synthesized reply audio. When the TTS stream yields
    ``(sample_rate, ndarray)`` chunks they are concatenated so the WHOLE
    utterance is returned — the original returned only ``audio_chunks[0]``,
    truncating the spoken reply after the first chunk, and raised
    ``IndexError`` when the stream produced no chunks at all.
    """
    prompt = stt_model.stt(audio)
    bot_response = chat_with_bot(prompt)

    # Collect every chunk of synthesized speech (stream_tts_sync is a
    # synchronous generator of chunks).
    audio_chunks = list(tts_model.stream_tts_sync(bot_response))

    if not audio_chunks:
        # Nothing synthesized (e.g. an empty reply): return a short burst
        # of silence at the input sample rate instead of crashing.
        sample_rate, _ = audio
        return (sample_rate, np.zeros(1, dtype=np.int16))

    first = audio_chunks[0]
    if isinstance(first, tuple) and len(first) == 2:
        # fastrtc TTS chunks are (sample_rate, waveform) pairs — join the
        # waveforms along the sample axis so the full reply is spoken.
        sample_rate = first[0]
        waveform = np.concatenate(
            [np.atleast_1d(np.asarray(chunk[1])) for chunk in audio_chunks],
            axis=-1,
        )
        return (sample_rate, waveform)

    # Unknown chunk format — preserve the original single-chunk behavior.
    return first
def process_audio(audio_input):
    """Gradio adapter: feed a mic recording through the echo() pipeline.

    ``audio_input`` is the ``(sample_rate, numpy array)`` tuple that a
    ``gr.Audio(type="numpy")`` component delivers; the same sample rate is
    paired with the processed audio on the way out.
    """
    rate, samples = audio_input
    reply = echo((rate, samples))
    # NOTE(review): echo() may itself return a (sample_rate, array) tuple,
    # which would nest inside the tuple returned here — confirm against the
    # fastrtc TTS chunk format before relying on the output shape.
    return rate, reply
def chat_with_bot(user_input):
    """Return the chatbot model's text reply to ``user_input``.

    The ``text-generation`` pipeline's ``generated_text`` includes the
    prompt by default, so the original implementation echoed the user's
    words back at the start of every reply; ``return_full_text=False``
    keeps only the newly generated continuation.
    """
    if not user_input:
        # Empty transcription (e.g. silence) — nothing to reply to.
        return ""
    outputs = chatbot(user_input, return_full_text=False)
    return outputs[0]["generated_text"]
# interface = gr.Interface(
# fn=chat_with_bot, # Function to call for processing the input
# inputs=gr.Textbox(label="Enter your message"), # User input (text)
# outputs=gr.Textbox(label="Chatbot Response"), # Model output (text)
# title="Chat with DialoGPT", # Optional: Add a title to your interface
# description="Chat with an AI model powered by DialoGPT!" # Optional: Add a description
# )
# Gradio UI: records audio, pipes it through process_audio(), and plays the
# result back. live=True re-runs the function as the input changes.
interface = gr.Interface(
fn=process_audio, # The function to process audio
inputs=gr.Audio(type="numpy"), # Microphone input (audio)
outputs=gr.Audio(type="numpy"), # Audio output (processed)
live=True # Make the processing live (if needed)
)
# Starts the Gradio server (blocking call).
interface.launch()