"""Streamlit app: speak to a chatbot and watch an avatar video of its reply.

Flow: record speech from the microphone, transcribe it, generate a text
reply with a Hugging Face model, then ask an external avatar service for a
video of that reply and embed it in the page.
"""

from typing import Optional

import requests
import speech_recognition as sr
import streamlit as st
from transformers import pipeline
# NOTE(review): TTS is not referenced anywhere in this file -- confirm it is
# still needed (and that the module name is right) before relying on it.
from tts import TTS  # Hugging Face TTS model

# Model used to generate the chatbot replies.
CHATBOT_MODEL = "facebook/blenderbot-400M-distill"

# Placeholder endpoint of an avatar service (e.g., D-ID, Synthesia).
AVATAR_API_URL = "https://api.example.com/generate-avatar"

# Seconds to wait for the avatar service; without a timeout `requests`
# blocks indefinitely and the Streamlit run would hang.
AVATAR_API_TIMEOUT = 30


@st.cache_resource
def load_chatbot():
    """Build the reply-generation pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every interaction; caching
    the pipeline avoids reloading the model weights each time.

    The ``text2text-generation`` task is used because it accepts a plain
    string and returns ``[{"generated_text": ...}]``, which is the shape
    the UI code below indexes into.
    """
    return pipeline("text2text-generation", model=CHATBOT_MODEL)


# Load the chatbot model
chatbot = load_chatbot()


# Function to convert speech to text
def speech_to_text() -> Optional[str]:
    """Record one utterance from the default microphone and transcribe it.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition service could not be reached. In
        both failure cases a message is shown in the UI, so the message is
        never mistaken for something the user said.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.info("Listening...")
        audio = recognizer.listen(source)

    # Transcribe after leaving the `with` block so the microphone is
    # released before the network round-trip.
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.warning("Sorry, I could not understand the audio.")
    except sr.RequestError:
        st.error("Speech recognition service is not available.")
    return None


# Function to generate avatar video
def generate_avatar_video(text_response) -> Optional[str]:
    """Request an avatar video that speaks ``text_response``.

    Args:
        text_response: Text sent to the avatar service as the ``"text"``
            field of the JSON payload.

    Returns:
        The value of ``"video_url"`` from the service's JSON reply, or
        ``None`` when the request fails, the service answers with an HTTP
        error status, the body is not a JSON object, or the key is absent.
    """
    payload = {"text": text_response}
    try:
        response = requests.post(
            AVATAR_API_URL, json=payload, timeout=AVATAR_API_TIMEOUT
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON.
        return None

    if not isinstance(data, dict):
        return None
    return data.get("video_url")


st.title("🗣️ Live Video Chatbot")

# Button to start recording
if st.button("Speak"):
    user_input = speech_to_text()

    if user_input:
        st.write(f"**You:** {user_input}")

        # Generate chatbot response
        bot_response = chatbot(user_input)
        response_text = bot_response[0]["generated_text"].strip()
        st.write(f"**Bot:** {response_text}")

        # Generate avatar video
        video_url = generate_avatar_video(response_text)

        # Display the video response; st.video cannot render a missing URL.
        if video_url:
            st.video(video_url)
        else:
            st.warning("The avatar video could not be generated.")