# This Gradio app creates a conversation pipeline that includes speech-to-text using the Whisper model,
# GPT response generation, and text-to-speech using the Google Text-to-Speech API.
# The app uses the microphone input to capture audio, processes it through the pipeline, and returns
# the GPT response as text and audio.
import gradio as gr
import openai
import whisper
import os

# Load the Whisper model
model = whisper.load_model("base")

# OpenAI API key for GPT (read from the environment rather than hard-coding it)
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_openai_api_key")

# Function to convert speech to text using Whisper
def speech_to_text(audio):
    result = model.transcribe(audio)
    return result["text"]

# Function to get GPT response.
# Note: gpt-3.5-turbo is a chat model, so it must be called through the
# ChatCompletion endpoint; the legacy Completion endpoint does not accept it.
def gpt_response(text):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": text}],
        max_tokens=100
    )
    return response.choices[0].message.content.strip()

# Function to convert text to speech using the Google Text-to-Speech API
def text_to_speech_google(text):
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()
    input_text = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = client.synthesize_speech(
        input=input_text,
        voice=voice,
        audio_config=audio_config
    )

    # Save the audio response to a file
    output_path = "output.mp3"
    with open(output_path, "wb") as out:
        out.write(response.audio_content)
    return output_path

# Function to handle the entire conversation pipeline
def conversation_pipeline(audio):
    # Step 1: Convert speech to text
    text = speech_to_text(audio)

    # Step 2: Get GPT response
    response_text = gpt_response(text)

    # Step 3: Convert GPT response to speech
    response_audio = text_to_speech_google(response_text)

    return response_text, response_audio

# Gradio interface
demo = gr.Interface(
    fn=conversation_pipeline,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="GPT Response"),
        gr.Audio(label="GPT Response Audio", type="filepath", autoplay=True)
    ]
)

demo.launch(show_error=True)
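
# Usage notes (assumptions, not part of the original app):
#
# The code above targets openai<1.0 and Gradio 3.x. On openai>=1.0 the chat
# call goes through a client object instead; a minimal sketch of the
# equivalent gpt_response body under that assumption:
#
#     from openai import OpenAI
#     client = OpenAI()  # reads OPENAI_API_KEY from the environment
#     response = client.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": text}],
#         max_tokens=100,
#     )
#     response_text = response.choices[0].message.content.strip()
#
# On Gradio 4.x, gr.Audio takes sources=["microphone"] (plural, a list)
# instead of source="microphone".
#
# The Google Text-to-Speech client expects credentials via the
# GOOGLE_APPLICATION_CREDENTIALS environment variable pointing at a
# service-account JSON key file.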