import os

import assemblyai as aai
from elevenlabs import generate, stream
from openai import OpenAI, OpenAIError


class AI_Assistant:
    """Voice assistant for a restaurant.

    Microphone audio is transcribed with AssemblyAI's realtime transcriber,
    each final transcript is sent to an OpenAI chat model together with the
    conversation so far, and the reply is spoken through ElevenLabs.
    """

    def __init__(self) -> None:
        """Configure the three API clients and seed the conversation history.

        Each key is read from an environment variable when it is set
        (``ASSEMBLYAI_API_KEY``, ``OPENAI_API_KEY``, ``ELEVENLABS_API_KEY``);
        otherwise the original placeholder string is used, so the previous
        edit-the-literal workflow keeps working.
        """
        aai.settings.api_key = os.environ.get(
            "ASSEMBLYAI_API_KEY", "ASSEMBLYAI-API-KEY"
        )
        self.openai_client = OpenAI(
            api_key=os.environ.get("OPENAI_API_KEY", "OPENAI-API-KEY")
        )
        self.elevenlabs_api_key = os.environ.get(
            "ELEVENLABS_API_KEY", "ELEVENLABS-API-KEY"
        )

        # Active realtime transcriber, or None while not listening.
        self.transcriber = None

        # Context for food ordering in a restaurant
        self.full_transcript = [
            {
                "role": "system",
                "content": "You are a virtual assistant for a restaurant. Help customers with food ordering, menu inquiries, and table reservations.",
            },
        ]

    def start_transcription(self) -> None:
        """Open a realtime transcription session and feed it microphone audio.

        NOTE(review): ``stream`` consumes the microphone iterator, so this
        call presumably does not return until that stream ends -- confirm
        against the AssemblyAI SDK.
        """
        self.transcriber = aai.RealtimeTranscriber(
            sample_rate=16000,
            on_data=self.on_data,
            on_error=self.on_error,
            on_open=self.on_open,
            on_close=self.on_close,
            end_utterance_silence_threshold=1000,
        )
        self.transcriber.connect()

        microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
        self.transcriber.stream(microphone_stream)

    def stop_transcription(self) -> None:
        """Close the active transcriber, if any, and forget it."""
        if self.transcriber:
            self.transcriber.close()
            self.transcriber = None

    def on_open(self, session_opened: aai.RealtimeSessionOpened) -> None:
        """Transcriber callback: print the id of the newly opened session."""
        print("Session ID:", session_opened.session_id)

    def on_data(self, transcript: aai.RealtimeTranscript) -> None:
        """Transcriber callback: handle a partial or final transcript.

        Transcripts without text are ignored. A final transcript triggers an
        AI reply; any other transcript is echoed on a single console line
        that is overwritten in place.
        """
        if not transcript.text:
            return

        if isinstance(transcript, aai.RealtimeFinalTranscript):
            self.generate_ai_response(transcript)
        else:
            print(transcript.text, end="\r")

    def on_error(self, error: aai.RealtimeError) -> None:
        """Transcriber callback: print the reported error."""
        print("An error occurred:", error)

    def on_close(self) -> None:
        """Transcriber callback: report that the session was closed."""
        print("Session closed.")

    def generate_ai_response(self, transcript) -> None:
        """Answer one customer utterance, then resume listening.

        Transcription is stopped first so the transcriber is not running
        while the reply is generated and played back. The utterance is added
        to the history, the chat model is queried with the full history, and
        the reply is spoken via ``generate_audio``. Listening is restarted
        even when the OpenAI request fails, so a single failed request does
        not leave the assistant with transcription switched off.

        Args:
            transcript: Object exposing the recognised utterance as ``.text``
                (``on_data`` passes an ``aai.RealtimeFinalTranscript``).
        """
        self.stop_transcription()

        self.full_transcript.append({"role": "user", "content": transcript.text})
        print(f"\nCustomer: {transcript.text}\n")

        try:
            response = self.openai_client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=self.full_transcript,
            )
        except OpenAIError as error:
            # Report it the same way transcriber errors are reported and
            # carry on with the conversation.
            print("An error occurred:", error)
        else:
            ai_response = response.choices[0].message.content
            # The content can be missing or empty; there is nothing to
            # synthesise in that case.
            if ai_response:
                self.generate_audio(ai_response)

        # Announce before starting: start_transcription streams the
        # microphone and presumably only returns once that stream ends.
        print("\nListening for the next input...\n")
        self.start_transcription()

    def generate_audio(self, text: str) -> None:
        """Record ``text`` as an assistant turn, print it, and speak it.

        Args:
            text: The assistant message to add to the history and play.
        """
        self.full_transcript.append({"role": "assistant", "content": text})
        print(f"\nAI Assistant: {text}")

        audio_stream = generate(
            api_key=self.elevenlabs_api_key,
            text=text,
            voice="Rachel",
            stream=True,
        )
        stream(audio_stream)


if __name__ == "__main__":
    greeting = "Welcome to Gourmet Bistro! My name is Sandy. How may I assist you today?"
    ai_assistant = AI_Assistant()
    ai_assistant.generate_audio(greeting)
    try:
        ai_assistant.start_transcription()
    except KeyboardInterrupt:
        # Ctrl+C: close the realtime session instead of dying with a
        # traceback and leaving it open.
        ai_assistant.stop_transcription()