"""Ayurveda AI voice assistant.

A Gradio front-end that records a spoken query from the microphone,
transcribes it with Google Cloud Speech-to-Text, answers it via the
project's `process_query` NLP pipeline, and speaks the answer back
using Google Cloud Text-to-Speech.
"""

import os
import tempfile
import warnings

import gradio as gr
from google.cloud import speech
from google.cloud import texttospeech

from nlp import process_query

# Suppress FutureWarning from huggingface_hub
warnings.filterwarnings("ignore", category=FutureWarning)

# Google credentials setup
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/credentials.json"

# Initialize Google Cloud services once at import time.
speech_client = speech.SpeechClient()
tts_client = texttospeech.TextToSpeechClient()


def speech_to_text(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* and return the transcript.

    Returns an empty string when the recognizer produced no results.

    NOTE(review): the config assumes 16 kHz LINEAR16 audio; Gradio's
    microphone recordings may use a different sample rate — confirm, or
    let the API infer the rate from the WAV header.
    """
    # Gradio hands us a filepath (see the Interface below); the Speech API
    # needs the raw bytes.
    with open(audio_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",  # You can set it dynamically based on user input
    )
    response = speech_client.recognize(config=config, audio=audio)

    # Use the top alternative of the first result, if any.
    if response.results:
        return response.results[0].alternatives[0].transcript
    return ""


def text_to_speech(text: str, language_code: str = "en-US") -> bytes:
    """Synthesize *text* into MP3 audio bytes with Google Cloud TTS."""
    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = tts_client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    return response.audio_content


def handle_query(audio):
    """Process one voice query end to end.

    Args:
        audio: Filepath of the recorded clip from the Gradio microphone
            input, or ``None`` when nothing was captured.

    Returns:
        A ``(response_text, audio_path_or_None)`` tuple for the two
        Gradio output components.
    """
    # Live microphone input can deliver None between recordings.
    if audio is None:
        return "Sorry, I didn't catch that. Could you please repeat?", None

    # Convert speech to text
    query_text = speech_to_text(audio)

    language = "en"  # Default language; extend language detection here if needed.
    if query_text:
        response_text = process_query(query_text, language)

        # Convert the answer to speech.  gr.Audio output expects a filepath
        # (or (rate, ndarray)), not raw bytes, so persist the MP3 first.
        audio_bytes = text_to_speech(response_text, language_code="en-US")
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tmp.write(audio_bytes)
            audio_path = tmp.name
        return response_text, audio_path

    return "Sorry, I didn't catch that. Could you please repeat?", None


# Gradio Interface
iface = gr.Interface(
    fn=handle_query,
    # `sources` selects the microphone; `type="filepath"` makes Gradio pass
    # the recording as a file path (the old type="microphone" is invalid).
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[gr.Textbox(), gr.Audio()],
    live=True,
    title="Ayurveda AI Voice Assistant",
    description="A voice assistant to help with Ayurvedic queries in multiple languages.",
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()