import logging
import speech_recognition as sr
from pydub import AudioSegment
from io import BytesIO
import os
import google.generativeai as genai
from dotenv import load_dotenv
import base64

# Configure the root logger once at import time: INFO level, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """
    Record audio from the microphone and save it as an MP3 file.

    This is a best-effort helper: any failure (no speech before the timeout,
    microphone problems, encoding errors) is logged and swallowed, and the
    function returns None in every case.

    Args:
    file_path (str): Path to save the recorded audio file.
    timeout (int): Maximum time to wait for a phrase to start (in seconds).
    phrase_time_limit (int): Maximum time for the phrase to be recorded (in seconds).
        Defaults to None, which is passed through to Recognizer.listen unchanged.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")

            # Record the audio
            audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
            logging.info("Recording complete.")

        # The capture is fully in memory at this point, so the encoding runs
        # after the `with` block and the microphone is not held open while
        # the MP3 is being written.
        wav_data = audio_data.get_wav_data()
        audio_segment = AudioSegment.from_wav(BytesIO(wav_data))

        # export() hands back the file object it wrote to; close it explicitly
        # rather than leaving the handle to the garbage collector.
        exported = audio_segment.export(file_path, format="mp3", bitrate="128k")
        exported.close()

        logging.info(f"Audio saved to {file_path}")

    except sr.WaitTimeoutError:
        # Expected outcome when nobody speaks: report it plainly, no traceback.
        logging.error(f"An error occurred: no speech detected within {timeout} seconds.")
    except Exception as e:
        # Same message as before, plus the traceback so failures can be diagnosed.
        logging.exception(f"An error occurred: {e}")

# Load variables from a .env file (python-dotenv) before the key lookup below.
load_dotenv()

# Google AI Studio API key taken from the environment; None when it is not set.
GOOGLE_AI_STUDIO_API_KEY = os.getenv("GOOGLE_AI_STUDIO_API_KEY")

# Kept for compatibility: transcribe_with_groq accepts this as a parameter, but
# its body always requests a Gemini model.
stt_model = "whisper-large-v3"

def transcribe_with_groq(stt_model, audio_filepath, GOOGLE_AI_STUDIO_API_KEY=None):
    """
    Transcribe an audio file with Gemini and return the transcription text.

    Despite the function name, the request is sent through
    google.generativeai to the "gemini-2.0-flash" model.

    Args:
    stt_model (str): Not read by this function; accepted so existing call
        sites keep working.
    audio_filepath (str): Path of the audio file to transcribe. Its bytes are
        sent inline, base64-encoded, with MIME type "audio/mp3".
    GOOGLE_AI_STUDIO_API_KEY (str): API key to use. When falsy, the
        GOOGLE_AI_STUDIO_API_KEY environment variable is used instead.

    Returns:
    The ``text`` attribute of the model response.
    """
    # Resolve the key: explicit argument first, environment variable second.
    resolved_key = GOOGLE_AI_STUDIO_API_KEY
    if not resolved_key:
        resolved_key = os.environ.get("GOOGLE_AI_STUDIO_API_KEY")
    genai.configure(api_key=resolved_key)

    gemini = genai.GenerativeModel("gemini-2.0-flash")

    # Load the audio and turn it into a base64 string for the inline payload.
    with open(audio_filepath, "rb") as handle:
        raw_audio = handle.read()
    encoded_audio = base64.b64encode(raw_audio).decode("utf-8")

    # One user turn: the instruction text followed by the audio itself.
    instruction_part = {
        "text": "Please transcribe this audio accurately. Output only the transcription with no additional text."
    }
    audio_part = {
        "inline_data": {"mime_type": "audio/mp3", "data": encoded_audio}
    }
    user_turn = {"role": "user", "parts": [instruction_part, audio_part]}

    return gemini.generate_content([user_turn]).text