import os

# Set cache locations before importing transformers / huggingface_hub,
# since both read these variables at import time
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"

import torch
from transformers import AutoTokenizer

# Define model options - use smaller models for Spaces deployment
ASR_OPTIONS = {
    "Whisper Small": "openai/whisper-small",
    "Wav2Vec2": "facebook/wav2vec2-base-960h",
}

LLM_OPTIONS = {
    "Llama-2 7B Chat": "meta-llama/Llama-2-7b-chat-hf",  # gated repo: requires an HF access token
    "Flan-T5 Small": "google/flan-t5-small",
}

TTS_OPTIONS = {
    "VITS": "espnet/kan-bayashi_ljspeech_vits",
    "FastSpeech2": "espnet/kan-bayashi_ljspeech_fastspeech2",
}


def preload_models():
    """Preload essential components to optimize startup time."""
    print("Setting up model environment...")

    # Check for GPU availability
    if torch.cuda.is_available():
        print(f"GPU available: {torch.cuda.get_device_name(0)}")
        print(f"Memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("No GPU available. Running in CPU mode (performance will be limited).")

    try:
        # Download tokenizers first (smaller files)
        for model_name, model_id in LLM_OPTIONS.items():
            print(f"Downloading tokenizer for {model_name}...")
            AutoTokenizer.from_pretrained(model_id)

        # We don't preload the full models - they'll be loaded on demand
        print("Environment setup complete!")
    except Exception as e:
        print(f"Setup error: {e}")
        print("The application will still attempt to run, but might experience delays.")


if __name__ == "__main__":
    preload_models()
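

# --- Hypothetical on-demand loader (sketch only; not called by this script) ---
# The comment in preload_models() says the full models are loaded on demand,
# but the code that does so lives in the application, not here. Below is a
# minimal sketch of one way it could look, assuming the LLM_OPTIONS mapping
# above; the names _MODEL_CACHE and load_llm are illustrative, not part of
# any existing API.

from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM

_MODEL_CACHE = {}


def load_llm(model_name):
    """Load an LLM from LLM_OPTIONS the first time it is requested, then cache it."""
    if model_name not in _MODEL_CACHE:
        model_id = LLM_OPTIONS[model_name]
        # T5-style checkpoints are encoder-decoder; Llama-style checkpoints are causal
        loader = AutoModelForSeq2SeqLM if "t5" in model_id.lower() else AutoModelForCausalLM
        _MODEL_CACHE[model_name] = loader.from_pretrained(
            model_id,
            # half precision on GPU keeps the 7B model within typical Spaces memory
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        )
    return _MODEL_CACHE[model_name]

# Example usage (inside the app's request handler):
#   model = load_llm("Flan-T5 Small")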