import os

# Set cache locations before importing transformers / huggingface_hub,
# since both read these variables at import time
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"

import torch
from transformers import AutoTokenizer

# Define model options - use smaller models for Spaces deployment
ASR_OPTIONS = {
    "Whisper Small": "openai/whisper-small",
    "Wav2Vec2": "facebook/wav2vec2-base-960h",
}

LLM_OPTIONS = {
    "Llama-2 7B Chat": "meta-llama/Llama-2-7b-chat-hf",  # gated repo: requires an HF access token
    "Flan-T5 Small": "google/flan-t5-small",
}

TTS_OPTIONS = {
    "VITS": "espnet/kan-bayashi_ljspeech_vits",
    "FastSpeech2": "espnet/kan-bayashi_ljspeech_fastspeech2",
}


def preload_models():
    """Preload essential components to optimize startup time."""
    print("Setting up model environment...")

    # Check for GPU availability
    if torch.cuda.is_available():
        print(f"GPU available: {torch.cuda.get_device_name(0)}")
        print(f"Memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("No GPU available. Running in CPU mode (performance will be limited).")

    try:
        # Download tokenizers first (smaller files)
        for model_name, model_id in LLM_OPTIONS.items():
            print(f"Downloading tokenizer for {model_name}...")
            AutoTokenizer.from_pretrained(model_id)

        # We don't preload the full models - they'll be loaded on demand
        print("Environment setup complete!")
    except Exception as e:
        print(f"Setup error: {e}")
        print("The application will still attempt to run, but might experience delays.")


if __name__ == "__main__":
    preload_models()
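

# --- Hypothetical on-demand loader (sketch only; not called by this script) ---
# The comment in preload_models() says the full models are loaded on demand,
# but the code that does so lives in the application, not here. Below is a
# minimal sketch of one way it could look, assuming the LLM_OPTIONS mapping
# above; the names _MODEL_CACHE and load_llm are illustrative, not part of
# any existing API.

from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM

_MODEL_CACHE = {}


def load_llm(model_name):
    """Load an LLM from LLM_OPTIONS the first time it is requested, then cache it."""
    if model_name not in _MODEL_CACHE:
        model_id = LLM_OPTIONS[model_name]
        # T5-style checkpoints are encoder-decoder; Llama-style checkpoints are causal
        loader = AutoModelForSeq2SeqLM if "t5" in model_id.lower() else AutoModelForCausalLM
        _MODEL_CACHE[model_name] = loader.from_pretrained(
            model_id,
            # half precision on GPU keeps the 7B model within typical Spaces memory
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        )
    return _MODEL_CACHE[model_name]

# Example usage (inside the app's request handler):
#   model = load_llm("Flan-T5 Small")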