""" Utility script to pre-generate embedding pickle files for all models. This script will: 1. Load each embedding model 2. Generate embeddings for both emotion and event dictionaries 3. Save the embeddings as pickle files in the 'embeddings' directory Run this script once locally to create all pickle files before uploading to the repository. """ import os from sentence_transformers import SentenceTransformer from tqdm import tqdm from config import CONFIG, EMBEDDING_MODELS from utils import (logger, kitchen_txt_to_dict, save_embeddings_to_pickle, get_embeddings_pickle_path) def generate_embeddings_for_model(model_key, model_info): """Generate and save embeddings for a specific model. Args: model_key: Key of the model in EMBEDDING_MODELS model_info: Model information dictionary Returns: Tuple of (success_emotion, success_event) """ model_id = model_info['id'] print(f"\nProcessing model: {model_key} ({model_id}) - {model_info['size']}") try: # Load the model print(f"Loading {model_key} model...") model = SentenceTransformer(model_id) # Load emoji dictionaries print("Loading emoji dictionaries...") emotion_dict = kitchen_txt_to_dict(CONFIG["emotion_file"]) event_dict = kitchen_txt_to_dict(CONFIG["item_file"]) if not emotion_dict or not event_dict: print("Error: Failed to load emoji dictionaries") return False, False # Generate emotion embeddings print(f"Generating {len(emotion_dict)} emotion embeddings...") emotion_embeddings = {} for emoji, desc in tqdm(emotion_dict.items()): emotion_embeddings[emoji] = model.encode(desc) # Generate event embeddings print(f"Generating {len(event_dict)} event embeddings...") event_embeddings = {} for emoji, desc in tqdm(event_dict.items()): event_embeddings[emoji] = model.encode(desc) # Save embeddings emotion_pickle_path = get_embeddings_pickle_path(model_id, "emotion") event_pickle_path = get_embeddings_pickle_path(model_id, "event") success_emotion = save_embeddings_to_pickle(emotion_embeddings, emotion_pickle_path) success_event = save_embeddings_to_pickle(event_embeddings, event_pickle_path) return success_emotion, success_event except Exception as e: print(f"Error generating embeddings for model {model_key}: {e}") return False, False def main(): """Main function to generate embeddings for all models.""" # Create embeddings directory if it doesn't exist os.makedirs('embeddings', exist_ok=True) print(f"Generating embeddings for {len(EMBEDDING_MODELS)} models...") results = {} # Generate embeddings for each model for model_key, model_info in EMBEDDING_MODELS.items(): success_emotion, success_event = generate_embeddings_for_model(model_key, model_info) results[model_key] = { 'emotion': success_emotion, 'event': success_event } # Print summary print("\n=== Embedding Generation Summary ===") for model_key, result in results.items(): status_emotion = "✓ Success" if result['emotion'] else "✗ Failed" status_event = "✓ Success" if result['event'] else "✗ Failed" print(f"{model_key:<10}: Emotion: {status_emotion}, Event: {status_event}") print("\nDone! Embedding pickle files are stored in the 'embeddings' directory.") print("You can now upload these files to your repository.") if __name__ == "__main__": main()