import time
import os

import numpy as np
import spaces
from torch import cuda, backends, version
from sentence_transformers import SentenceTransformer
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer

# Fixed seed for the TruncatedSVD fallback (assumed value; the pipeline below requires it)
random_seed = 42

# Check for torch cuda
# If you want to disable cuda for testing purposes
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

print("Is CUDA enabled? ", cuda.is_available())
print("Is a CUDA device available on this computer?", backends.cudnn.enabled)

if cuda.is_available():
    torch_device = "gpu"
    print("Cuda version installed is: ", version.cuda)
    high_quality_mode = "Yes"
    os.system("nvidia-smi")
else:
    torch_device = "cpu"
    high_quality_mode = "No"

@spaces.GPU
def make_or_load_embeddings(docs: list,
                            file_list: list,
                            embeddings_out: np.ndarray,
                            embeddings_super_compress: str,
                            high_quality_mode_opt: str,
                            embeddings_name: str = "mixedbread-ai/mxbai-embed-xsmall-v1") -> tuple:
    """
    Create or load embeddings for the given documents.

    Args:
        docs (list): List of documents to embed.
        file_list (list): List of file names to check for existing embeddings.
        embeddings_out (np.ndarray): Array to store the embeddings.
        embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
        high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
        embeddings_name (str): Name of the Hugging Face embedding model to download if no local copy is found.

    Returns:
        tuple: The generated or loaded embeddings, and the embedding model used to create them.
    """

    if high_quality_mode_opt == "Yes":
        # Define a list of possible local locations to search for the model
        local_embeddings_locations = [
            "model/embed/", # Potential local location
            "/model/embed/", # Potential location in Docker container
            "/home/user/app/model/embed/" # This is inside a Docker container
        ]

        # Attempt to load the model from each local location
        for location in local_embeddings_locations:
            try:
                embedding_model = SentenceTransformer(location)#, truncate_dim=512)
                print(f"Found local model installation at: {location}")
                break # Exit the loop if the model is found
            except Exception as e:
                print(f"Failed to load model from {location}: {e}")
                continue
        else:
            # If the loop completes without finding the model in any local location
            embedding_model = SentenceTransformer(embeddings_name)#, truncate_dim=512)
            print("Could not find local model installation. Downloading from Hugging Face")
    else:
        embedding_model = make_pipeline(
            TfidfVectorizer(),
            TruncatedSVD(100, random_state=random_seed)
        )

    # If no embeddings found, make or load in
    if embeddings_out.size == 0:
        print("Embeddings not found. Loading or generating new ones.")

        embeddings_file_names = [string for string in file_list if "embedding" in string.lower()]

        if embeddings_file_names:
            embeddings_file_name = embeddings_file_names[0]
            print("Loading embeddings from file.")
            embeddings_out = np.load(embeddings_file_name)['arr_0']

            # If embedding files have 'super_compress' in the title, they have been multiplied by 100 before save
            if "compress" in embeddings_file_name:
                embeddings_out /= 100

        if not embeddings_file_names:
            tic = time.perf_counter()
            print("Starting to embed documents.")

            # If on CPU, don't resort to transformer embedding models
            if high_quality_mode_opt == "No":
                print("Creating simplified 'sparse' embeddings based on TfIDF")

                # Fit the pipeline to the text data
                embedding_model.fit(docs)

                # Transform text data to embeddings
                embeddings_out = embedding_model.transform(docs)

            elif high_quality_mode_opt == "Yes":
                print("Creating dense embeddings based on transformers model")

                # Convert model to half precision (fp16)
                embedding_model.half()

                embeddings_out = embedding_model.encode(sentences=docs, show_progress_bar=True, batch_size=32)#, precision="int8") # For large

            toc = time.perf_counter()
            time_out = f"The embedding took {toc - tic:0.1f} seconds"
            print(time_out)

        # If the user has chosen to go with super compressed embedding files to save disk space
        if embeddings_super_compress == "Yes":
            embeddings_out = np.round(embeddings_out, 3)
            embeddings_out *= 100

        return embeddings_out, embedding_model

    else:
        print("Found pre-loaded embeddings.")
        return embeddings_out, embedding_model
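

# A minimal usage sketch (illustrative only; the example documents and argument values below
# are assumptions, not part of the original app). Passing an empty embeddings array forces a
# fresh embedding run; "Yes" selects the dense transformer path, which downloads
# mixedbread-ai/mxbai-embed-xsmall-v1 if no local copy is found. On a CPU-only machine the
# TF-IDF fallback would instead need a corpus large enough for its 100-component SVD.
if __name__ == "__main__":
    example_docs = [
        "First example document.",
        "Second example document about a different topic.",
    ]

    embeddings, model = make_or_load_embeddings(
        docs=example_docs,
        file_list=[],                 # no previously saved embedding files to load
        embeddings_out=np.array([]),  # empty array triggers generation of new embeddings
        embeddings_super_compress="No",
        high_quality_mode_opt="Yes",
    )

    print("Embedding array shape:", np.asarray(embeddings).shape)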