Malformed config when saving

#17
by Alicimo - opened

When saving the model (e.g., after additional training), the config_class is malformed, which causes an error when the saved model is loaded again. Below is a minimal example that reproduces the problem; the other models in it are included to show that they save and reload without errors.

from pathlib import Path
from shutil import rmtree

import torch
from sentence_transformers import SentenceTransformer

# Models to put through the save/reload round trip.
model_names = [
    "jinaai/jina-embeddings-v2-base-de",
    "mixedbread-ai/deepset-mxbai-embed-de-large-v1",
    "intfloat/multilingual-e5-base",
]

# Scratch directory that holds the numbered checkpoints written below.
output_dir = Path("tmp")
output_dir.mkdir(exist_ok=True)

for model_name in model_names:
    # Empty the scratch directory so checkpoints written for the previous
    # model are not picked up by this one.
    for entry in output_dir.iterdir():
        if entry.is_dir() and not entry.is_symlink():
            rmtree(entry)
        else:
            # rmtree() raises on plain files and symlinks; remove them directly.
            entry.unlink()

    model = SentenceTransformer(model_name, trust_remote_code=True)
    try:
        # Save the model and load it back from disk, five times in a row.
        for i in range(5):
            print(f"{model_name}: {i}")
            # Derive the path from output_dir; as_posix() keeps the "tmp/<i>"
            # form on every platform.
            checkpoint = (output_dir / str(i)).as_posix()
            model.save(checkpoint)
            # Drop the in-memory model before loading the saved copy.
            del model
            torch.cuda.empty_cache()
            model = SentenceTransformer(checkpoint, trust_remote_code=True)
    except ValueError as e:
        # Report the failure and continue with the next model.
        print("Error raised:", e)
jinaai/jina-embeddings-v2-base-de: 0
Error raised: The model class you are passing has a `config_class` attribute that is not consistent with the config class you passed (model has <class 'transformers_modules.jinaai.jina-bert-implementation.f3ec4cf7de7e561007f27c9efc7148b0bd713f81.configuration_bert.JinaBertConfig'> and you passed <class 'transformers_modules.0.configuration_bert.JinaBertConfig'>. Fix one of those so they match!
mixedbread-ai/deepset-mxbai-embed-de-large-v1: 0
mixedbread-ai/deepset-mxbai-embed-de-large-v1: 1
mixedbread-ai/deepset-mxbai-embed-de-large-v1: 2
mixedbread-ai/deepset-mxbai-embed-de-large-v1: 3
mixedbread-ai/deepset-mxbai-embed-de-large-v1: 4
intfloat/multilingual-e5-base: 0
intfloat/multilingual-e5-base: 1
intfloat/multilingual-e5-base: 2
intfloat/multilingual-e5-base: 3
intfloat/multilingual-e5-base: 4

Sign up or log in to comment