|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
from sentence_transformers import SentenceTransformer |
|
import torch |
|
import logging |
|
from config.config import settings |
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
class ModelService:
    """Singleton holder for the tokenizer, causal language model and embedder.

    ``__new__`` always hands back the same cached instance, and ``__init__``
    loads the models only while that instance is not yet marked as
    initialised.  No locking is performed around instance creation or
    model loading.
    """

    # The single shared instance, created lazily on first construction.
    _instance = None

    def __new__(cls):
        """Return the shared instance, creating it on the first call."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Load all models the first time the shared instance is constructed.

        Raises:
            RuntimeError: If any of the models fails to load.
        """
        if self._initialized:
            return

        self.tokenizer = None
        self.model = None
        self.embedder = None
        self._load_models()
        # Mark the instance as initialised only after a successful load, so
        # that a failed attempt is retried on the next construction instead
        # of leaving a permanently half-initialised singleton behind.
        self._initialized = True

    def _load_models(self):
        """Load the tokenizer, the language model and the sentence embedder.

        Model names and the target device are read from ``settings``
        (``MODEL_NAME``, ``EMBEDDER_MODEL``, ``DEVICE``).

        Raises:
            RuntimeError: Wrapping whatever exception interrupted loading;
                the original exception is attached as ``__cause__``.
        """
        try:
            logger.info("Loading models...")

            self.tokenizer = AutoTokenizer.from_pretrained(
                settings.MODEL_NAME,
                use_fast=False,
            )
            # Reuse the end-of-sequence token as the padding token.
            self.tokenizer.pad_token = self.tokenizer.eos_token
            logger.info(f"Tokenizer for {settings.MODEL_NAME} loaded successfully.")

            device = settings.DEVICE
            on_cuda = device == "cuda"

            self.model = AutoModelForCausalLM.from_pretrained(
                settings.MODEL_NAME,
                # Half precision on CUDA, full precision otherwise.
                torch_dtype=torch.float16 if on_cuda else torch.float32,
                device_map="auto" if on_cuda else None,
                # NOTE(security): this allows code shipped with the model
                # repository to run locally; MODEL_NAME must point to a
                # trusted source.
                trust_remote_code=True,
            )
            logger.info(f"Model {settings.MODEL_NAME} loaded successfully on {device}.")

            # NOTE(review): the embedder device is chosen from CUDA
            # availability rather than settings.DEVICE - confirm intended.
            self.embedder = SentenceTransformer(
                settings.EMBEDDER_MODEL,
                device="cuda" if torch.cuda.is_available() else "cpu",
            )
            logger.info(f"Embedder {settings.EMBEDDER_MODEL} loaded successfully.")
        except Exception as exc:
            # Log with traceback, then surface one exception type to callers
            # while keeping the original failure chained as the cause.
            logger.exception(f"Error loading models: {exc}")
            raise RuntimeError(f"Failed to initialize ModelService: {exc}") from exc

    def get_models(self):
        """Return the tokenizer, language model, and sentence embedder.

        Returns:
            A ``(tokenizer, model, embedder)`` tuple.

        Raises:
            RuntimeError: If any of the three objects has not been loaded.
        """
        loaded = (self.tokenizer, self.model, self.embedder)
        # Compare against None explicitly: the loaded objects may define
        # __len__/__bool__, so truthiness is not a reliable "is loaded" test.
        if any(component is None for component in loaded):
            raise RuntimeError("Models are not fully loaded.")
        return loaded
|
|