chore: support other models

- lightweight_embeddings/__init__.py +32 -13
- lightweight_embeddings/router.py +75 -168
- lightweight_embeddings/service.py +214 -306

lightweight_embeddings/__init__.py

@@ -14,13 +14,27 @@ Supported image model ID:
 - "google/siglip-base-patch16-256-multilingual"
 """
 
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
 import gradio as gr
 import requests
 import json
+import logging
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
 from gradio.routes import mount_gradio_app
 
+
+# Filter out /v1 requests from the access log
+class LogFilter(logging.Filter):
+    def filter(self, record):
+        if record.args and len(record.args) >= 3:
+            if "/v1" in str(record.args[2]):
+                return False
+        return True
+
+
+logger = logging.getLogger("uvicorn.access")
+logger.addFilter(LogFilter())
+
 # Application metadata
 __version__ = "1.0.0"
 __author__ = "lamhieu"

@@ -41,17 +55,18 @@ __metadata__ = {
 EMBEDDINGS_API_URL = "http://localhost:7860/v1/embeddings"
 
 # Markdown description for the main interface
 APP_DESCRIPTION = f"""
+<br />
 ## π **Lightweight Embeddings API**
 
 The **Lightweight Embeddings API** is a fast, free, and multilingual service designed for generating embeddings and reranking with support for both **text** and **image** inputs. Get started below by exploring our interactive playground or using the cURL examples provided.
 
+### β¨ Key Features
 
-- **
-- **
-- **
+- **Free, Unlimited, and Multilingual**: A fully free API service with no usage limits, capable of processing text in over 100+ languages to support global applications seamlessly.
+- **Advanced Embedding and Reranking**: Generate high-quality text and image-text embeddings using state-of-the-art models, alongside robust reranking capabilities for enhanced results.
+- **Optimized and Flexible**: Built for speed with lightweight transformer models, efficient backends for rapid inference on low-resource systems, and support for diverse use cases with models.
+- **Production-Ready with Ease of Use**: Deploy effortlessly using Docker for a hassle-free setup, and experiment interactively through a **Gradio-powered playground** with comprehensive REST API documentation.
 
 ### π Links
 - [Documentation]({__metadata__["docs"]}) | [GitHub]({__metadata__["github"]}) | [Playground]({__metadata__["spaces"]})

@@ -117,7 +132,11 @@ def create_main_interface():
     # Available model options for the dropdown
     model_options = [
         "multilingual-e5-small",
+        "multilingual-e5-base",
+        "multilingual-e5-large",
+        "snowflake-arctic-embed-l-v2.0",
         "paraphrase-multilingual-MiniLM-L12-v2",
+        "paraphrase-multilingual-mpnet-base-v2",
         "bge-m3",
         "google/siglip-base-patch16-256-multilingual",
     ]

@@ -167,7 +186,7 @@ def create_main_interface():
 -H 'Content-Type: application/json' \\
 -d '{
   "model": "multilingual-e5-small",
-  "input": "
+  "input": "That is a happy person"
 }'
 ```
 

@@ -179,11 +198,11 @@ def create_main_interface():
 -H 'Content-Type: application/json' \\
 -d '{
   "model": "multilingual-e5-small",
-  "queries": "
+  "queries": "That is a happy person",
   "candidates": [
-    "
-    "
-    "
+    "That is a happy dog",
+    "That is a very happy person",
+    "Today is a sunny day"
   ]
 }'
 ```
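
Side note on the new access-log filter: uvicorn's access logger passes the request details positionally in `record.args`, and the filter drops any record whose third argument (the request path) contains `/v1`. A minimal, self-contained sketch of that behavior — the exact `args` layout is an assumption based on uvicorn's default access-log format:

```python
import logging

# Assumed layout: uvicorn's access logger logs with
# (client_addr, method, path, http_version, status_code) as record.args,
# so args[2] is the request path. Mirrors the LogFilter added above.
class LogFilter(logging.Filter):
    def filter(self, record):
        if record.args and len(record.args) >= 3:
            if "/v1" in str(record.args[2]):
                return False  # drop /v1 API calls from the access log
        return True

logger = logging.getLogger("demo.access")
logger.addFilter(LogFilter())
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

# The /v1 call is filtered out; only the page request is logged.
logger.info('%s - "%s %s HTTP/%s" %d', "127.0.0.1", "POST", "/v1/embeddings", "1.1", 200)
logger.info('%s - "%s %s HTTP/%s" %d', "127.0.0.1", "GET", "/", "1.1", 200)
```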

lightweight_embeddings/router.py

@@ -1,18 +1,16 @@
-# filename: router.py
-
 """
-FastAPI Router for Embeddings Service
+FastAPI Router for Embeddings Service (Revised & Simplified)
 
-to generate embeddings and rank candidates.
+Exposes the EmbeddingsService methods via a RESTful API.
 
 Supported Text Model IDs:
 - "multilingual-e5-small"
 - "paraphrase-multilingual-MiniLM-L12-v2"
 - "bge-m3"
 
-Supported Image Model
-- "
+Supported Image Model IDs:
+- "siglip-base-patch16-256-multilingual"
+  (Extend as needed)
 """
 
 from __future__ import annotations

@@ -24,143 +22,87 @@ from enum import Enum
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
 
-from .service import
+from .service import (
+    ModelConfig,
+    TextModelType,
+    ImageModelType,
+    EmbeddingsService,
+)
 
 logger = logging.getLogger(__name__)
 
-# Initialize FastAPI router
 router = APIRouter(
     tags=["v1"],
     responses={404: {"description": "Not found"}},
 )
 
 
-class
-    """
-    High-level distinction for text vs. image models.
-    """
-
+class ModelKind(str, Enum):
     TEXT = "text"
     IMAGE = "image"
 
 
-def
+def detect_model_kind(model_id: str) -> ModelKind:
     """
-    Detect whether
-
-    Supported text model IDs:
-    - "multilingual-e5-small"
-    - "paraphrase-multilingual-MiniLM-L12-v2"
-    - "bge-m3"
-
-    Supported image model ID:
-    - "google/siglip-base-patch16-256-multilingual"
-      (or any model containing "siglip" in its identifier).
-
-    Args:
-        model_id: String identifier of the model.
-
-    Returns:
-        ModelType.TEXT if it matches one of the recognized text model IDs,
-        ModelType.IMAGE if it matches (or contains "siglip").
-
-    Raises:
-        ValueError: If the model_id is not recognized as either text or image.
+    Detect whether model_id is for a text or an image model.
+    Raises ValueError if unrecognized.
     """
-    error_msg = (
-        f"Unsupported model ID: '{model_id}'.\n"
-        "Valid text model IDs are: "
-        "'multilingual-e5-small', 'paraphrase-multilingual-MiniLM-L12-v2', 'bge-m3'.\n"
-        "Valid image model ID contains 'siglip', for example: 'google/siglip-base-patch16-256-multilingual'."
-    )
-    raise ValueError(error_msg)
+    if model_id in [m.value for m in TextModelType]:
+        return ModelKind.TEXT
+    elif model_id in [m.value for m in ImageModelType]:
+        return ModelKind.IMAGE
+    else:
+        raise ValueError(
+            f"Unrecognized model ID: {model_id}.\n"
+            f"Valid text: {[m.value for m in TextModelType]}\n"
+            f"Valid image: {[m.value for m in ImageModelType]}"
        )
 
 
-# Pydantic Models for request/response
 class EmbeddingRequest(BaseModel):
     """
-    Model IDs (text):
-    - "multilingual-e5-small"
-    - "paraphrase-multilingual-MiniLM-L12-v2"
-    - "bge-m3"
-
-    Model ID (image):
-    - "google/siglip-base-patch16-256-multilingual"
+    Input to /v1/embeddings
     """
 
     model: str = Field(
         default=TextModelType.MULTILINGUAL_E5_SMALL.value,
         description=(
-            "
-            "'paraphrase-multilingual-MiniLM-L12-v2', 'bge-m3'. "
-            "
-            "or any ID containing 'siglip'."
+            "Which model ID to use? "
+            "Text: ['multilingual-e5-small', 'multilingual-e5-base', 'multilingual-e5-large', 'snowflake-arctic-embed-l-v2.0', 'paraphrase-multilingual-MiniLM-L12-v2', 'paraphrase-multilingual-mpnet-base-v2', 'bge-m3']. "
+            "Image: ['siglip-base-patch16-256-multilingual']."
         ),
     )
     input: Union[str, List[str]] = Field(
-        ...,
-        description=(
-            "Input text(s) or image path(s)/URL(s). "
-            "Accepts a single string or a list of strings."
-        ),
+        ..., description="Text(s) or Image URL(s)/path(s)."
     )
 
 
 class RankRequest(BaseModel):
     """
-    Model IDs (text):
-    - "multilingual-e5-small"
-    - "paraphrase-multilingual-MiniLM-L12-v2"
-    - "bge-m3"
-
-    Model ID (image):
-    - "google/siglip-base-patch16-256-multilingual"
+    Input to /v1/rank
     """
 
     model: str = Field(
         default=TextModelType.MULTILINGUAL_E5_SMALL.value,
         description=(
-            "Model ID
-            "
-            "For image queries, use an ID containing 'siglip' such as 'google/siglip-base-patch16-256-multilingual'."
+            "Model ID for the queries. "
+            "Text or Image model, e.g. 'siglip-base-patch16-256-multilingual' for images."
         ),
     )
     queries: Union[str, List[str]] = Field(
-        ...,
-        description=(
-            "Query input(s): can be text(s) or image path(s)/URL(s). "
-            "If using an image model, ensure your inputs reference valid image paths or URLs."
-        ),
+        ..., description="Query text or image(s) depending on the model type."
    )
     candidates: List[str] = Field(
-        ...,
-        description=(
-            "List of candidate texts to rank against the given queries. "
-            "Currently, all candidates must be text."
-        ),
+        ..., description="Candidate texts to rank. Must be text."
    )
 
 
 class EmbeddingResponse(BaseModel):
     """
-    Response
+    Response of /v1/embeddings
     """
 
     object: str
     data: List[dict]
     model: str
     usage: dict

@@ -168,14 +110,12 @@ class EmbeddingResponse(BaseModel):
 
 class RankResponse(BaseModel):
     """
-    Response
+    Response of /v1/rank
     """
 
     probabilities: List[List[float]]
     cosine_similarities: List[List[float]]
 
-
-# Initialize the service with default configuration
 service_config = ModelConfig()
 embeddings_service = EmbeddingsService(config=service_config)
 

@@ -183,114 +123,81 @@ embeddings_service = EmbeddingsService(config=service_config)
 @router.post("/embeddings", response_model=EmbeddingResponse, tags=["embeddings"])
 async def create_embeddings(request: EmbeddingRequest):
     """
-    Supported Model IDs for text:
-    - "multilingual-e5-small"
-    - "paraphrase-multilingual-MiniLM-L12-v2"
-    - "bge-m3"
-
-    Supported Model ID for image:
-    - "google/siglip-base-patch16-256-multilingual"
-
-    Steps:
-    1. Detects model type (text or image) based on the model ID.
-    2. Adjusts the service configuration accordingly.
-    3. Produces embeddings via the EmbeddingsService.
-    4. Returns embedding vectors along with usage information.
-
-    Raises:
-        HTTPException: For any errors during model detection or embedding generation.
+    Generates embeddings for the given input (text or image).
     """
     try:
+        # 1) Determine if it's text or image
+        mkind = detect_model_kind(request.model)
 
-        #
-        if
+        # 2) Update global service config so it uses the correct model
+        if mkind == ModelKind.TEXT:
             service_config.text_model_type = TextModelType(request.model)
         else:
-            service_config.
+            service_config.image_model_type = ImageModelType(request.model)
 
-        # Generate
+        # 3) Generate
         embeddings = await embeddings_service.generate_embeddings(
-            input_data=request.input, modality=
+            input_data=request.input, modality=mkind.value
         )
 
-        # Estimate tokens
+        # 4) Estimate tokens for text only
         total_tokens = 0
-        if
+        if mkind == ModelKind.TEXT:
             total_tokens = embeddings_service.estimate_tokens(request.input)
 
+        resp = {
             "object": "list",
-            "data": [
-                {
-                    "object": "embedding",
-                    "index": idx,
-                    "embedding": emb.tolist(),
-                }
-                for idx, emb in enumerate(embeddings)
-            ],
+            "data": [],
             "model": request.model,
             "usage": {
                 "prompt_tokens": total_tokens,
                 "total_tokens": total_tokens,
             },
         }
+        for idx, emb in enumerate(embeddings):
+            resp["data"].append(
+                {
+                    "object": "embedding",
+                    "index": idx,
+                    "embedding": emb.tolist(),
+                }
+            )
+
+        return resp
 
     except Exception as e:
-        "Failed to generate embeddings.
-        f"
+        msg = (
+            "Failed to generate embeddings. Check model ID, inputs, etc.\n"
+            f"Details: {str(e)}"
        )
-        logger.error(
-        raise HTTPException(status_code=500, detail=
+        logger.error(msg)
+        raise HTTPException(status_code=500, detail=msg)
 
 
 @router.post("/rank", response_model=RankResponse, tags=["rank"])
 async def rank_candidates(request: RankRequest):
     """
-    Supported Model IDs for text queries:
-    - "multilingual-e5-small"
-    - "paraphrase-multilingual-MiniLM-L12-v2"
-    - "bge-m3"
-
-    Supported Model ID for image queries:
-    - "google/siglip-base-patch16-256-multilingual"
-
-    Steps:
-    1. Detects model type (text or image) based on the query model ID.
-    2. Adjusts the service configuration accordingly.
-    3. Generates embeddings for the queries (text or image).
-    4. Generates embeddings for the candidates (always text).
-    5. Computes cosine similarities and returns softmax-normalized probabilities.
-
-    Raises:
-        HTTPException: For any errors during model detection or ranking.
+    Ranks candidate texts against the given queries (which can be text or image).
     """
     try:
+        mkind = detect_model_kind(request.model)
 
-        if modality == ModelType.TEXT:
+        if mkind == ModelKind.TEXT:
             service_config.text_model_type = TextModelType(request.model)
         else:
-            service_config.
+            service_config.image_model_type = ImageModelType(request.model)
 
-        # Perform the ranking
         results = await embeddings_service.rank(
             queries=request.queries,
             candidates=request.candidates,
-            modality=
+            modality=mkind.value,
         )
         return results
 
     except Exception as e:
-        "Failed to rank candidates.
-        f"
+        msg = (
+            "Failed to rank candidates. Check model ID, inputs, etc.\n"
+            f"Details: {str(e)}"
        )
-        logger.error(
-        raise HTTPException(status_code=500, detail=
+        logger.error(msg)
+        raise HTTPException(status_code=500, detail=msg)
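
For reference (not part of the commit), the reworked endpoints can be exercised with a small Python client mirroring the cURL examples above; `http://localhost:7860` is the assumed local deployment from `EMBEDDINGS_API_URL`:

```python
import requests

BASE = "http://localhost:7860/v1"  # assumed local deployment

# /v1/embeddings: the model ID is routed to TextModelType or
# ImageModelType by detect_model_kind() before embedding.
emb = requests.post(
    f"{BASE}/embeddings",
    json={"model": "multilingual-e5-small", "input": "That is a happy person"},
    timeout=30,
).json()
print(len(emb["data"][0]["embedding"]), emb["usage"])

# /v1/rank: queries may be text or image URLs; candidates are always text.
rank = requests.post(
    f"{BASE}/rank",
    json={
        "model": "multilingual-e5-small",
        "queries": "That is a happy person",
        "candidates": [
            "That is a happy dog",
            "That is a very happy person",
            "Today is a sunny day",
        ],
    },
    timeout=30,
).json()
print(rank["probabilities"], rank["cosine_similarities"])
```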

lightweight_embeddings/service.py

@@ -1,12 +1,10 @@
-# filename: service.py
-
 """
-Lightweight Embeddings Service Module
+Lightweight Embeddings Service Module (Revised & Simplified)
 
 This module provides a service for generating and comparing embeddings from text and images
 using state-of-the-art transformer models. It supports both CPU and GPU inference.
 
+Features:
 - Text and image embedding generation
 - Cross-modal similarity ranking
 - Batch processing support

@@ -17,8 +15,8 @@ Supported Text Model IDs:
 - "paraphrase-multilingual-MiniLM-L12-v2"
 - "bge-m3"
 
-Supported Image Model
-- "google/siglip-base-patch16-256-multilingual"
+Supported Image Model IDs:
+- "google/siglip-base-patch16-256-multilingual" (default, but extensible)
 """
 
 from __future__ import annotations

@@ -37,441 +35,351 @@ from PIL import Image
 from sentence_transformers import SentenceTransformer
 from transformers import AutoProcessor, AutoModel
 
-# Configure logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
-# Default Model IDs
-TEXT_MODEL_ID = "Xenova/multilingual-e5-small"
-IMAGE_MODEL_ID = "google/siglip-base-patch16-256-multilingual"
-
 
 class TextModelType(str, Enum):
     """
     Enumeration of supported text models.
-
-    with your local or remote environment.
+    Adjust as needed for your environment.
     """
 
     MULTILINGUAL_E5_SMALL = "multilingual-e5-small"
+    MULTILINGUAL_E5_BASE = "multilingual-e5-base"
+    MULTILINGUAL_E5_LARGE = "multilingual-e5-large"
+    SNOWFLAKE_ARCTIC_EMBED_L_V2 = "snowflake-arctic-embed-l-v2.0"
     PARAPHRASE_MULTILINGUAL_MINILM_L12_V2 = "paraphrase-multilingual-MiniLM-L12-v2"
+    PARAPHRASE_MULTILINGUAL_MPNET_BASE_V2 = "paraphrase-multilingual-mpnet-base-v2"
     BGE_M3 = "bge-m3"
 
 
+class ImageModelType(str, Enum):
+    """
+    Enumeration of supported image models.
+    """
+
+    SIGLIP_BASE_PATCH16_256_MULTILINGUAL = "siglip-base-patch16-256-multilingual"
+
+
 class ModelInfo(NamedTuple):
     """
-    Simple container
+    Simple container that maps an enum to:
+      - model_id: Hugging Face model ID (or local path)
+      - onnx_file: Path to ONNX file (if available)
     """
 
     model_id: str
-    onnx_file: str
+    onnx_file: Optional[str] = None
 
 
 @dataclass
 class ModelConfig:
     """
-    Configuration
+    Configuration for text and image models.
     """
 
-    provider: str = "CPUExecutionProvider"
-    backend: str = "onnx"
-    logit_scale: float = 4.60517
     text_model_type: TextModelType = TextModelType.MULTILINGUAL_E5_SMALL
+    image_model_type: ImageModelType = (
+        ImageModelType.SIGLIP_BASE_PATCH16_256_MULTILINGUAL
+    )
+
+    # If you need extra parameters like `logit_scale`, etc., keep them here
+    logit_scale: float = 4.60517
 
     @property
     def text_model_info(self) -> ModelInfo:
         """
+        Return ModelInfo for the configured text_model_type.
         """
+        text_configs = {
             TextModelType.MULTILINGUAL_E5_SMALL: ModelInfo(
-                "Xenova/multilingual-e5-small",
-                "onnx/model_quantized.onnx",
+                model_id="Xenova/multilingual-e5-small",
+                onnx_file="onnx/model_quantized.onnx",
+            ),
+            TextModelType.MULTILINGUAL_E5_BASE: ModelInfo(
+                model_id="Xenova/multilingual-e5-base",
+                onnx_file="onnx/model_quantized.onnx",
+            ),
+            TextModelType.MULTILINGUAL_E5_LARGE: ModelInfo(
+                model_id="Xenova/multilingual-e5-large",
+                onnx_file="onnx/model_quantized.onnx",
+            ),
+            TextModelType.SNOWFLAKE_ARCTIC_EMBED_L_V2: ModelInfo(
+                model_id="Snowflake/snowflake-arctic-embed-l-v2.0",
+                onnx_file="onnx/model_quantized.onnx",
            ),
             TextModelType.PARAPHRASE_MULTILINGUAL_MINILM_L12_V2: ModelInfo(
-                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
-                "onnx/model_quint8_avx2.onnx",
+                model_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+                onnx_file="onnx/model_quint8_avx2.onnx",
+            ),
+            TextModelType.PARAPHRASE_MULTILINGUAL_MPNET_BASE_V2: ModelInfo(
+                model_id="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
+                onnx_file="onnx/model_quint8_avx2.onnx",
             ),
             TextModelType.BGE_M3: ModelInfo(
-                "BAAI/bge-m3",
-                "model.onnx",
+                model_id="BAAI/bge-m3",
+                onnx_file="onnx/model.onnx",
             ),
         }
-        return
+        return text_configs[self.text_model_type]
+
+    @property
+    def image_model_info(self) -> ModelInfo:
+        """
+        Return ModelInfo for the configured image_model_type.
+        """
+        image_configs = {
+            ImageModelType.SIGLIP_BASE_PATCH16_256_MULTILINGUAL: ModelInfo(
+                model_id="google/siglip-base-patch16-256-multilingual"
+            ),
+        }
+        return image_configs[self.image_model_type]
 
 
 class EmbeddingsService:
     """
-    Service for generating
-
-    This service supports multiple text models and a single image model.
-    It provides methods for:
-    - Generating text embeddings
-    - Generating image embeddings
-    - Ranking candidates by similarity
+    Service for generating text/image embeddings and performing ranking.
     """
 
-    def __init__(self, config: Optional[ModelConfig] = None)
-        """
-        Initialize the EmbeddingsService.
-
-        Args:
-            config: Optional ModelConfig object to override default settings.
-        """
-        # Determine whether GPU (CUDA) is available
+    def __init__(self, config: Optional[ModelConfig] = None):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        # Use the provided config or fall back to defaults
         self.config = config or ModelConfig()
 
-        #
+        # Preloaded text & image models
         self.text_models: Dict[TextModelType, SentenceTransformer] = {}
+        self.image_models: Dict[ImageModelType, AutoModel] = {}
+        self.image_processors: Dict[ImageModelType, AutoProcessor] = {}
 
         # Load all models
-        self.
+        self._load_all_models()
 
-    def
+    def _load_all_models(self) -> None:
         """
-        This pre-loads all text models defined in the TextModelType enum
-        and a single image model, enabling quick switching at runtime.
+        Pre-load all known text and image models for quick switching.
         """
         try:
-            logger.info(f"All models loaded successfully on {self.device}.")
+            for t_model_type in TextModelType:
+                info = ModelConfig(text_model_type=t_model_type).text_model_info
+                logger.info("Loading text model: %s", info.model_id)
+
+                # If you have an ONNX file AND your SentenceTransformer supports ONNX
+                if info.onnx_file:
+                    logger.info("Using ONNX file: %s", info.onnx_file)
+                    # The following 'backend' & 'model_kwargs' parameters
+                    # are recognized only in special/certain distributions of SentenceTransformer
+                    self.text_models[t_model_type] = SentenceTransformer(
+                        info.model_id,
+                        device=self.device,
+                        backend="onnx",  # or "ort" in some custom forks
+                        model_kwargs={
+                            "provider": "CPUExecutionProvider",  # or "CUDAExecutionProvider"
+                            "file_name": info.onnx_file,
+                        },
+                    )
+                else:
+                    # Fallback: standard HF loading
+                    self.text_models[t_model_type] = SentenceTransformer(
+                        info.model_id, device=self.device
+                    )
 
+            for i_model_type in ImageModelType:
+                model_id = ModelConfig(
+                    image_model_type=i_model_type
+                ).image_model_info.model_id
+                logger.info("Loading image model: %s", model_id)
+
+                # Typically, for CLIP-like models:
+                model = AutoModel.from_pretrained(model_id).to(self.device)
+                processor = AutoProcessor.from_pretrained(model_id)
+
+                self.image_models[i_model_type] = model
+                self.image_processors[i_model_type] = processor
+
+            logger.info("All models loaded successfully.")
         except Exception as e:
-            )
-            raise RuntimeError(f"Failed to load models: {str(e)}") from e
+            msg = f"Error loading models: {str(e)}"
+            logger.error(msg)
+            raise RuntimeError(msg) from e
 
     @staticmethod
     def _validate_text_input(input_text: Union[str, List[str]]) -> List[str]:
         """
-        Args:
-            input_text: Either a single string or a list of strings.
-
-        Returns:
-            A list of strings to process.
-
-        Raises:
-            ValueError: If input_text is empty or not string-based.
+        Ensure input_text is a non-empty string or list of strings.
         """
         if isinstance(input_text, str):
+            if not input_text.strip():
+                raise ValueError("Text input cannot be empty.")
             return [input_text]
+
         if not isinstance(input_text, list) or not all(
             isinstance(x, str) for x in input_text
         ):
-            raise ValueError(
-            )
-        if not input_text:
+            raise ValueError("Text input must be a string or a list of strings.")
+
+        if len(input_text) == 0:
             raise ValueError("Text input list cannot be empty.")
+
         return input_text
 
     @staticmethod
     def _validate_modality(modality: str) -> None:
-        """
-        Args:
-            modality: Must be either 'text' or 'image'.
-
-            ValueError: If modality is neither 'text' nor 'image'.
-        """
-        raise ValueError(
-            "Invalid modality. Please specify 'text' or 'image' for embeddings."
-        )
+        if modality not in ("text", "image"):
+            raise ValueError("Unsupported modality. Must be 'text' or 'image'.")
 
-    def _process_image(self, image_path: Union[str, Path]) -> torch.Tensor:
+    def _process_image(self, path_or_url: Union[str, Path]) -> torch.Tensor:
         """
-        Load and preprocess an image from either a local path or a URL.
-
-        Args:
-            image_path: Path to the local image file or a URL.
-
-        Returns:
-            Torch Tensor suitable for model input.
-
-        Raises:
-            ValueError: If the image file or URL cannot be loaded.
+        Download/Load image from path/URL and apply transformations.
         """
         try:
-            if
-                image_content = BytesIO(response.content)
+            if isinstance(path_or_url, Path) or not path_or_url.startswith("http"):
+                # Local file path
+                img = Image.open(path_or_url).convert("RGB")
             else:
+                # URL
+                resp = requests.get(path_or_url, timeout=10)
+                resp.raise_for_status()
+                img = Image.open(BytesIO(resp.content)).convert("RGB")
+
+            proc = self.image_processors[self.config.image_model_type]
+            data = proc(images=img, return_tensors="pt").to(self.device)
+            return data
         except Exception as e:
-            raise ValueError(
-                f"Failed to process image at '{image_path}'. Check the path/URL and file format.\n"
-                f"Details: {str(e)}"
-            ) from e
+            raise ValueError(f"Error processing image '{path_or_url}': {str(e)}") from e
 
     def _generate_text_embeddings(self, texts: List[str]) -> np.ndarray:
         """
-        using the currently configured text model.
-
-        Args:
-            texts: A list of text strings.
-
-        Returns:
-            Numpy array of shape (num_texts, embedding_dim).
-
-        Raises:
-            RuntimeError: If the text model fails to generate embeddings.
+        Generate text embeddings using the currently configured text model.
         """
         try:
-            logger.info(
-                f"Generating embeddings for {len(texts)} text items using model: "
-                f"{self.config.text_model_type}"
-            )
-            # Select the preloaded text model based on the current config
             model = self.text_models[self.config.text_model_type]
-            embeddings = model.encode(texts)
+            embeddings = model.encode(texts)  # shape: (num_items, emb_dim)
             return embeddings
         except Exception as e:
-            f"Error generating text embeddings
-            )
-            logger.error(error_msg)
-            raise RuntimeError(error_msg) from e
+            raise RuntimeError(
+                f"Error generating text embeddings for model '{self.config.text_model_type}': {e}"
+            ) from e
 
     def _generate_image_embeddings(
         self,
+        images: Union[str, List[str]],
+        batch_size: Optional[int] = None,
     ) -> np.ndarray:
         """
-        Args:
-            input_data: Either a single image path/URL or a list of them.
-            batch_size: Batch size for processing images in chunks.
-                If None, process all at once.
-
-        Returns:
-            Numpy array of shape (num_images, embedding_dim).
-
-        Raises:
-            RuntimeError: If the image model fails to generate embeddings.
+        Generate image embeddings using the currently configured image model.
+        If `batch_size` is None, all images are processed at once.
         """
         try:
+            model = self.image_models[self.config.image_model_type]
+
+            # Single image
+            if isinstance(images, str):
+                processed = self._process_image(images)
                 with torch.no_grad():
-                return
+                    emb = model.get_image_features(**processed)
+                return emb.cpu().numpy()
 
             # Multiple images
-            logger.info(f"Generating embeddings for {len(input_data)} images.")
             if batch_size is None:
-                # Process all
+                # Process them all in one batch
+                tensors = []
+                for img_path in images:
+                    tensors.append(self._process_image(img_path))
+                # Concatenate
+                keys = tensors[0].keys()
+                combined = {k: torch.cat([t[k] for t in tensors], dim=0) for k in keys}
                 with torch.no_grad():
-                    f"Processing image batch {i // batch_size + 1} with size up to {batch_size}."
-                )
-                processed = self._process_image(img_path)
+                    emb = model.get_image_features(**combined)
+                return emb.cpu().numpy()
+
+            # Process in smaller batches
+            all_embeddings = []
+            for i in range(0, len(images), batch_size):
+                batch_images = images[i : i + batch_size]
+                # Process each sub-batch
+                tensors = []
+                for img_path in batch_images:
+                    tensors.append(self._process_image(img_path))
+                keys = tensors[0].keys()
+                combined = {k: torch.cat([t[k] for t in tensors], dim=0) for k in keys}
+
                 with torch.no_grad():
+                    emb = model.get_image_features(**combined)
+                    all_embeddings.append(emb.cpu().numpy())
 
-            return np.vstack(
+            return np.vstack(all_embeddings)
 
         except Exception as e:
-            f"Error generating image embeddings
-            )
-            logger.error(error_msg)
-            raise RuntimeError(error_msg) from e
+            raise RuntimeError(
+                f"Error generating image embeddings for model '{self.config.image_model_type}': {e}"
+            ) from e
 
     async def generate_embeddings(
         self,
         input_data: Union[str, List[str]],
-        modality: Literal["text", "image"]
+        modality: Literal["text", "image"],
         batch_size: Optional[int] = None,
     ) -> np.ndarray:
         """
-        Asynchronously generate embeddings for text or image
-
-        Args:
-            input_data: A string or list of strings (text/image paths/URLs).
-            modality: "text" for text data or "image" for image data.
-            batch_size: Optional batch size for processing images in chunks.
-
-        Returns:
-            Numpy array of embeddings.
-
-        Raises:
-            ValueError: If the modality is invalid.
+        Asynchronously generate embeddings for text or image.
         """
         self._validate_modality(modality)
         if modality == "text":
-            return self._generate_text_embeddings(
+            text_list = self._validate_text_input(input_data)
+            return self._generate_text_embeddings(text_list)
         else:
-            return self._generate_image_embeddings(input_data, batch_size)
+            return self._generate_image_embeddings(input_data, batch_size=batch_size)
 
     async def rank(
         self,
         queries: Union[str, List[str]],
         candidates: List[str],
-        modality: Literal["text", "image"]
+        modality: Literal["text", "image"],
         batch_size: Optional[int] = None,
     ) -> Dict[str, List[List[float]]]:
         """
-        Rank
-
-        Args:
-            queries: Query text(s) or image path(s)/URL(s).
-            candidates: Candidate texts to be ranked.
-                (Note: This implementation always treats candidates as text.)
-            modality: "text" for text queries or "image" for image queries.
-            batch_size: Batch size if images are processed in chunks.
-
-        Returns:
-            Dictionary containing:
-            - "probabilities": 2D list of softmax-normalized scores.
-            - "cosine_similarities": 2D list of raw cosine similarity values.
-
-        Raises:
-            RuntimeError: If the query or candidate embeddings fail to generate.
+        Rank candidates (always text) against the queries, which may be text or image.
+        Returns dict of { probabilities, cosine_similarities }.
         """
-        # Generate embeddings for candidates (always text)
-        candidate_embeds = await self.generate_embeddings(
-            candidates, modality="text", batch_size=batch_size
-        )
-
-        # Compute cosine similarity and scaled probabilities
-        cosine_sims = self.cosine_similarity(query_embeds, candidate_embeds)
-        logit_scale = np.exp(self.config.logit_scale)
-        probabilities = self.softmax(logit_scale * cosine_sims)
+        # 1) Generate embeddings for queries
+        query_embeds = await self.generate_embeddings(queries, modality, batch_size)
+        # 2) Generate embeddings for text candidates
+        candidate_embeds = await self.generate_embeddings(candidates, "text")
+
+        # 3) Compute cosine sim
+        sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
+        # 4) Apply logit scale + softmax
+        scaled = np.exp(self.config.logit_scale) * sim_matrix
+        probs = self.softmax(scaled)
 
         return {
-            "probabilities":
-            "cosine_similarities":
+            "probabilities": probs.tolist(),
+            "cosine_similarities": sim_matrix.tolist(),
         }
 
     def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
         """
-        Args:
-            input_data: A string or list of strings representing text input.
-
-        Returns:
-            Estimated token count (int).
-
-        Raises:
-            ValueError: If the input is not valid text data.
+        Very rough heuristic: ~4 chars per token.
         """
         texts = self._validate_text_input(input_data)
-        # Very rough approximation: assume ~4 characters per token
         total_chars = sum(len(t) for t in texts)
         return max(1, round(total_chars / 4))
 
     @staticmethod
     def softmax(scores: np.ndarray) -> np.ndarray:
         """
-        Args:
-            scores: Numpy array of shape (..., num_candidates).
-
-        Returns:
-            Numpy array of softmax-normalized values, same shape as scores.
+        Standard softmax along the last dimension.
         """
-        return
+        exps = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
+        return exps / np.sum(exps, axis=-1, keepdims=True)
 
     @staticmethod
-    def cosine_similarity(
-        query_embeds: np.ndarray, candidate_embeds: np.ndarray
-    ) -> np.ndarray:
+    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> np.ndarray:
         """
-            query_embeds: Numpy array of shape (num_queries, embed_dim).
-            candidate_embeds: Numpy array of shape (num_candidates, embed_dim).
-
-        Returns:
-            2D Numpy array of shape (num_queries, num_candidates)
-            containing cosine similarity scores.
+        a: (N, D)
+        b: (M, D)
+        Return: (N, M) of cos sim
         """
-            candidate_embeds, axis=1, keepdims=True
-        )
-        return np.dot(query_norm, candidate_norm.T)
+        a_norm = a / (np.linalg.norm(a, axis=1, keepdims=True) + 1e-9)
+        b_norm = b / (np.linalg.norm(b, axis=1, keepdims=True) + 1e-9)
+        return np.dot(a_norm, b_norm.T)
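
For reference (not part of the commit), the ranking math in `rank()` reduces to the two static helpers above: raw cosine similarities are scaled by `exp(logit_scale)` (about 100 with the default 4.60517) and then softmax-normalized per query. A self-contained sketch with toy vectors:

```python
import numpy as np

LOGIT_SCALE = 4.60517  # ModelConfig default; exp(4.60517) is roughly 100

def softmax(scores: np.ndarray) -> np.ndarray:
    # Standard softmax along the last dimension, shifted for numerical stability
    exps = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
    return exps / np.sum(exps, axis=-1, keepdims=True)

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    # (N, D) x (M, D) -> (N, M) matrix of cosine similarities
    a_norm = a / (np.linalg.norm(a, axis=1, keepdims=True) + 1e-9)
    b_norm = b / (np.linalg.norm(b, axis=1, keepdims=True) + 1e-9)
    return np.dot(a_norm, b_norm.T)

queries = np.array([[1.0, 0.0]])                 # toy query embedding, shape (1, 2)
candidates = np.array([[1.0, 0.1], [0.0, 1.0]])  # toy candidate embeddings, shape (2, 2)

sims = cosine_similarity(queries, candidates)    # raw cosine similarities, shape (1, 2)
probs = softmax(np.exp(LOGIT_SCALE) * sims)      # scaled then normalized, as in rank()
print(sims, probs)  # the near-parallel candidate takes essentially all the probability
```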