chore: update something
lightweight_embeddings/__init__.py

@@ -72,10 +72,16 @@ The **Lightweight Embeddings API** is a fast, free, and multilingual service des
 - **Optimized and Flexible**: Built for speed with lightweight transformer models, efficient backends for rapid inference on low-resource systems, and support for diverse use cases with models.
 - **Production-Ready with Ease of Use**: Deploy effortlessly using Docker for a hassle-free setup, and experiment interactively through a **Gradio-powered playground** with comprehensive REST API documentation.
 
+### 🔒 Privacy and Data Transparency
+
+- **Minimal Data Collection**: This API respects your privacy by design. It does not store or log any input data (text or images) provided by users. Only anonymous usage counts (IDs for statistical purposes) are recorded to monitor and improve the service.
+- **Open Source and Transparent**: The API is fully open source, ensuring transparency and allowing users to inspect the code for themselves. This guarantees trust and confidence in how the service handles your data.
+
 ### 🔗 Links
 - [Documentation]({__metadata__["docs"]}) | [GitHub]({__metadata__["github"]}) | [Playground]({__metadata__["spaces"]})
 """
 
+
 # Initialize FastAPI application
 app = FastAPI(
     title="Lightweight Embeddings API",
@@ -159,7 +165,7 @@ def create_main_interface():
     ]
 
     with gr.Blocks(title="Lightweight Embeddings", theme="default") as demo:
-
+        gr.Markdown(APP_DESCRIPTION)
         with gr.Row():
             with gr.Column():
                 gr.Markdown("### 🔬 Try the Embeddings Playground")
lightweight_embeddings/analytics.py

@@ -16,9 +16,14 @@ class Analytics:
         """
         self.pool = redis.ConnectionPool.from_url(redis_url, decode_responses=True)
         self.redis_client = redis.Redis(connection_pool=self.pool)
-        self.local_buffer =
-
-
+        self.local_buffer = {
+            "access": defaultdict(
+                lambda: defaultdict(int)
+            ),  # {period: {model_id: access_count}}
+            "tokens": defaultdict(
+                lambda: defaultdict(int)
+            ),  # {period: {model_id: tokens_count}}
+        }
         self.sync_interval = sync_interval
         self.lock = asyncio.Lock()  # Async lock for thread-safe updates
         asyncio.create_task(self._start_sync_task())
@@ -34,26 +39,48 @@ class Analytics:
         year_key = now.strftime("%Y")
         return day_key, week_key, month_key, year_key
 
-    async def access(self, model_id: str):
+    async def access(self, model_id: str, tokens: int):
         """
-        Records an access for a specific model_id.
+        Records an access and token usage for a specific model_id.
+
+        Parameters:
+        - model_id: The ID of the model being accessed.
+        - tokens: Number of tokens used in this access.
         """
         day_key, week_key, month_key, year_key = self._get_period_keys()
 
         async with self.lock:
-
-            self.local_buffer[
-            self.local_buffer[
-            self.local_buffer[
-            self.local_buffer["
+            # Increment access count
+            self.local_buffer["access"][day_key][model_id] += 1
+            self.local_buffer["access"][week_key][model_id] += 1
+            self.local_buffer["access"][month_key][model_id] += 1
+            self.local_buffer["access"][year_key][model_id] += 1
+            self.local_buffer["access"]["total"][model_id] += 1
+
+            # Increment token count
+            self.local_buffer["tokens"][day_key][model_id] += tokens
+            self.local_buffer["tokens"][week_key][model_id] += tokens
+            self.local_buffer["tokens"][month_key][model_id] += tokens
+            self.local_buffer["tokens"][year_key][model_id] += tokens
+            self.local_buffer["tokens"]["total"][model_id] += tokens
 
-    async def stats(self) -> Dict[str, Dict[str, int]]:
+    async def stats(self) -> Dict[str, Dict[str, Dict[str, int]]]:
         """
         Returns statistics for all models from the local buffer.
+
+        Returns:
+        - A dictionary with access counts and token usage for each period.
         """
         async with self.lock:
             return {
-
+                "access": {
+                    period: dict(models)
+                    for period, models in self.local_buffer["access"].items()
+                },
+                "tokens": {
+                    period: dict(models)
+                    for period, models in self.local_buffer["tokens"].items()
+                },
             }
 
     async def _sync_to_redis(self):
@@ -62,12 +89,22 @@ class Analytics:
         """
         async with self.lock:
             pipeline = self.redis_client.pipeline()
-            for period, models in self.local_buffer.items():
+
+            # Sync access counts
+            for period, models in self.local_buffer["access"].items():
                 for model_id, count in models.items():
-                    redis_key = f"analytics:{period}"
+                    redis_key = f"analytics:access:{period}"
                     pipeline.hincrby(redis_key, model_id, count)
+
+            # Sync token counts
+            for period, models in self.local_buffer["tokens"].items():
+                for model_id, count in models.items():
+                    redis_key = f"analytics:tokens:{period}"
+                    pipeline.hincrby(redis_key, model_id, count)
+
             await pipeline.execute()
-            self.local_buffer.clear()  # Clear
+            self.local_buffer["access"].clear()  # Clear access buffer after sync
+            self.local_buffer["tokens"].clear()  # Clear tokens buffer after sync
 
     async def _start_sync_task(self):
         """
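
Taken together, the analytics changes replace the old single-level buffer with a two-level `{metric: {period: {model_id: count}}}` layout, drained into `analytics:access:{period}` and `analytics:tokens:{period}` Redis hashes. Below is a minimal sketch of that buffer in isolation (standard library only, no Redis or async lock; the model ID and token counts are illustrative):

```python
from collections import defaultdict
from datetime import datetime

# Two-level buffer, as in the diff: {metric: {period: {model_id: count}}}
local_buffer = {
    "access": defaultdict(lambda: defaultdict(int)),
    "tokens": defaultdict(lambda: defaultdict(int)),
}

def record(model_id: str, tokens: int) -> None:
    now = datetime.utcnow()
    # Period keys in the formats visible in the diff (day, week, month, year)
    periods = [
        now.strftime("%Y-%m-%d"),
        f"{now.year}-W{now.strftime('%U')}",
        now.strftime("%Y-%m"),
        now.strftime("%Y"),
        "total",
    ]
    for period in periods:
        local_buffer["access"][period][model_id] += 1
        local_buffer["tokens"][period][model_id] += tokens

record("example-model", tokens=42)
record("example-model", tokens=8)
print(dict(local_buffer["access"]["total"]))  # {'example-model': 2}
print(dict(local_buffer["tokens"]["total"]))  # {'example-model': 50}
```

Buffering locally and flushing with pipelined `HINCRBY` calls every `sync_interval` keeps per-request overhead at a dict increment, while the persisted counters survive restarts.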
lightweight_embeddings/router.py

@@ -124,8 +124,8 @@ class RankResponse(BaseModel):
     cosine_similarities: List[List[float]]
 
 
-class
-    """
+class StatsBucket(BaseModel):
+    """Helper model for daily/weekly/monthly/yearly stats"""
 
     total: Dict[str, int]
     daily: Dict[str, int]
@@ -134,6 +134,13 @@ class StatsResponse(BaseModel):
     yearly: Dict[str, int]
 
 
+class StatsResponse(BaseModel):
+    """Analytics stats response model, including both access and token counts"""
+
+    access: StatsBucket
+    tokens: StatsBucket
+
+
 service_config = ModelConfig()
 embeddings_service = EmbeddingsService(config=service_config)
 
@@ -235,20 +242,31 @@ async def rank_candidates(request: RankRequest, background_tasks: BackgroundTask
 
 @router.get("/stats", response_model=StatsResponse, tags=["stats"])
 async def get_stats():
-    """Get usage statistics for all models"""
+    """Get usage statistics for all models, including access and tokens."""
     try:
-
+        day_key = datetime.utcnow().strftime("%Y-%m-%d")
+        week_key = f"{datetime.utcnow().year}-W{datetime.utcnow().strftime('%U')}"
+        month_key = datetime.utcnow().strftime("%Y-%m")
+        year_key = datetime.utcnow().strftime("%Y")
+
+        stats_data = await analytics.stats()  # { "access": {...}, "tokens": {...} }
 
         return {
-            "
-
-
-
-
-
-
+            "access": {
+                "total": stats_data["access"].get("total", {}),
+                "daily": stats_data["access"].get(day_key, {}),
+                "weekly": stats_data["access"].get(week_key, {}),
+                "monthly": stats_data["access"].get(month_key, {}),
+                "yearly": stats_data["access"].get(year_key, {}),
+            },
+            "tokens": {
+                "total": stats_data["tokens"].get("total", {}),
+                "daily": stats_data["tokens"].get(day_key, {}),
+                "weekly": stats_data["tokens"].get(week_key, {}),
+                "monthly": stats_data["tokens"].get(month_key, {}),
+                "yearly": stats_data["tokens"].get(year_key, {}),
+            },
         }
-
     except Exception as e:
         msg = f"Failed to fetch analytics stats: {str(e)}"
         logger.error(msg)
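
With `StatsBucket` nested under both keys, `/stats` now returns two parallel buckets. A hedged client sketch; the base URL and the `/v1` route prefix are assumptions about the deployment, and the printed values are illustrative:

```python
import requests  # third-party HTTP client, assumed available

# Base URL and "/v1" prefix are assumptions; adjust to the actual deployment.
resp = requests.get("http://localhost:7860/v1/stats", timeout=10)
resp.raise_for_status()
stats = resp.json()

# Shape mirrors StatsResponse -> StatsBucket:
# {
#   "access": {"total": {...}, "daily": {...}, "weekly": {...},
#              "monthly": {...}, "yearly": {...}},
#   "tokens": {...same five periods...}
# }
print(stats["access"]["daily"])  # e.g. {"example-model": 123}
print(stats["tokens"]["total"])  # cumulative token usage per model
```

Note that `get_stats` derives its period keys with `datetime.utcnow()` independently of `Analytics._get_period_keys`, so the two key formats have to stay in sync.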
lightweight_embeddings/service.py

@@ -28,7 +28,7 @@ from __future__ import annotations
 
 import logging
 from enum import Enum
-from typing import List, Union, Literal, Dict, Optional, NamedTuple
+from typing import List, Union, Literal, Dict, Optional, NamedTuple, Any
 from dataclasses import dataclass
 from pathlib import Path
 from io import BytesIO
@@ -191,7 +191,9 @@ class EmbeddingsService:
         else:
             # Fallback: standard HF loading
             self.text_models[t_model_type] = SentenceTransformer(
-                info.model_id,
+                info.model_id,
+                device=self.device,
+                trust_remote_code=True,
             )
 
         for i_model_type in ImageModelType:
@@ -347,35 +349,47 @@ class EmbeddingsService:
         candidates: List[str],
         modality: Literal["text", "image"],
         batch_size: Optional[int] = None,
-    ) -> Dict[str,
+    ) -> Dict[str, Any]:
         """
         Rank candidates (always text) against the queries, which may be text or image.
-        Returns dict of { probabilities, cosine_similarities }.
+        Returns dict of { probabilities, cosine_similarities, usage }.
         """
+
         # 1) Generate embeddings for queries
         query_embeds = await self.generate_embeddings(queries, modality, batch_size)
         # 2) Generate embeddings for text candidates
         candidate_embeds = await self.generate_embeddings(candidates, "text")
 
-        # 3) Compute cosine
+        # 3) Compute cosine similarity
         sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
+
         # 4) Apply logit scale + softmax
         scaled = np.exp(self.config.logit_scale) * sim_matrix
         probs = self.softmax(scaled)
 
+        # 5) Compute usage (similar to embeddings)
+        query_tokens = self.estimate_tokens(queries) if modality == "text" else 0
+        candidate_tokens = self.estimate_tokens(candidates) if modality == "text" else 0
+        total_tokens = query_tokens + candidate_tokens
+        usage = {
+            "prompt_tokens": total_tokens,
+            "total_tokens": total_tokens,
+        }
+
         return {
             "probabilities": probs.tolist(),
             "cosine_similarities": sim_matrix.tolist(),
+            "usage": usage,
        }
 
     def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
-
-
-
-
-
-
-
+        """
+        Estimate token count using the model's tokenizer.
+        """
+        texts = self._validate_text_input(input_data)
+        model = self.text_models[self.config.text_model_type]
+        tokenized = model.tokenize(texts)
+        return sum(len(ids) for ids in tokenized["input_ids"])
 
     @staticmethod
     def softmax(scores: np.ndarray) -> np.ndarray: