lamhieu committed on
Commit
aaf7e4c
·
1 Parent(s): c54a701

chore: support embeddings caching

Browse files
lightweight_embeddings/analytics.py CHANGED
@@ -19,8 +19,14 @@ class Analytics:
19
  - redis_url: Redis connection URL (e.g., 'redis://localhost:6379/0')
20
  - sync_interval: Interval in seconds for syncing with Redis.
21
  """
22
- self.pool = redis.ConnectionPool.from_url(redis_url, decode_responses=True)
23
- self.redis_client = redis.Redis(connection_pool=self.pool)
 
 
 
 
 
 
24
  self.local_buffer = {
25
  "access": defaultdict(
26
  lambda: defaultdict(int)
@@ -122,5 +128,4 @@ class Analytics:
122
  await self._sync_to_redis()
123
  except redis.exceptions.ConnectionError as e:
124
  logger.error("Redis connection error: %s", e)
125
- self.pool.disconnect() # force reconnect on next request
126
  await asyncio.sleep(5)
 
19
  - redis_url: Redis connection URL (e.g., 'redis://localhost:6379/0')
20
  - sync_interval: Interval in seconds for syncing with Redis.
21
  """
22
+ self.redis_client = redis.from_url(
23
+ redis_url,
24
+ decode_responses=True,
25
+ health_check_interval=10,
26
+ socket_connect_timeout=5,
27
+ retry_on_timeout=True,
28
+ socket_keepalive=True,
29
+ )
30
  self.local_buffer = {
31
  "access": defaultdict(
32
  lambda: defaultdict(int)
 
128
  await self._sync_to_redis()
129
  except redis.exceptions.ConnectionError as e:
130
  logger.error("Redis connection error: %s", e)
 
131
  await asyncio.sleep(5)
lightweight_embeddings/service.py CHANGED
@@ -32,6 +32,8 @@ from typing import List, Union, Literal, Dict, Optional, NamedTuple, Any
32
  from dataclasses import dataclass
33
  from pathlib import Path
34
  from io import BytesIO
 
 
35
 
36
  import requests
37
  import numpy as np
@@ -153,6 +155,8 @@ class EmbeddingsService:
153
  """
154
 
155
  def __init__(self, config: Optional[ModelConfig] = None):
 
 
156
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
157
  self.config = config or ModelConfig()
158
 
@@ -262,11 +266,19 @@ class EmbeddingsService:
262
 
263
  def _generate_text_embeddings(self, texts: List[str]) -> np.ndarray:
264
  """
265
- Generate text embeddings using the currently configured text model.
 
266
  """
267
  try:
 
 
 
 
268
  model = self.text_models[self.config.text_model_type]
269
- embeddings = model.encode(texts) # shape: (num_items, emb_dim)
 
 
 
270
  return embeddings
271
  except Exception as e:
272
  raise RuntimeError(
 
32
  from dataclasses import dataclass
33
  from pathlib import Path
34
  from io import BytesIO
35
+ from hashlib import md5
36
+ from cachetools import LRUCache
37
 
38
  import requests
39
  import numpy as np
 
155
  """
156
 
157
  def __init__(self, config: Optional[ModelConfig] = None):
158
+ self.lru_cache = LRUCache(maxsize=50_000) # Approximate for ~500MB usage
159
+
160
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
161
  self.config = config or ModelConfig()
162
 
 
266
 
267
  def _generate_text_embeddings(self, texts: List[str]) -> np.ndarray:
268
  """
269
+ Generate text embeddings using the currently configured text model
270
+ with an LRU cache for single-text requests.
271
  """
272
  try:
273
+ if len(texts) == 1:
274
+ key = md5(texts[0].encode("utf-8")).hexdigest()
275
+ if key in self.lru_cache:
276
+ return self.lru_cache[key]
277
  model = self.text_models[self.config.text_model_type]
278
+ embeddings = model.encode(texts)
279
+
280
+ if len(texts) == 1:
281
+ self.lru_cache[key] = embeddings
282
  return embeddings
283
  except Exception as e:
284
  raise RuntimeError(