lamhieu committed on
Commit
073aa83
·
1 Parent(s): ae67e3d

chore: update something

Browse files
lightweight_embeddings/__init__.py CHANGED
@@ -72,10 +72,16 @@ The **Lightweight Embeddings API** is a fast, free, and multilingual service des
72
  - **Optimized and Flexible**: Built for speed with lightweight transformer models, efficient backends for rapid inference on low-resource systems, and support for diverse use cases with models.
73
  - **Production-Ready with Ease of Use**: Deploy effortlessly using Docker for a hassle-free setup, and experiment interactively through a **Gradio-powered playground** with comprehensive REST API documentation.
74
 
 
 
 
 
 
75
  ### 🔗 Links
76
  - [Documentation]({__metadata__["docs"]}) | [GitHub]({__metadata__["github"]}) | [Playground]({__metadata__["spaces"]})
77
  """
78
 
 
79
  # Initialize FastAPI application
80
  app = FastAPI(
81
  title="Lightweight Embeddings API",
@@ -159,7 +165,7 @@ def create_main_interface():
159
  ]
160
 
161
  with gr.Blocks(title="Lightweight Embeddings", theme="default") as demo:
162
- # ...existing code...
163
  with gr.Row():
164
  with gr.Column():
165
  gr.Markdown("### πŸ”¬ Try the Embeddings Playground")
 
72
  - **Optimized and Flexible**: Built for speed with lightweight transformer models, efficient backends for rapid inference on low-resource systems, and support for diverse use cases with models.
73
  - **Production-Ready with Ease of Use**: Deploy effortlessly using Docker for a hassle-free setup, and experiment interactively through a **Gradio-powered playground** with comprehensive REST API documentation.
74
 
75
+ ### 🔒 Privacy and Data Transparency
76
+
77
+ - **Minimal Data Collection**: This API respects your privacy by design. It does not store or log any input data (text or images) provided by users. Only anonymous usage counts (IDs for statistical purposes) are recorded to monitor and improve the service.
78
+ - **Open Source and Transparent**: The API is fully open source, ensuring transparency and allowing users to inspect the code for themselves. This guarantees trust and confidence in how the service handles your data.
79
+
80
  ### 🔗 Links
81
  - [Documentation]({__metadata__["docs"]}) | [GitHub]({__metadata__["github"]}) | [Playground]({__metadata__["spaces"]})
82
  """
83
 
84
+
85
  # Initialize FastAPI application
86
  app = FastAPI(
87
  title="Lightweight Embeddings API",
 
165
  ]
166
 
167
  with gr.Blocks(title="Lightweight Embeddings", theme="default") as demo:
168
+ gr.Markdown(APP_DESCRIPTION)
169
  with gr.Row():
170
  with gr.Column():
171
  gr.Markdown("### πŸ”¬ Try the Embeddings Playground")
lightweight_embeddings/analytics.py CHANGED
@@ -16,9 +16,14 @@ class Analytics:
16
  """
17
  self.pool = redis.ConnectionPool.from_url(redis_url, decode_responses=True)
18
  self.redis_client = redis.Redis(connection_pool=self.pool)
19
- self.local_buffer = defaultdict(
20
- lambda: defaultdict(int)
21
- ) # {period: {model_id: count}}
 
 
 
 
 
22
  self.sync_interval = sync_interval
23
  self.lock = asyncio.Lock() # Async lock for thread-safe updates
24
  asyncio.create_task(self._start_sync_task())
@@ -34,26 +39,48 @@ class Analytics:
34
  year_key = now.strftime("%Y")
35
  return day_key, week_key, month_key, year_key
36
 
37
- async def access(self, model_id: str):
38
  """
39
- Records an access for a specific model_id.
 
 
 
 
40
  """
41
  day_key, week_key, month_key, year_key = self._get_period_keys()
42
 
43
  async with self.lock:
44
- self.local_buffer[day_key][model_id] += 1
45
- self.local_buffer[week_key][model_id] += 1
46
- self.local_buffer[month_key][model_id] += 1
47
- self.local_buffer[year_key][model_id] += 1
48
- self.local_buffer["total"][model_id] += 1
 
 
 
 
 
 
 
 
49
 
50
- async def stats(self) -> Dict[str, Dict[str, int]]:
51
  """
52
  Returns statistics for all models from the local buffer.
 
 
 
53
  """
54
  async with self.lock:
55
  return {
56
- period: dict(models) for period, models in self.local_buffer.items()
 
 
 
 
 
 
 
57
  }
58
 
59
  async def _sync_to_redis(self):
@@ -62,12 +89,22 @@ class Analytics:
62
  """
63
  async with self.lock:
64
  pipeline = self.redis_client.pipeline()
65
- for period, models in self.local_buffer.items():
 
 
66
  for model_id, count in models.items():
67
- redis_key = f"analytics:{period}"
68
  pipeline.hincrby(redis_key, model_id, count)
 
 
 
 
 
 
 
69
  await pipeline.execute()
70
- self.local_buffer.clear() # Clear the buffer after sync
 
71
 
72
  async def _start_sync_task(self):
73
  """
 
16
  """
17
  self.pool = redis.ConnectionPool.from_url(redis_url, decode_responses=True)
18
  self.redis_client = redis.Redis(connection_pool=self.pool)
19
+ self.local_buffer = {
20
+ "access": defaultdict(
21
+ lambda: defaultdict(int)
22
+ ), # {period: {model_id: access_count}}
23
+ "tokens": defaultdict(
24
+ lambda: defaultdict(int)
25
+ ), # {period: {model_id: tokens_count}}
26
+ }
27
  self.sync_interval = sync_interval
28
  self.lock = asyncio.Lock() # Async lock for thread-safe updates
29
  asyncio.create_task(self._start_sync_task())
 
39
  year_key = now.strftime("%Y")
40
  return day_key, week_key, month_key, year_key
41
 
42
+ async def access(self, model_id: str, tokens: int):
43
  """
44
+ Records an access and token usage for a specific model_id.
45
+
46
+ Parameters:
47
+ - model_id: The ID of the model being accessed.
48
+ - tokens: Number of tokens used in this access.
49
  """
50
  day_key, week_key, month_key, year_key = self._get_period_keys()
51
 
52
  async with self.lock:
53
+ # Increment access count
54
+ self.local_buffer["access"][day_key][model_id] += 1
55
+ self.local_buffer["access"][week_key][model_id] += 1
56
+ self.local_buffer["access"][month_key][model_id] += 1
57
+ self.local_buffer["access"][year_key][model_id] += 1
58
+ self.local_buffer["access"]["total"][model_id] += 1
59
+
60
+ # Increment token count
61
+ self.local_buffer["tokens"][day_key][model_id] += tokens
62
+ self.local_buffer["tokens"][week_key][model_id] += tokens
63
+ self.local_buffer["tokens"][month_key][model_id] += tokens
64
+ self.local_buffer["tokens"][year_key][model_id] += tokens
65
+ self.local_buffer["tokens"]["total"][model_id] += tokens
66
 
67
+ async def stats(self) -> Dict[str, Dict[str, Dict[str, int]]]:
68
  """
69
  Returns statistics for all models from the local buffer.
70
+
71
+ Returns:
72
+ - A dictionary with access counts and token usage for each period.
73
  """
74
  async with self.lock:
75
  return {
76
+ "access": {
77
+ period: dict(models)
78
+ for period, models in self.local_buffer["access"].items()
79
+ },
80
+ "tokens": {
81
+ period: dict(models)
82
+ for period, models in self.local_buffer["tokens"].items()
83
+ },
84
  }
85
 
86
  async def _sync_to_redis(self):
 
89
  """
90
  async with self.lock:
91
  pipeline = self.redis_client.pipeline()
92
+
93
+ # Sync access counts
94
+ for period, models in self.local_buffer["access"].items():
95
  for model_id, count in models.items():
96
+ redis_key = f"analytics:access:{period}"
97
  pipeline.hincrby(redis_key, model_id, count)
98
+
99
+ # Sync token counts
100
+ for period, models in self.local_buffer["tokens"].items():
101
+ for model_id, count in models.items():
102
+ redis_key = f"analytics:tokens:{period}"
103
+ pipeline.hincrby(redis_key, model_id, count)
104
+
105
  await pipeline.execute()
106
+ self.local_buffer["access"].clear() # Clear access buffer after sync
107
+ self.local_buffer["tokens"].clear() # Clear tokens buffer after sync
108
 
109
  async def _start_sync_task(self):
110
  """
lightweight_embeddings/router.py CHANGED
@@ -124,8 +124,8 @@ class RankResponse(BaseModel):
124
  cosine_similarities: List[List[float]]
125
 
126
 
127
- class StatsResponse(BaseModel):
128
- """Analytics stats response model"""
129
 
130
  total: Dict[str, int]
131
  daily: Dict[str, int]
@@ -134,6 +134,13 @@ class StatsResponse(BaseModel):
134
  yearly: Dict[str, int]
135
 
136
 
 
 
 
 
 
 
 
137
  service_config = ModelConfig()
138
  embeddings_service = EmbeddingsService(config=service_config)
139
 
@@ -235,20 +242,31 @@ async def rank_candidates(request: RankRequest, background_tasks: BackgroundTask
235
 
236
  @router.get("/stats", response_model=StatsResponse, tags=["stats"])
237
  async def get_stats():
238
- """Get usage statistics for all models"""
239
  try:
240
- stats = await analytics.stats()
 
 
 
 
 
241
 
242
  return {
243
- "total": stats.get("total", {}),
244
- "daily": stats.get(datetime.utcnow().strftime("%Y-%m-%d"), {}),
245
- "weekly": stats.get(
246
- f"{datetime.utcnow().year}-W{datetime.utcnow().strftime('%U')}", {}
247
- ),
248
- "monthly": stats.get(datetime.utcnow().strftime("%Y-%m"), {}),
249
- "yearly": stats.get(datetime.utcnow().strftime("%Y"), {}),
 
 
 
 
 
 
 
250
  }
251
-
252
  except Exception as e:
253
  msg = f"Failed to fetch analytics stats: {str(e)}"
254
  logger.error(msg)
 
124
  cosine_similarities: List[List[float]]
125
 
126
 
127
+ class StatsBucket(BaseModel):
128
+ """Helper model for daily/weekly/monthly/yearly stats"""
129
 
130
  total: Dict[str, int]
131
  daily: Dict[str, int]
 
134
  yearly: Dict[str, int]
135
 
136
 
137
+ class StatsResponse(BaseModel):
138
+ """Analytics stats response model, including both access and token counts"""
139
+
140
+ access: StatsBucket
141
+ tokens: StatsBucket
142
+
143
+
144
  service_config = ModelConfig()
145
  embeddings_service = EmbeddingsService(config=service_config)
146
 
 
242
 
243
  @router.get("/stats", response_model=StatsResponse, tags=["stats"])
244
  async def get_stats():
245
+ """Get usage statistics for all models, including access and tokens."""
246
  try:
247
+ day_key = datetime.utcnow().strftime("%Y-%m-%d")
248
+ week_key = f"{datetime.utcnow().year}-W{datetime.utcnow().strftime('%U')}"
249
+ month_key = datetime.utcnow().strftime("%Y-%m")
250
+ year_key = datetime.utcnow().strftime("%Y")
251
+
252
+ stats_data = await analytics.stats() # { "access": {...}, "tokens": {...} }
253
 
254
  return {
255
+ "access": {
256
+ "total": stats_data["access"].get("total", {}),
257
+ "daily": stats_data["access"].get(day_key, {}),
258
+ "weekly": stats_data["access"].get(week_key, {}),
259
+ "monthly": stats_data["access"].get(month_key, {}),
260
+ "yearly": stats_data["access"].get(year_key, {}),
261
+ },
262
+ "tokens": {
263
+ "total": stats_data["tokens"].get("total", {}),
264
+ "daily": stats_data["tokens"].get(day_key, {}),
265
+ "weekly": stats_data["tokens"].get(week_key, {}),
266
+ "monthly": stats_data["tokens"].get(month_key, {}),
267
+ "yearly": stats_data["tokens"].get(year_key, {}),
268
+ },
269
  }
 
270
  except Exception as e:
271
  msg = f"Failed to fetch analytics stats: {str(e)}"
272
  logger.error(msg)
lightweight_embeddings/service.py CHANGED
@@ -28,7 +28,7 @@ from __future__ import annotations
28
 
29
  import logging
30
  from enum import Enum
31
- from typing import List, Union, Literal, Dict, Optional, NamedTuple
32
  from dataclasses import dataclass
33
  from pathlib import Path
34
  from io import BytesIO
@@ -191,7 +191,9 @@ class EmbeddingsService:
191
  else:
192
  # Fallback: standard HF loading
193
  self.text_models[t_model_type] = SentenceTransformer(
194
- info.model_id, device=self.device, trust_remote_code=True,
 
 
195
  )
196
 
197
  for i_model_type in ImageModelType:
@@ -347,35 +349,47 @@ class EmbeddingsService:
347
  candidates: List[str],
348
  modality: Literal["text", "image"],
349
  batch_size: Optional[int] = None,
350
- ) -> Dict[str, List[List[float]]]:
351
  """
352
  Rank candidates (always text) against the queries, which may be text or image.
353
- Returns dict of { probabilities, cosine_similarities }.
354
  """
 
355
  # 1) Generate embeddings for queries
356
  query_embeds = await self.generate_embeddings(queries, modality, batch_size)
357
  # 2) Generate embeddings for text candidates
358
  candidate_embeds = await self.generate_embeddings(candidates, "text")
359
 
360
- # 3) Compute cosine sim
361
  sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
 
362
  # 4) Apply logit scale + softmax
363
  scaled = np.exp(self.config.logit_scale) * sim_matrix
364
  probs = self.softmax(scaled)
365
 
 
 
 
 
 
 
 
 
 
366
  return {
367
  "probabilities": probs.tolist(),
368
  "cosine_similarities": sim_matrix.tolist(),
 
369
  }
370
 
371
  def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
372
- """
373
- Estimate token count using the model's tokenizer.
374
- """
375
- texts = self._validate_text_input(input_data)
376
- model = self.text_models[self.config.text_model_type]
377
- tokenized = model.tokenize(texts)
378
- return sum(len(ids) for ids in tokenized['input_ids'])
379
 
380
  @staticmethod
381
  def softmax(scores: np.ndarray) -> np.ndarray:
 
28
 
29
  import logging
30
  from enum import Enum
31
+ from typing import List, Union, Literal, Dict, Optional, NamedTuple, Any
32
  from dataclasses import dataclass
33
  from pathlib import Path
34
  from io import BytesIO
 
191
  else:
192
  # Fallback: standard HF loading
193
  self.text_models[t_model_type] = SentenceTransformer(
194
+ info.model_id,
195
+ device=self.device,
196
+ trust_remote_code=True,
197
  )
198
 
199
  for i_model_type in ImageModelType:
 
349
  candidates: List[str],
350
  modality: Literal["text", "image"],
351
  batch_size: Optional[int] = None,
352
+ ) -> Dict[str, Any]:
353
  """
354
  Rank candidates (always text) against the queries, which may be text or image.
355
+ Returns dict of { probabilities, cosine_similarities, usage }.
356
  """
357
+
358
  # 1) Generate embeddings for queries
359
  query_embeds = await self.generate_embeddings(queries, modality, batch_size)
360
  # 2) Generate embeddings for text candidates
361
  candidate_embeds = await self.generate_embeddings(candidates, "text")
362
 
363
+ # 3) Compute cosine similarity
364
  sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
365
+
366
  # 4) Apply logit scale + softmax
367
  scaled = np.exp(self.config.logit_scale) * sim_matrix
368
  probs = self.softmax(scaled)
369
 
370
+ # 5) Compute usage (similar to embeddings)
371
+ query_tokens = self.estimate_tokens(queries) if modality == "text" else 0
372
+ candidate_tokens = self.estimate_tokens(candidates) if modality == "text" else 0
373
+ total_tokens = query_tokens + candidate_tokens
374
+ usage = {
375
+ "prompt_tokens": total_tokens,
376
+ "total_tokens": total_tokens,
377
+ }
378
+
379
  return {
380
  "probabilities": probs.tolist(),
381
  "cosine_similarities": sim_matrix.tolist(),
382
+ "usage": usage,
383
  }
384
 
385
  def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
386
+ """
387
+ Estimate token count using the model's tokenizer.
388
+ """
389
+ texts = self._validate_text_input(input_data)
390
+ model = self.text_models[self.config.text_model_type]
391
+ tokenized = model.tokenize(texts)
392
+ return sum(len(ids) for ids in tokenized["input_ids"])
393
 
394
  @staticmethod
395
  def softmax(scores: np.ndarray) -> np.ndarray: