chore: update something
lightweight_embeddings/__init__.py

@@ -72,10 +72,16 @@ The **Lightweight Embeddings API** is a fast, free, and multilingual service des
 - **Optimized and Flexible**: Built for speed with lightweight transformer models, efficient backends for rapid inference on low-resource systems, and support for diverse use cases with models.
 - **Production-Ready with Ease of Use**: Deploy effortlessly using Docker for a hassle-free setup, and experiment interactively through a **Gradio-powered playground** with comprehensive REST API documentation.
 
+### 🔒 Privacy and Data Transparency
+
+- **Minimal Data Collection**: This API respects your privacy by design. It does not store or log any input data (text or images) provided by users. Only anonymous usage counts (IDs for statistical purposes) are recorded to monitor and improve the service.
+- **Open Source and Transparent**: The API is fully open source, ensuring transparency and allowing users to inspect the code for themselves. This guarantees trust and confidence in how the service handles your data.
+
 ### 🔗 Links
 - [Documentation]({__metadata__["docs"]}) | [GitHub]({__metadata__["github"]}) | [Playground]({__metadata__["spaces"]})
 """
 
+
 # Initialize FastAPI application
 app = FastAPI(
     title="Lightweight Embeddings API",
@@ -159,7 +165,7 @@ def create_main_interface():
     ]
 
     with gr.Blocks(title="Lightweight Embeddings", theme="default") as demo:
-
+        gr.Markdown(APP_DESCRIPTION)
         with gr.Row():
             with gr.Column():
                 gr.Markdown("### 🔬 Try the Embeddings Playground")
lightweight_embeddings/analytics.py

@@ -16,9 +16,14 @@ class Analytics:
         """
         self.pool = redis.ConnectionPool.from_url(redis_url, decode_responses=True)
         self.redis_client = redis.Redis(connection_pool=self.pool)
-        self.local_buffer =
-
-
+        self.local_buffer = {
+            "access": defaultdict(
+                lambda: defaultdict(int)
+            ),  # {period: {model_id: access_count}}
+            "tokens": defaultdict(
+                lambda: defaultdict(int)
+            ),  # {period: {model_id: tokens_count}}
+        }
         self.sync_interval = sync_interval
         self.lock = asyncio.Lock()  # Async lock for thread-safe updates
         asyncio.create_task(self._start_sync_task())
@@ -34,26 +39,48 @@ class Analytics:
         year_key = now.strftime("%Y")
         return day_key, week_key, month_key, year_key
 
-    async def access(self, model_id: str):
+    async def access(self, model_id: str, tokens: int):
         """
-        Records an access for a specific model_id.
+        Records an access and token usage for a specific model_id.
+
+        Parameters:
+        - model_id: The ID of the model being accessed.
+        - tokens: Number of tokens used in this access.
         """
         day_key, week_key, month_key, year_key = self._get_period_keys()
 
         async with self.lock:
-
-            self.local_buffer[
-            self.local_buffer[
-            self.local_buffer[
-            self.local_buffer["
+            # Increment access count
+            self.local_buffer["access"][day_key][model_id] += 1
+            self.local_buffer["access"][week_key][model_id] += 1
+            self.local_buffer["access"][month_key][model_id] += 1
+            self.local_buffer["access"][year_key][model_id] += 1
+            self.local_buffer["access"]["total"][model_id] += 1
+
+            # Increment token count
+            self.local_buffer["tokens"][day_key][model_id] += tokens
+            self.local_buffer["tokens"][week_key][model_id] += tokens
+            self.local_buffer["tokens"][month_key][model_id] += tokens
+            self.local_buffer["tokens"][year_key][model_id] += tokens
+            self.local_buffer["tokens"]["total"][model_id] += tokens
 
-    async def stats(self) -> Dict[str, Dict[str, int]]:
+    async def stats(self) -> Dict[str, Dict[str, Dict[str, int]]]:
         """
         Returns statistics for all models from the local buffer.
+
+        Returns:
+        - A dictionary with access counts and token usage for each period.
         """
         async with self.lock:
             return {
-
+                "access": {
+                    period: dict(models)
+                    for period, models in self.local_buffer["access"].items()
+                },
+                "tokens": {
+                    period: dict(models)
+                    for period, models in self.local_buffer["tokens"].items()
+                },
             }
 
     async def _sync_to_redis(self):
@@ -62,12 +89,22 @@ class Analytics:
         """
         async with self.lock:
             pipeline = self.redis_client.pipeline()
-            for period, models in self.local_buffer.items():
+
+            # Sync access counts
+            for period, models in self.local_buffer["access"].items():
                 for model_id, count in models.items():
-                    redis_key = f"analytics:{period}"
+                    redis_key = f"analytics:access:{period}"
                     pipeline.hincrby(redis_key, model_id, count)
+
+            # Sync token counts
+            for period, models in self.local_buffer["tokens"].items():
+                for model_id, count in models.items():
+                    redis_key = f"analytics:tokens:{period}"
+                    pipeline.hincrby(redis_key, model_id, count)
+
             await pipeline.execute()
-            self.local_buffer.clear()  # Clear
+            self.local_buffer["access"].clear()  # Clear access buffer after sync
+            self.local_buffer["tokens"].clear()  # Clear tokens buffer after sync
 
     async def _start_sync_task(self):
         """
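
Taken together, the analytics changes replace the old single-level buffer with a two-level `{metric: {period: {model_id: count}}}` layout, drained into `analytics:access:{period}` and `analytics:tokens:{period}` Redis hashes. Below is a minimal sketch of that buffer in isolation (standard library only, no Redis or async lock; the model ID and token counts are illustrative):

```python
from collections import defaultdict
from datetime import datetime

# Two-level buffer, as in the diff: {metric: {period: {model_id: count}}}
local_buffer = {
    "access": defaultdict(lambda: defaultdict(int)),
    "tokens": defaultdict(lambda: defaultdict(int)),
}

def record(model_id: str, tokens: int) -> None:
    now = datetime.utcnow()
    # Period keys in the formats visible in the diff (day, week, month, year)
    periods = [
        now.strftime("%Y-%m-%d"),
        f"{now.year}-W{now.strftime('%U')}",
        now.strftime("%Y-%m"),
        now.strftime("%Y"),
        "total",
    ]
    for period in periods:
        local_buffer["access"][period][model_id] += 1
        local_buffer["tokens"][period][model_id] += tokens

record("example-model", tokens=42)
record("example-model", tokens=8)
print(dict(local_buffer["access"]["total"]))  # {'example-model': 2}
print(dict(local_buffer["tokens"]["total"]))  # {'example-model': 50}
```

Buffering locally and flushing with pipelined `HINCRBY` calls every `sync_interval` keeps per-request overhead at a dict increment, while the persisted counters survive restarts.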
lightweight_embeddings/router.py

@@ -124,8 +124,8 @@ class RankResponse(BaseModel):
     cosine_similarities: List[List[float]]
 
 
-class
-    """
+class StatsBucket(BaseModel):
+    """Helper model for daily/weekly/monthly/yearly stats"""
 
     total: Dict[str, int]
     daily: Dict[str, int]
@@ -134,6 +134,13 @@ class StatsResponse(BaseModel):
     yearly: Dict[str, int]
 
 
+class StatsResponse(BaseModel):
+    """Analytics stats response model, including both access and token counts"""
+
+    access: StatsBucket
+    tokens: StatsBucket
+
+
 service_config = ModelConfig()
 embeddings_service = EmbeddingsService(config=service_config)
 
@@ -235,20 +242,31 @@ async def rank_candidates(request: RankRequest, background_tasks: BackgroundTask
 
 @router.get("/stats", response_model=StatsResponse, tags=["stats"])
 async def get_stats():
-    """Get usage statistics for all models"""
+    """Get usage statistics for all models, including access and tokens."""
     try:
-
+        day_key = datetime.utcnow().strftime("%Y-%m-%d")
+        week_key = f"{datetime.utcnow().year}-W{datetime.utcnow().strftime('%U')}"
+        month_key = datetime.utcnow().strftime("%Y-%m")
+        year_key = datetime.utcnow().strftime("%Y")
+
+        stats_data = await analytics.stats()  # { "access": {...}, "tokens": {...} }
 
         return {
-            "
-
-
-
-
-
-
+            "access": {
+                "total": stats_data["access"].get("total", {}),
+                "daily": stats_data["access"].get(day_key, {}),
+                "weekly": stats_data["access"].get(week_key, {}),
+                "monthly": stats_data["access"].get(month_key, {}),
+                "yearly": stats_data["access"].get(year_key, {}),
+            },
+            "tokens": {
+                "total": stats_data["tokens"].get("total", {}),
+                "daily": stats_data["tokens"].get(day_key, {}),
+                "weekly": stats_data["tokens"].get(week_key, {}),
+                "monthly": stats_data["tokens"].get(month_key, {}),
+                "yearly": stats_data["tokens"].get(year_key, {}),
+            },
         }
-
     except Exception as e:
         msg = f"Failed to fetch analytics stats: {str(e)}"
         logger.error(msg)
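
With `StatsBucket` nested under both keys, `/stats` now returns two parallel buckets. A hedged client sketch; the base URL and the `/v1` route prefix are assumptions about the deployment, and the printed values are illustrative:

```python
import requests  # third-party HTTP client, assumed available

# Base URL and "/v1" prefix are assumptions; adjust to the actual deployment.
resp = requests.get("http://localhost:7860/v1/stats", timeout=10)
resp.raise_for_status()
stats = resp.json()

# Shape mirrors StatsResponse -> StatsBucket:
# {
#   "access": {"total": {...}, "daily": {...}, "weekly": {...},
#              "monthly": {...}, "yearly": {...}},
#   "tokens": {...same five periods...}
# }
print(stats["access"]["daily"])  # e.g. {"example-model": 123}
print(stats["tokens"]["total"])  # cumulative token usage per model
```

Note that `get_stats` derives its period keys with `datetime.utcnow()` independently of `Analytics._get_period_keys`, so the two key formats have to stay in sync.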
lightweight_embeddings/service.py

@@ -28,7 +28,7 @@ from __future__ import annotations
 
 import logging
 from enum import Enum
-from typing import List, Union, Literal, Dict, Optional, NamedTuple
+from typing import List, Union, Literal, Dict, Optional, NamedTuple, Any
 from dataclasses import dataclass
 from pathlib import Path
 from io import BytesIO
@@ -191,7 +191,9 @@ class EmbeddingsService:
         else:
             # Fallback: standard HF loading
             self.text_models[t_model_type] = SentenceTransformer(
-                info.model_id,
+                info.model_id,
+                device=self.device,
+                trust_remote_code=True,
             )
 
         for i_model_type in ImageModelType:
@@ -347,35 +349,47 @@ class EmbeddingsService:
         candidates: List[str],
         modality: Literal["text", "image"],
         batch_size: Optional[int] = None,
-    ) -> Dict[str,
+    ) -> Dict[str, Any]:
         """
         Rank candidates (always text) against the queries, which may be text or image.
-        Returns dict of { probabilities, cosine_similarities }.
+        Returns dict of { probabilities, cosine_similarities, usage }.
         """
+
         # 1) Generate embeddings for queries
         query_embeds = await self.generate_embeddings(queries, modality, batch_size)
         # 2) Generate embeddings for text candidates
         candidate_embeds = await self.generate_embeddings(candidates, "text")
 
-        # 3) Compute cosine
+        # 3) Compute cosine similarity
         sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
+
         # 4) Apply logit scale + softmax
         scaled = np.exp(self.config.logit_scale) * sim_matrix
         probs = self.softmax(scaled)
 
+        # 5) Compute usage (similar to embeddings)
+        query_tokens = self.estimate_tokens(queries) if modality == "text" else 0
+        candidate_tokens = self.estimate_tokens(candidates) if modality == "text" else 0
+        total_tokens = query_tokens + candidate_tokens
+        usage = {
+            "prompt_tokens": total_tokens,
+            "total_tokens": total_tokens,
+        }
+
         return {
             "probabilities": probs.tolist(),
             "cosine_similarities": sim_matrix.tolist(),
+            "usage": usage,
        }
 
     def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
-
-
-
-
-
-
-
+        """
+        Estimate token count using the model's tokenizer.
+        """
+        texts = self._validate_text_input(input_data)
+        model = self.text_models[self.config.text_model_type]
+        tokenized = model.tokenize(texts)
+        return sum(len(ids) for ids in tokenized["input_ids"])
 
     @staticmethod
     def softmax(scores: np.ndarray) -> np.ndarray: