# Spaces: Running
# (Appears to be a hosting-page status banner captured with the source; not part of the module.)
# Imports grouped stdlib / third-party / package-local.
from typing import Dict, List, Literal, Optional, Tuple, Union

from pydantic import BaseModel, validator

from .completion import CompletionRequest
from .embedding import EmbeddingRequest
class ModelConfig(BaseModel):
    """Pydantic model describing a single model entry.

    Instances are the element type of ``RouterConfig.model_list``. All four
    fields are required: none of them declares a default.
    """

    # NOTE(review): pydantic v2 reserves the ``model_`` prefix and warns on
    # field names that use it -- confirm which pydantic version is pinned.
    model_name: str
    # Validated as either a completion request or an embedding request.
    litellm_params: Union[CompletionRequest, EmbeddingRequest]
    # Presumably a tokens-per-minute budget -- confirm against the consumer.
    tpm: int
    # Presumably a requests-per-minute budget -- confirm against the consumer.
    rpm: int
class RouterConfig(BaseModel):
    """Pydantic model holding router-level settings.

    Only ``model_list`` is required; every other field declares a default.
    The ``{}`` / ``[]`` literals used as defaults are safe here because
    pydantic copies field defaults for each new instance rather than sharing
    one object across instances.
    """

    # The model entries this configuration covers.
    model_list: List[ModelConfig]

    # Redis connection settings; all unset by default.
    redis_url: Optional[str] = None
    redis_host: Optional[str] = None
    redis_port: Optional[int] = None
    redis_password: Optional[str] = None

    # Caching options; response caching is off by default.
    cache_responses: Optional[bool] = False
    cache_kwargs: Optional[Dict] = {}
    # Each entry pairs a string with a list of strings.
    caching_groups: Optional[List[Tuple[str, List[str]]]] = None
    # Presumably seconds -- TODO confirm the unit with the consumer.
    client_ttl: Optional[int] = 3600

    # Retry / timeout behaviour.
    num_retries: Optional[int] = 0
    timeout: Optional[float] = None
    # NOTE(review): values are typed as ``str``; confirm that non-string
    # parameter values are never expected here.
    default_litellm_params: Optional[Dict[str, str]] = {}
    set_verbose: Optional[bool] = False

    # Fallback handling.
    fallbacks: Optional[List] = []
    allowed_fails: Optional[int] = None
    context_window_fallbacks: Optional[List] = []
    model_group_alias: Optional[Dict[str, List[str]]] = {}
    retry_after: Optional[int] = 0

    # Restricted to the four strategy names below; any other value fails
    # validation.
    routing_strategy: Literal[
        "simple-shuffle",
        "least-busy",
        "usage-based-routing",
        "latency-based-routing",
    ] = "simple-shuffle"