|
{ |
|
"replicate": {}, |
|
"sambanova": { |
|
"DeepSeek-R1": 32768, |
|
"DeepSeek-R1-Distill-Llama-70B": 131072, |
|
"DeepSeek-V3-0324": 32768, |
|
"E5-Mistral-7B-Instruct": 4096, |
|
"Llama-4-Maverick-17B-128E-Instruct": 131072, |
|
"Llama-4-Scout-17B-16E-Instruct": 8192, |
|
"Meta-Llama-3.1-405B-Instruct": 16384, |
|
"Meta-Llama-3.1-8B-Instruct": 16384, |
|
"Meta-Llama-3.2-1B-Instruct": 16384, |
|
"Meta-Llama-3.2-3B-Instruct": 4096, |
|
"Meta-Llama-3.3-70B-Instruct": 131072, |
|
"Meta-Llama-Guard-3-8B": 16384, |
|
"QwQ-32B": 16384, |
|
"Qwen2-Audio-7B-Instruct": 4096, |
|
"Qwen3-32B": 8192 |
|
}, |
|
"nebius": { |
|
"meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072, |
|
"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072, |
|
"meta-llama/Meta-Llama-3.1-70B-Instruct-fast": 131072, |
|
"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072, |
|
"meta-llama/Meta-Llama-3.1-405B-Instruct": 131072, |
|
"meta-llama/Llama-Guard-3-8B": 131072, |
|
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF-fast": 131072, |
|
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 131072, |
|
"nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072, |
|
"mistralai/Mistral-Nemo-Instruct-2407-fast": 128000, |
|
"mistralai/Mistral-Nemo-Instruct-2407": 128000, |
|
"mistralai/Mixtral-8x7B-Instruct-v0.1-fast": 32768, |
|
"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768, |
|
"mistralai/Mixtral-8x22B-Instruct-v0.1-fast": 65536, |
|
"mistralai/Mixtral-8x22B-Instruct-v0.1": 65536, |
|
"allenai/OLMo-7B-Instruct-hf": 2048, |
|
"microsoft/Phi-3-mini-4k-instruct-fast": 4096, |
|
"microsoft/Phi-3-mini-4k-instruct": 4096, |
|
"microsoft/Phi-3-medium-128k-instruct-fast": 131072, |
|
"microsoft/Phi-3-medium-128k-instruct": 131072, |
|
"google/gemma-2-2b-it-fast": 8192, |
|
"google/gemma-2-2b-it": 8192, |
|
"google/gemma-2-9b-it-fast": 8192, |
|
"google/gemma-2-9b-it": 8192, |
|
"google/gemma-2-27b-it-fast": 8192, |
|
"google/gemma-2-27b-it": 8192, |
|
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct-fast": 128000, |
|
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 128000, |
|
"Qwen/Qwen2.5-Coder-7B-fast": 32768, |
|
"Qwen/Qwen2.5-Coder-7B": 32768, |
|
"Qwen/Qwen2.5-Coder-7B-Instruct-fast": 32768, |
|
"Qwen/Qwen2.5-Coder-7B-Instruct": 32768, |
|
"Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072, |
|
"Qwen/Qwen2.5-Coder-32B-Instruct": 131072, |
|
"Qwen/Qwen2.5-32B-Instruct-fast": 131072, |
|
"Qwen/Qwen2.5-32B-Instruct": 131072, |
|
"Qwen/Qwen2.5-72B-Instruct-fast": 131072, |
|
"Qwen/Qwen2.5-72B-Instruct": 131072, |
|
"Qwen/Qwen2-VL-72B-Instruct": 32768, |
|
"Qwen/Qwen2-VL-7B-Instruct": 32768, |
|
"llava-hf/llava-1.5-7b-hf": 4096, |
|
"llava-hf/llava-1.5-13b-hf": 4096, |
|
"aaditya/Llama3-OpenBioLLM-8B": 8192, |
|
"aaditya/Llama3-OpenBioLLM-70B": 8192, |
|
"BAAI/bge-en-icl": 32768, |
|
"BAAI/bge-multilingual-gemma2": 8192, |
|
"intfloat/e5-mistral-7b-instruct": 32768, |
|
"cognitivecomputations/dolphin-2.9.2-mixtral-8x22b": 65536, |
|
"microsoft/Phi-3.5-MoE-instruct": 131072, |
|
"microsoft/Phi-3.5-mini-instruct": 131072, |
|
"Qwen/Qwen2.5-1.5B-Instruct": 32768, |
|
"meta-llama/Llama-3.3-70B-Instruct": 131072, |
|
"meta-llama/Llama-3.3-70B-Instruct-fast": 131072, |
|
"meta-llama/Llama-3.2-1B-Instruct": 131072, |
|
"meta-llama/Llama-3.2-3B-Instruct": 131072, |
|
"Qwen/QwQ-32B-Preview": 32768, |
|
"Qwen/QVQ-72B-preview": 128000, |
|
"microsoft/phi-4": 16384, |
|
"deepseek-ai/DeepSeek-V3": 163840, |
|
"deepseek-ai/DeepSeek-R1": 163840, |
|
"NousResearch/Hermes-3-Llama-405B": 131072, |
|
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, |
|
"deepseek-ai/DeepSeek-R1-fast": 163840, |
|
"Qwen/QwQ-32B-fast": 131072, |
|
"Qwen/QwQ-32B": 131072, |
|
"Qwen/Qwen3-235B-A22B": 40960, |
|
"Qwen/Qwen3-30B-A3B": 40960, |
|
"Qwen/Qwen3-30B-A3B-fast": 40960, |
|
"Qwen/Qwen3-32B": 40960, |
|
"Qwen/Qwen3-14B": 40960, |
|
"Qwen/Qwen3-4B-fast": 40960, |
|
"nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072, |
|
"mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072, |
|
"google/gemma-3-27b-it": 110000, |
|
"google/gemma-3-27b-it-fast": 110000, |
|
"Qwen/Qwen2.5-VL-72B-Instruct": 32000, |
|
"deepseek-ai/DeepSeek-V3-0324": 163840, |
|
"deepseek-ai/DeepSeek-V3-0324-fast": 163840, |
|
"black-forest-labs/flux-dev": 0, |
|
"black-forest-labs/flux-schnell": 0, |
|
"stability-ai/sdxl": 0 |
|
}, |
|
"novita": { |
|
"deepseek/deepseek-prover-v2-671b": 160000, |
|
"qwen/qwen3-235b-a22b-fp8": 128000, |
|
"qwen/qwen3-30b-a3b-fp8": 128000, |
|
"qwen/qwen3-32b-fp8": 128000, |
|
"deepseek/deepseek-v3-0324": 128000, |
|
"qwen/qwen2.5-vl-72b-instruct": 96000, |
|
"deepseek/deepseek-v3-turbo": 64000, |
|
"deepseek/deepseek-r1-turbo": 64000, |
|
"meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576, |
|
"google/gemma-3-27b-it": 32000, |
|
"qwen/qwq-32b": 32768, |
|
"Sao10K/L3-8B-Stheno-v3.2": 8192, |
|
"gryphe/mythomax-l2-13b": 4096, |
|
"meta-llama/llama-4-scout-17b-16e-instruct": 131072, |
|
"deepseek/deepseek-r1-distill-llama-8b": 32000, |
|
"deepseek/deepseek_v3": 64000, |
|
"meta-llama/llama-3.1-8b-instruct": 16384, |
|
"deepseek/deepseek-r1-distill-qwen-14b": 64000, |
|
"meta-llama/llama-3.3-70b-instruct": 131072, |
|
"qwen/qwen-2.5-72b-instruct": 32000, |
|
"mistralai/mistral-nemo": 131072, |
|
"deepseek/deepseek-r1-distill-qwen-32b": 64000, |
|
"meta-llama/llama-3-8b-instruct": 8192, |
|
"microsoft/wizardlm-2-8x22b": 65535, |
|
"deepseek/deepseek-r1-distill-llama-70b": 32000, |
|
"meta-llama/llama-3.1-70b-instruct": 32768, |
|
"google/gemma-2-9b-it": 8192, |
|
"mistralai/mistral-7b-instruct": 32768, |
|
"meta-llama/llama-3-70b-instruct": 8192, |
|
"deepseek/deepseek-r1": 64000, |
|
"nousresearch/hermes-2-pro-llama-3-8b": 8192, |
|
"sao10k/l3-70b-euryale-v2.1": 8192, |
|
"cognitivecomputations/dolphin-mixtral-8x22b": 16000, |
|
"jondurbin/airoboros-l2-70b": 4096, |
|
"sophosympatheia/midnight-rose-70b": 4096, |
|
"sao10k/l3-8b-lunaris": 8192, |
|
"qwen/qwen3-0.6b-fp8": 32000, |
|
"qwen/qwen3-1.7b-fp8": 32000, |
|
"qwen/qwen3-8b-fp8": 128000, |
|
"qwen/qwen3-4b-fp8": 128000, |
|
"qwen/qwen3-14b-fp8": 128000, |
|
"thudm/glm-4-9b-0414": 32000, |
|
"thudm/glm-z1-9b-0414": 32000, |
|
"thudm/glm-z1-32b-0414": 32000, |
|
"thudm/glm-4-32b-0414": 32000, |
|
"thudm/glm-z1-rumination-32b-0414": 32000, |
|
"qwen/qwen2.5-7b-instruct": 32000, |
|
"meta-llama/llama-3.2-1b-instruct": 131000, |
|
"meta-llama/llama-3.2-11b-vision-instruct": 32768, |
|
"meta-llama/llama-3.2-3b-instruct": 32768, |
|
"meta-llama/llama-3.1-8b-instruct-bf16": 8192, |
|
"sao10k/l31-70b-euryale-v2.2": 8192 |
|
}, |
|
"fal": { |
|
"fal/model-name": 4096 |
|
}, |
|
"cerebras": { |
|
"cerebras/model-name": 8192 |
|
}, |
|
"hf-inference": { |
|
"google/gemma-2-9b-it": 8192, |
|
"meta-llama/Meta-Llama-3-8B-Instruct": 8192 |
|
}, |
|
"hyperbolic": { |
|
"Qwen/Qwen2.5-72B-Instruct": 131072, |
|
"Qwen/Qwen2.5-VL-72B-Instruct": 32768, |
|
"meta-llama/Meta-Llama-3-70B-Instruct": 8192, |
|
"deepseek-ai/DeepSeek-V3": 131072, |
|
"deepseek-ai/DeepSeek-V3-0324": 163840, |
|
"meta-llama/Llama-3.3-70B-Instruct": 131072, |
|
"Qwen/QwQ-32B-Preview": 32768, |
|
"Qwen/Qwen2.5-Coder-32B-Instruct": 32768, |
|
"meta-llama/Llama-3.2-3B-Instruct": 131072, |
|
"NousResearch/Hermes-3-Llama-3.1-70B": 12288, |
|
"meta-llama/Meta-Llama-3.1-405B-Instruct": 131000, |
|
"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072, |
|
"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072, |
|
"mistralai/Pixtral-12B-2409": 32768, |
|
"Qwen/Qwen2.5-VL-7B-Instruct": 32768, |
|
"meta-llama/Meta-Llama-3.1-405B": 32768, |
|
"meta-llama/Meta-Llama-3.1-405B-FP8": 32768, |
|
"deepseek-ai/DeepSeek-R1": 163840, |
|
"Qwen/QwQ-32B": 131072 |
|
}, |
|
"cohere": { |
|
"embed-english-light-v3.0": 512, |
|
"embed-multilingual-v2.0": 256, |
|
"rerank-v3.5": 4096, |
|
"embed-v4.0": 8192, |
|
"rerank-english-v3.0": 4096, |
|
"command-r-08-2024": 132096, |
|
"embed-english-light-v3.0-image": 0, |
|
"embed-english-v3.0-image": 0, |
|
"command-a-03-2025": 288000, |
|
"command-nightly": 288000, |
|
"command-r-plus-08-2024": 132096, |
|
"c4ai-aya-vision-32b": 16384, |
|
"command-r": 132096, |
|
"command-r7b-12-2024": 132000, |
|
"command-r7b-arabic-02-2025": 128000, |
|
"command-light-nightly": 4096, |
|
"embed-english-v3.0": 512, |
|
"embed-multilingual-light-v3.0-image": 0, |
|
"embed-multilingual-v3.0-image": 0, |
|
"c4ai-aya-expanse-32b": 128000, |
|
"command": 4096 |
|
}, |
|
"together": { |
|
"Qwen/QwQ-32B": 131072, |
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576, |
|
"meta-llama/Llama-Guard-4-12B": 1048576, |
|
"togethercomputer/m2-bert-80M-32k-retrieval": 32768, |
|
"google/gemma-2-9b-it": 8192, |
|
"cartesia/sonic": 0, |
|
"Qwen/Qwen2.5-7B-Instruct-Turbo": 32768, |
|
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192, |
|
"meta-llama-llama-2-70b-hf": 4096, |
|
"BAAI/bge-base-en-v1.5": 512, |
|
"Gryphe/MythoMax-L2-13b": 4096, |
|
"deepseek-ai/DeepSeek-V3": 131072, |
|
"mistralai/Mistral-7B-Instruct-v0.1": 32768, |
|
"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768, |
|
"google/gemma-2-27b-it": 8192, |
|
"Qwen/Qwen2-VL-72B-Instruct": 32768, |
|
"meta-llama/LlamaGuard-2-8b": 8192, |
|
"cartesia/sonic-2": 0, |
|
"togethercomputer/m2-bert-80M-8k-retrieval": 8192, |
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072, |
|
"scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192, |
|
"togethercomputer/Refuel-Llm-V2-Small": 8192, |
|
"togethercomputer/MoA-1": 32768, |
|
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192, |
|
"Qwen/Qwen3-235B-A22B-fp8-tput": 40960, |
|
"google/gemma-2b-it": 8192, |
|
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072, |
|
"Gryphe/MythoMax-L2-13b-Lite": 4096, |
|
"scb10x/scb10x-llama3-1-typhoon2-8b-instruct": 8192, |
|
"meta-llama/Meta-Llama-Guard-3-8B": 8192, |
|
"intfloat/multilingual-e5-large-instruct": 514, |
|
"deepseek-ai/DeepSeek-R1": 163840, |
|
"arcee-ai/arcee-blitz": 32768, |
|
"arcee_ai/arcee-spotlight": 131072, |
|
"arcee-ai/caller": 32768, |
|
"arcee-ai/coder-large": 32768, |
|
"arcee-ai/maestro-reasoning": 131072, |
|
"arcee-ai/virtuoso-large": 131072, |
|
"arcee-ai/virtuoso-medium-v2": 131072, |
|
"mistralai/Mistral-Small-24B-Instruct-2501": 32768, |
|
"meta-llama/Llama-3-8b-chat-hf": 8192, |
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576, |
|
"togethercomputer/MoA-1-Turbo": 32768, |
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072, |
|
"Qwen/Qwen3-235B-A22B-fp8": 40960, |
|
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768, |
|
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, |
|
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072, |
|
"meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192, |
|
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072, |
|
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072, |
|
"mistralai/Mixtral-8x7B-v0.1": 32768, |
|
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072, |
|
"mistralai/Mistral-7B-Instruct-v0.2": 32768, |
|
"deepseek-ai/DeepSeek-V3-p-dp": 131072, |
|
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072, |
|
"Qwen/Qwen2.5-Coder-32B-Instruct": 16384, |
|
"Qwen/Qwen2-72B-Instruct": 32768, |
|
"meta-llama/Llama-3-70b-chat-hf": 8192, |
|
"mistralai/Mistral-7B-Instruct-v0.3": 32768, |
|
"Salesforce/Llama-Rank-V1": 8192, |
|
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768, |
|
"meta-llama/Llama-Vision-Free": 131072, |
|
"meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072, |
|
"meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072, |
|
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815, |
|
"togethercomputer/Refuel-Llm-V2": 16384, |
|
"Alibaba-NLP/gte-modernbert-base": 8192, |
|
"Qwen/Qwen2.5-72B-Instruct-Turbo": 131072, |
|
"perplexity-ai/r1-1776": 163840, |
|
"meta-llama/Llama-2-70b-hf": 4096, |
|
"Qwen/Qwen2.5-VL-72B-Instruct": 32768 |
|
}, |
|
"fireworks-ai": { |
|
"accounts/fireworks/models/qwq-32b": 131072, |
|
"accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576, |
|
"accounts/fireworks/models/qwen3-30b-a3b": 40000, |
|
"accounts/fireworks/models/llama4-scout-instruct-basic": 1048576, |
|
"accounts/fireworks/models/firesearch-ocr-v6": 131072, |
|
"accounts/fireworks/models/deepseek-v3": 131072, |
|
"accounts/fireworks/models/llama-v3p1-8b-instruct": 131072, |
|
"accounts/fireworks/models/llama-v3p1-70b-instruct": 131072, |
|
"accounts/fireworks/models/deepseek-v3-0324": 163840, |
|
"accounts/fireworks/models/qwen3-235b-a22b": 128000, |
|
"accounts/fireworks/models/deepseek-r1-basic": 163840, |
|
"accounts/fireworks/models/llama-v3p3-70b-instruct": 131072, |
|
"accounts/fireworks/models/deepseek-r1": 163840, |
|
"accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000, |
|
"accounts/fireworks/models/qwen2-vl-72b-instruct": 32768, |
|
"accounts/fireworks/models/llama-guard-3-8b": 131072, |
|
"accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072, |
|
"accounts/perplexity/models/r1-1776": 163840, |
|
"accounts/fireworks/models/llama-v3p1-405b-instruct": 131072, |
|
"accounts/fireworks/models/mixtral-8x22b-instruct": 65536, |
|
"accounts/fireworks/models/qwen2p5-72b-instruct": 32768 |
|
} |
|
} |