ka1kuk commited on
Commit
7db0ae4
·
verified ·
1 Parent(s): 20a7d21

Upload 235 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. litellm/__init__.py +557 -0
  2. litellm/_logging.py +30 -0
  3. litellm/_redis.py +93 -0
  4. litellm/_version.py +6 -0
  5. litellm/budget_manager.py +206 -0
  6. litellm/caching.py +678 -0
  7. litellm/cost.json +5 -0
  8. litellm/deprecated_litellm_server/.env.template +43 -0
  9. litellm/deprecated_litellm_server/Dockerfile +10 -0
  10. litellm/deprecated_litellm_server/README.md +3 -0
  11. litellm/deprecated_litellm_server/__init__.py +2 -0
  12. litellm/deprecated_litellm_server/main.py +193 -0
  13. litellm/deprecated_litellm_server/requirements.txt +7 -0
  14. litellm/deprecated_litellm_server/server_utils.py +85 -0
  15. litellm/exceptions.py +200 -0
  16. litellm/integrations/__init__.py +1 -0
  17. litellm/integrations/aispend.py +177 -0
  18. litellm/integrations/berrispend.py +184 -0
  19. litellm/integrations/custom_logger.py +130 -0
  20. litellm/integrations/dynamodb.py +92 -0
  21. litellm/integrations/helicone.py +114 -0
  22. litellm/integrations/langfuse.py +191 -0
  23. litellm/integrations/langsmith.py +75 -0
  24. litellm/integrations/litedebugger.py +262 -0
  25. litellm/integrations/llmonitor.py +127 -0
  26. litellm/integrations/prompt_layer.py +72 -0
  27. litellm/integrations/s3.py +150 -0
  28. litellm/integrations/supabase.py +117 -0
  29. litellm/integrations/traceloop.py +114 -0
  30. litellm/integrations/weights_biases.py +223 -0
  31. litellm/llms/__init__.py +1 -0
  32. litellm/llms/ai21.py +212 -0
  33. litellm/llms/aleph_alpha.py +304 -0
  34. litellm/llms/anthropic.py +215 -0
  35. litellm/llms/azure.py +799 -0
  36. litellm/llms/base.py +45 -0
  37. litellm/llms/baseten.py +164 -0
  38. litellm/llms/bedrock.py +799 -0
  39. litellm/llms/cloudflare.py +176 -0
  40. litellm/llms/cohere.py +293 -0
  41. litellm/llms/custom_httpx/azure_dall_e_2.py +136 -0
  42. litellm/llms/custom_httpx/bedrock_async.py +0 -0
  43. litellm/llms/gemini.py +222 -0
  44. litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt +2523 -0
  45. litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt +0 -0
  46. litellm/llms/huggingface_restapi.py +750 -0
  47. litellm/llms/maritalk.py +189 -0
  48. litellm/llms/nlp_cloud.py +243 -0
  49. litellm/llms/ollama.py +400 -0
  50. litellm/llms/ollama_chat.py +333 -0
litellm/__init__.py ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### INIT VARIABLES ###
2
+ import threading, requests
3
+ from typing import Callable, List, Optional, Dict, Union, Any
4
+ from litellm.caching import Cache
5
+ from litellm._logging import set_verbose
6
+ from litellm.proxy._types import KeyManagementSystem
7
+ import httpx
8
+
9
+ input_callback: List[Union[str, Callable]] = []
10
+ success_callback: List[Union[str, Callable]] = []
11
+ failure_callback: List[Union[str, Callable]] = []
12
+ callbacks: List[Callable] = []
13
+ _async_input_callback: List[
14
+ Callable
15
+ ] = [] # internal variable - async custom callbacks are routed here.
16
+ _async_success_callback: List[
17
+ Union[str, Callable]
18
+ ] = [] # internal variable - async custom callbacks are routed here.
19
+ _async_failure_callback: List[
20
+ Callable
21
+ ] = [] # internal variable - async custom callbacks are routed here.
22
+ pre_call_rules: List[Callable] = []
23
+ post_call_rules: List[Callable] = []
24
+ email: Optional[
25
+ str
26
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
27
+ token: Optional[
28
+ str
29
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
30
+ telemetry = True
31
+ max_tokens = 256 # OpenAI Defaults
32
+ drop_params = False
33
+ retry = True
34
+ api_key: Optional[str] = None
35
+ openai_key: Optional[str] = None
36
+ azure_key: Optional[str] = None
37
+ anthropic_key: Optional[str] = None
38
+ replicate_key: Optional[str] = None
39
+ cohere_key: Optional[str] = None
40
+ maritalk_key: Optional[str] = None
41
+ ai21_key: Optional[str] = None
42
+ openrouter_key: Optional[str] = None
43
+ huggingface_key: Optional[str] = None
44
+ vertex_project: Optional[str] = None
45
+ vertex_location: Optional[str] = None
46
+ togetherai_api_key: Optional[str] = None
47
+ cloudflare_api_key: Optional[str] = None
48
+ baseten_key: Optional[str] = None
49
+ aleph_alpha_key: Optional[str] = None
50
+ nlp_cloud_key: Optional[str] = None
51
+ use_client: bool = False
52
+ logging: bool = True
53
+ caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
54
+ caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
55
+ cache: Optional[
56
+ Cache
57
+ ] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
58
+ model_alias_map: Dict[str, str] = {}
59
+ model_group_alias_map: Dict[str, str] = {}
60
+ max_budget: float = 0.0 # set the max budget across all providers
61
# Parameter names grouped under "OpenAI completion params" (how the list is
# consumed is outside this view).  Each name appears exactly once; the
# original listed "temperature" twice.
_openai_completion_params = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stop",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
]
90
+ _litellm_completion_params = [
91
+ "metadata",
92
+ "acompletion",
93
+ "caching",
94
+ "mock_response",
95
+ "api_key",
96
+ "api_version",
97
+ "api_base",
98
+ "force_timeout",
99
+ "logger_fn",
100
+ "verbose",
101
+ "custom_llm_provider",
102
+ "litellm_logging_obj",
103
+ "litellm_call_id",
104
+ "use_client",
105
+ "id",
106
+ "fallbacks",
107
+ "azure",
108
+ "headers",
109
+ "model_list",
110
+ "num_retries",
111
+ "context_window_fallback_dict",
112
+ "roles",
113
+ "final_prompt_value",
114
+ "bos_token",
115
+ "eos_token",
116
+ "request_timeout",
117
+ "complete_response",
118
+ "self",
119
+ "client",
120
+ "rpm",
121
+ "tpm",
122
+ "input_cost_per_token",
123
+ "output_cost_per_token",
124
+ "hf_model_name",
125
+ "model_info",
126
+ "proxy_server_request",
127
+ "preset_cache_key",
128
+ ]
129
+ _current_cost = 0 # private variable, used if max budget is set
130
+ error_logs: Dict = {}
131
+ add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
132
+ client_session: Optional[httpx.Client] = None
133
+ aclient_session: Optional[httpx.AsyncClient] = None
134
+ model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
135
+ model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
136
+ suppress_debug_info = False
137
+ dynamodb_table_name: Optional[str] = None
138
+ s3_callback_params: Optional[Dict] = None
139
+ #### RELIABILITY ####
140
+ request_timeout: Optional[float] = 6000
141
+ num_retries: Optional[int] = None # per model endpoint
142
+ fallbacks: Optional[List] = None
143
+ context_window_fallbacks: Optional[List] = None
144
+ allowed_fails: int = 0
145
+ num_retries_per_request: Optional[
146
+ int
147
+ ] = None # for the request overall (incl. fallbacks + model retries)
148
+ ####### SECRET MANAGERS #####################
149
+ secret_manager_client: Optional[
150
+ Any
151
+ ] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
152
+ _google_kms_resource_name: Optional[str] = None
153
+ _key_management_system: Optional[KeyManagementSystem] = None
154
+ #############################################
155
+
156
+
157
def get_model_cost_map(url: str):
    """Fetch the model cost map from ``url``, falling back to the packaged copy.

    The document at ``url`` is requested with a 5 second timeout and decoded
    as JSON.  If anything in that path raises (request failure, unsuccessful
    status, undecodable body), the ``model_prices_and_context_window_backup.json``
    resource of the ``litellm`` package is loaded instead.

    Returns:
        The decoded JSON content.
    """
    try:
        with requests.get(url, timeout=5) as remote:
            # An unsuccessful status raises here and triggers the fallback.
            remote.raise_for_status()
            return remote.json()
    except Exception:
        # Deliberate catch-all: every failure falls back to the bundled file.
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as backup_file:
            return json.load(backup_file)
174
+
175
+
176
+ model_cost = get_model_cost_map(url=model_cost_map_url)
177
+ custom_prompt_dict: Dict[str, dict] = {}
178
+
179
+
180
+ ####### THREAD-SPECIFIC DATA ###################
181
class MyLocal(threading.local):
    """Thread-local holder whose ``user`` attribute starts as ``"Hello World"``."""

    def __init__(self):
        self.user = "Hello World"


# Module-wide thread-local instance written to by ``identify``.
_thread_context = MyLocal()


def identify(event_details):
    """Remember ``event_details["user"]`` in the thread-local context.

    Nothing is changed when ``event_details`` has no ``"user"`` entry.
    """
    if "user" not in event_details:
        return
    # Store user in thread local data
    _thread_context.user = event_details["user"]
193
+
194
+
195
+ ####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
196
+ api_base = None
197
+ headers = None
198
+ api_version = None
199
+ organization = None
200
+ config_path = None
201
+ ####### COMPLETION MODELS ###################
202
+ open_ai_chat_completion_models: List = []
203
+ open_ai_text_completion_models: List = []
204
+ cohere_models: List = []
205
+ anthropic_models: List = []
206
+ openrouter_models: List = []
207
+ vertex_language_models: List = []
208
+ vertex_vision_models: List = []
209
+ vertex_chat_models: List = []
210
+ vertex_code_chat_models: List = []
211
+ vertex_text_models: List = []
212
+ vertex_code_text_models: List = []
213
+ ai21_models: List = []
214
+ nlp_cloud_models: List = []
215
+ aleph_alpha_models: List = []
216
+ bedrock_models: List = []
217
+ deepinfra_models: List = []
218
+ perplexity_models: List = []
219
# Lookup from a cost-map entry's "litellm_provider" value to the list that
# collects the model names for that provider.
_provider_model_lists = {
    "openai": open_ai_chat_completion_models,
    "text-completion-openai": open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "openrouter": openrouter_models,
    "vertex_ai-text-models": vertex_text_models,
    "vertex_ai-code-text-models": vertex_code_text_models,
    "vertex_ai-language-models": vertex_language_models,
    "vertex_ai-vision-models": vertex_vision_models,
    "vertex_ai-chat-models": vertex_chat_models,
    "vertex_ai-code-chat-models": vertex_code_chat_models,
    "ai21": ai21_models,
    "nlp_cloud": nlp_cloud_models,
    "aleph_alpha": aleph_alpha_models,
    "bedrock": bedrock_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
}
for key, value in model_cost.items():
    _provider = value.get("litellm_provider")
    # Entries with a missing, non-string or unlisted provider are skipped,
    # exactly as they fell through every branch of the equality chain.
    if isinstance(_provider, str) and _provider in _provider_model_lists:
        _provider_model_lists[_provider].append(key)
254
+
255
+ # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
256
+ openai_compatible_endpoints: List = [
257
+ "api.perplexity.ai",
258
+ "api.endpoints.anyscale.com/v1",
259
+ "api.deepinfra.com/v1/openai",
260
+ "api.mistral.ai/v1",
261
+ ]
262
+
263
+ # this is maintained for Exception Mapping
264
+ openai_compatible_providers: List = [
265
+ "anyscale",
266
+ "mistral",
267
+ "deepinfra",
268
+ "perplexity",
269
+ "xinference",
270
+ ]
271
+
272
+
273
+ # well supported replicate llms
274
replicate_models: List = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    # The trailing comma matters: without it Python concatenates this string
    # with the next literal, producing one bogus entry instead of two models.
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]
288
+
289
+ huggingface_models: List = [
290
+ "meta-llama/Llama-2-7b-hf",
291
+ "meta-llama/Llama-2-7b-chat-hf",
292
+ "meta-llama/Llama-2-13b-hf",
293
+ "meta-llama/Llama-2-13b-chat-hf",
294
+ "meta-llama/Llama-2-70b-hf",
295
+ "meta-llama/Llama-2-70b-chat-hf",
296
+ "meta-llama/Llama-2-7b",
297
+ "meta-llama/Llama-2-7b-chat",
298
+ "meta-llama/Llama-2-13b",
299
+ "meta-llama/Llama-2-13b-chat",
300
+ "meta-llama/Llama-2-70b",
301
+ "meta-llama/Llama-2-70b-chat",
302
+ ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
303
+
304
+ together_ai_models: List = [
305
+ # llama llms - chat
306
+ "togethercomputer/llama-2-70b-chat",
307
+ # llama llms - language / instruct
308
+ "togethercomputer/llama-2-70b",
309
+ "togethercomputer/LLaMA-2-7B-32K",
310
+ "togethercomputer/Llama-2-7B-32K-Instruct",
311
+ "togethercomputer/llama-2-7b",
312
+ # falcon llms
313
+ "togethercomputer/falcon-40b-instruct",
314
+ "togethercomputer/falcon-7b-instruct",
315
+ # alpaca
316
+ "togethercomputer/alpaca-7b",
317
+ # chat llms
318
+ "HuggingFaceH4/starchat-alpha",
319
+ # code llms
320
+ "togethercomputer/CodeLlama-34b",
321
+ "togethercomputer/CodeLlama-34b-Instruct",
322
+ "togethercomputer/CodeLlama-34b-Python",
323
+ "defog/sqlcoder",
324
+ "NumbersStation/nsql-llama-2-7B",
325
+ "WizardLM/WizardCoder-15B-V1.0",
326
+ "WizardLM/WizardCoder-Python-34B-V1.0",
327
+ # language llms
328
+ "NousResearch/Nous-Hermes-Llama2-13b",
329
+ "Austism/chronos-hermes-13b",
330
+ "upstage/SOLAR-0-70b-16bit",
331
+ "WizardLM/WizardLM-70B-V1.0",
332
+ ] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)
333
+
334
+
335
+ baseten_models: List = [
336
+ "qvv0xeq",
337
+ "q841o8w",
338
+ "31dxrj3",
339
+ ] # FALCON 7B # WizardLM # Mosaic ML
340
+
341
+
342
+ # used for Cost Tracking & Token counting
343
+ # https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
344
+ # Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting
345
+ azure_llms = {
346
+ "gpt-35-turbo": "azure/gpt-35-turbo",
347
+ "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
348
+ "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
349
+ }
350
+
351
+ azure_embedding_models = {
352
+ "ada": "azure/ada",
353
+ }
354
+
355
+ petals_models = [
356
+ "petals-team/StableBeluga2",
357
+ ]
358
+
359
+ ollama_models = ["llama2"]
360
+
361
+ maritalk_models = ["maritalk"]
362
+
363
+ model_list = (
364
+ open_ai_chat_completion_models
365
+ + open_ai_text_completion_models
366
+ + cohere_models
367
+ + anthropic_models
368
+ + replicate_models
369
+ + openrouter_models
370
+ + huggingface_models
371
+ + vertex_chat_models
372
+ + vertex_text_models
373
+ + ai21_models
374
+ + together_ai_models
375
+ + baseten_models
376
+ + aleph_alpha_models
377
+ + nlp_cloud_models
378
+ + ollama_models
379
+ + bedrock_models
380
+ + deepinfra_models
381
+ + perplexity_models
382
+ + maritalk_models
383
+ )
384
+
385
+ provider_list: List = [
386
+ "openai",
387
+ "custom_openai",
388
+ "text-completion-openai",
389
+ "cohere",
390
+ "anthropic",
391
+ "replicate",
392
+ "huggingface",
393
+ "together_ai",
394
+ "openrouter",
395
+ "vertex_ai",
396
+ "palm",
397
+ "gemini",
398
+ "ai21",
399
+ "baseten",
400
+ "azure",
401
+ "sagemaker",
402
+ "bedrock",
403
+ "vllm",
404
+ "nlp_cloud",
405
+ "petals",
406
+ "oobabooga",
407
+ "ollama",
408
+ "ollama_chat",
409
+ "deepinfra",
410
+ "perplexity",
411
+ "anyscale",
412
+ "mistral",
413
+ "maritalk",
414
+ "voyage",
415
+ "cloudflare",
416
+ "xinference",
417
+ "custom", # custom apis
418
+ ]
419
+
420
+ models_by_provider: dict = {
421
+ "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
422
+ "cohere": cohere_models,
423
+ "anthropic": anthropic_models,
424
+ "replicate": replicate_models,
425
+ "huggingface": huggingface_models,
426
+ "together_ai": together_ai_models,
427
+ "baseten": baseten_models,
428
+ "openrouter": openrouter_models,
429
+ "vertex_ai": vertex_chat_models + vertex_text_models,
430
+ "ai21": ai21_models,
431
+ "bedrock": bedrock_models,
432
+ "petals": petals_models,
433
+ "ollama": ollama_models,
434
+ "deepinfra": deepinfra_models,
435
+ "perplexity": perplexity_models,
436
+ "maritalk": maritalk_models,
437
+ }
438
+
439
+ # mapping for those models which have larger equivalents
440
+ longer_context_model_fallback_dict: dict = {
441
+ # openai chat completion models
442
+ "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
443
+ "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
444
+ "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
445
+ "gpt-4": "gpt-4-32k",
446
+ "gpt-4-0314": "gpt-4-32k-0314",
447
+ "gpt-4-0613": "gpt-4-32k-0613",
448
+ # anthropic
449
+ "claude-instant-1": "claude-2",
450
+ "claude-instant-1.2": "claude-2",
451
+ # vertexai
452
+ "chat-bison": "chat-bison-32k",
453
+ "chat-bison@001": "chat-bison-32k",
454
+ "codechat-bison": "codechat-bison-32k",
455
+ "codechat-bison@001": "codechat-bison-32k",
456
+ # openrouter
457
+ "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
458
+ "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
459
+ }
460
+
461
+ ####### EMBEDDING MODELS ###################
462
+ open_ai_embedding_models: List = ["text-embedding-ada-002"]
463
+ cohere_embedding_models: List = [
464
+ "embed-english-v3.0",
465
+ "embed-english-light-v3.0",
466
+ "embed-multilingual-v3.0",
467
+ "embed-english-v2.0",
468
+ "embed-english-light-v2.0",
469
+ "embed-multilingual-v2.0",
470
+ ]
471
+ bedrock_embedding_models: List = [
472
+ "amazon.titan-embed-text-v1",
473
+ "cohere.embed-english-v3",
474
+ "cohere.embed-multilingual-v3",
475
+ ]
476
+
477
+ all_embedding_models = (
478
+ open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
479
+ )
480
+
481
+ ####### IMAGE GENERATION MODELS ###################
482
+ openai_image_generation_models = ["dall-e-2", "dall-e-3"]
483
+
484
+
485
+ from .timeout import timeout
486
+ from .utils import (
487
+ client,
488
+ exception_type,
489
+ get_optional_params,
490
+ modify_integration,
491
+ token_counter,
492
+ cost_per_token,
493
+ completion_cost,
494
+ get_litellm_params,
495
+ Logging,
496
+ acreate,
497
+ get_model_list,
498
+ get_max_tokens,
499
+ get_model_info,
500
+ register_prompt_template,
501
+ validate_environment,
502
+ check_valid_key,
503
+ get_llm_provider,
504
+ register_model,
505
+ encode,
506
+ decode,
507
+ _calculate_retry_after,
508
+ _should_retry,
509
+ get_secret,
510
+ )
511
+ from .llms.huggingface_restapi import HuggingfaceConfig
512
+ from .llms.anthropic import AnthropicConfig
513
+ from .llms.replicate import ReplicateConfig
514
+ from .llms.cohere import CohereConfig
515
+ from .llms.ai21 import AI21Config
516
+ from .llms.together_ai import TogetherAIConfig
517
+ from .llms.cloudflare import CloudflareConfig
518
+ from .llms.palm import PalmConfig
519
+ from .llms.gemini import GeminiConfig
520
+ from .llms.nlp_cloud import NLPCloudConfig
521
+ from .llms.aleph_alpha import AlephAlphaConfig
522
+ from .llms.petals import PetalsConfig
523
+ from .llms.vertex_ai import VertexAIConfig
524
+ from .llms.sagemaker import SagemakerConfig
525
+ from .llms.ollama import OllamaConfig
526
+ from .llms.maritalk import MaritTalkConfig
527
+ from .llms.bedrock import (
528
+ AmazonTitanConfig,
529
+ AmazonAI21Config,
530
+ AmazonAnthropicConfig,
531
+ AmazonCohereConfig,
532
+ AmazonLlamaConfig,
533
+ )
534
+ from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
535
+ from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
536
+ from .main import * # type: ignore
537
+ from .integrations import *
538
+ from .exceptions import (
539
+ AuthenticationError,
540
+ InvalidRequestError,
541
+ BadRequestError,
542
+ NotFoundError,
543
+ RateLimitError,
544
+ ServiceUnavailableError,
545
+ OpenAIError,
546
+ ContextWindowExceededError,
547
+ ContentPolicyViolationError,
548
+ BudgetExceededError,
549
+ APIError,
550
+ Timeout,
551
+ APIConnectionError,
552
+ APIResponseValidationError,
553
+ UnprocessableEntityError,
554
+ )
555
+ from .budget_manager import BudgetManager
556
+ from .proxy.proxy_cli import run_server
557
+ from .router import Router
litellm/_logging.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ set_verbose = False
4
+
5
+ # Create a handler for the logger (you may need to adapt this based on your needs)
6
+ handler = logging.StreamHandler()
7
+ handler.setLevel(logging.DEBUG)
8
+
9
+ # Create a formatter and set it for the handler
10
+
11
+ formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")
12
+
13
+ handler.setFormatter(formatter)
14
+
15
+
16
def print_verbose(print_statement):
    """Print ``print_statement`` to stdout when ``set_verbose`` is truthy.

    Printing is best-effort: an ordinary exception raised while printing is
    ignored so verbose output cannot break the caller.  ``KeyboardInterrupt``
    and ``SystemExit`` are not swallowed (the original bare ``except:`` hid
    those as well).
    """
    try:
        if set_verbose:
            print(print_statement)  # noqa
    except Exception:
        # Deliberately ignored - see the docstring.
        pass
22
+
23
+
24
verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
verbose_router_logger = logging.getLogger("LiteLLM Router")
verbose_logger = logging.getLogger("LiteLLM")

# Add the handler to every logger defined above.  ``verbose_logger`` was
# previously created without it, so its records never went through the
# formatter configured for this module.
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
verbose_logger.addHandler(handler)
litellm/_redis.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
11
+ import os
12
+ import inspect
13
+ import redis, litellm
14
+ from typing import List, Optional
15
+
16
+
17
def _get_redis_kwargs():
    """Return the ``redis.Redis`` argument names that may be configured.

    The names are the positional argument names reported by
    ``inspect.getfullargspec(redis.Redis)`` minus ``self``,
    ``connection_pool`` and ``retry``, followed by ``"url"``.
    """
    # Only allow primitive arguments
    excluded = {"self", "connection_pool", "retry"}
    allowed = [
        name
        for name in inspect.getfullargspec(redis.Redis).args
        if name not in excluded
    ]
    allowed.append("url")
    return allowed
32
+
33
+
34
def _get_redis_env_kwarg_mapping():
    """Map ``REDIS_<NAME>`` variable names to the matching redis argument name.

    ``<NAME>`` is the upper-cased argument name from ``_get_redis_kwargs()``.
    """
    env_to_kwarg = {}
    for kwarg in _get_redis_kwargs():
        env_to_kwarg["REDIS_" + kwarg.upper()] = kwarg
    return env_to_kwarg
38
+
39
+
40
def _redis_kwargs_from_environment():
    """Collect redis keyword arguments from the configured secrets.

    Every ``REDIS_*`` name from ``_get_redis_env_kwarg_mapping()`` is looked
    up with ``litellm.get_secret``; names that resolve to ``None`` are left
    out of the returned dict.
    """
    resolved = (
        # check os.environ/key vault
        (kwarg, litellm.get_secret(env_name, default_value=None))
        for env_name, kwarg in _get_redis_env_kwarg_mapping().items()
    )
    return {kwarg: secret for kwarg, secret in resolved if secret is not None}
49
+
50
+
51
def get_redis_url_from_environment():
    """Build a Redis connection URL from environment variables.

    ``REDIS_URL`` is returned verbatim when set.  Otherwise the URL is
    assembled as ``redis://[:<password>@]<host>:<port>`` from ``REDIS_HOST``,
    ``REDIS_PORT`` and the optional ``REDIS_PASSWORD``.

    Returns:
        The connection URL as a string.

    Raises:
        ValueError: if ``REDIS_URL`` is unset and ``REDIS_HOST`` or
            ``REDIS_PORT`` is missing.
    """
    # Function-scope import keeps this fix self-contained.
    from urllib.parse import quote

    env = os.environ
    if "REDIS_URL" in env:
        return env["REDIS_URL"]

    if "REDIS_HOST" not in env or "REDIS_PORT" not in env:
        raise ValueError(
            "Either 'REDIS_URL' or both 'REDIS_HOST' and 'REDIS_PORT' must be specified for Redis."
        )

    credentials = ""
    if "REDIS_PASSWORD" in env:
        # Percent-encode the password so characters such as "@", ":" or "/"
        # cannot be mistaken for URL delimiters.  Passwords made only of
        # unreserved characters produce the same URL as before.
        credentials = f":{quote(env['REDIS_PASSWORD'], safe='')}@"

    return f"redis://{credentials}{env['REDIS_HOST']}:{env['REDIS_PORT']}"
68
+
69
+
70
def get_redis_client(**env_overrides):
    """Create a ``redis.Redis`` client from environment secrets plus overrides.

    String overrides that start with ``"os.environ/"`` are resolved through
    ``litellm.get_secret``.  Overrides take precedence over the values from
    ``_redis_kwargs_from_environment()``.  When a non-``None`` ``url`` is
    present, ``host``, ``port``, ``db`` and ``password`` are dropped and the
    client is built with ``redis.Redis.from_url``.

    Raises:
        ValueError: if neither a ``url`` nor a ``host`` is available.
    """
    ### check if "os.environ/<key-name>" passed in
    overrides = {}
    for name, raw in env_overrides.items():
        if isinstance(raw, str) and raw.startswith("os.environ/"):
            raw = litellm.get_secret(raw.replace("os.environ/", ""))
        overrides[name] = raw

    redis_kwargs = {**_redis_kwargs_from_environment(), **overrides}

    if redis_kwargs.get("url") is not None:
        # These settings are removed whenever a URL is used.
        for shadowed in ("host", "port", "db", "password"):
            redis_kwargs.pop(shadowed, None)
        return redis.Redis.from_url(**redis_kwargs)

    if redis_kwargs.get("host") is None:
        raise ValueError("Either 'host' or 'url' must be specified for redis.")
    litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
    return redis.Redis(**redis_kwargs)
litellm/_version.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import importlib_metadata
2
+
3
try:
    version = importlib_metadata.version("litellm")
except Exception:
    # The lookup failed; still define ``version`` so that importing or
    # reading it never raises.  Previously the name was left undefined.
    version = "unknown"
litellm/budget_manager.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, time
2
+ import litellm
3
+ from litellm.utils import ModelResponse
4
+ import requests, threading
5
+ from typing import Optional, Union, Literal
6
+
7
+
8
class BudgetManager:
    """Track per-user spend against a budget, stored locally or on a server.

    All state lives in ``self.user_dict``, keyed by user.  With
    ``client_type == "local"`` it is read from / written to ``user_cost.json``
    in the current working directory; with ``client_type == "hosted"`` it is
    exchanged with ``<api_base>/get_budget`` and ``<api_base>/set_budget``.
    """

    def __init__(
        self,
        project_name: str,
        client_type: str = "local",
        api_base: Optional[str] = None,
    ):
        """Store the configuration and load any existing budget data.

        Args:
            project_name: Sent as ``project_name`` in hosted requests.
            client_type: ``"local"`` or ``"hosted"``.
            api_base: Base URL for hosted mode; defaults to
                ``https://api.litellm.ai`` when falsy.
        """
        self.client_type = client_type
        self.project_name = project_name
        self.api_base = api_base or "https://api.litellm.ai"
        ## load the data or init the initial dictionaries
        self.load_data()

    def print_verbose(self, print_statement):
        """Log ``print_statement`` via ``logging.info`` when ``litellm.set_verbose`` is truthy.

        Best-effort: ordinary exceptions are ignored so that logging can never
        break budget handling.
        """
        try:
            if litellm.set_verbose:
                import logging

                logging.info(print_statement)
        except Exception:
            pass

    def load_data(self):
        """Populate ``self.user_dict`` from the configured storage.

        Local mode reads ``user_cost.json`` when it exists and otherwise
        starts with an empty dict.  Hosted mode POSTs the project name to
        ``/get_budget`` and uses the ``data`` field of the reply, or an empty
        dict when the reply's ``status`` is ``"error"``.  For any other
        ``client_type`` nothing is assigned.
        """
        if self.client_type == "local":
            # Check if user dict file exists
            if os.path.isfile("user_cost.json"):
                # Load the user dict
                with open("user_cost.json", "r") as json_file:
                    self.user_dict = json.load(json_file)
            else:
                self.print_verbose("User Dictionary not found!")
                self.user_dict = {}
            self.print_verbose(f"user dict from local: {self.user_dict}")
        elif self.client_type == "hosted":
            # Load the user_dict from hosted db
            url = self.api_base + "/get_budget"
            headers = {"Content-Type": "application/json"}
            data = {"project_name": self.project_name}
            response = requests.post(url, headers=headers, json=data)
            response = response.json()
            if response["status"] == "error":
                self.user_dict = (
                    {}
                )  # assume this means the user dict hasn't been stored yet
            else:
                self.user_dict = response["data"]

    def create_budget(
        self,
        total_budget: float,
        user: str,
        duration: Optional[Literal["daily", "weekly", "monthly", "yearly"]] = None,
        created_at: Optional[float] = None,
    ):
        """Create (or overwrite) the budget entry for ``user``.

        Args:
            total_budget: Budget stored under ``"total_budget"``.
            user: Key of the entry in ``self.user_dict``.
            duration: Optional reset period; stored in days (1, 7, 28 or 365).
            created_at: Timestamp for ``created_at`` / ``last_updated_at``.
                Defaults to the time of the call.  The previous default
                ``time.time()`` was evaluated once, when the method was
                defined, so every budget received the import-time clock.

        Returns:
            The entry stored for ``user``.

        Raises:
            ValueError: if ``duration`` is not one of the supported names.
        """
        if created_at is None:
            created_at = time.time()

        self.user_dict[user] = {"total_budget": total_budget}
        if duration is None:
            # NOTE(review): this path returns without persisting - confirm intended.
            return self.user_dict[user]

        if duration == "daily":
            duration_in_days = 1
        elif duration == "weekly":
            duration_in_days = 7
        elif duration == "monthly":
            duration_in_days = 28
        elif duration == "yearly":
            duration_in_days = 365
        else:
            raise ValueError(
                """duration needs to be one of ["daily", "weekly", "monthly", "yearly"]"""
            )
        self.user_dict[user] = {
            "total_budget": total_budget,
            "duration": duration_in_days,
            "created_at": created_at,
            "last_updated_at": created_at,
        }
        self._save_data_thread()  # [Non-Blocking] Update persistent storage without blocking execution
        return self.user_dict[user]

    def projected_cost(self, model: str, messages: list, user: str):
        """Return the user's current cost plus the prompt cost of ``messages``.

        The ``content`` of all messages is concatenated, tokens are counted
        with ``litellm.token_counter`` and priced with
        ``litellm.cost_per_token`` (zero completion tokens).
        """
        text = "".join(message["content"] for message in messages)
        prompt_tokens = litellm.token_counter(model=model, text=text)
        prompt_cost, _ = litellm.cost_per_token(
            model=model, prompt_tokens=prompt_tokens, completion_tokens=0
        )
        current_cost = self.user_dict[user].get("current_cost", 0)
        projected_cost = prompt_cost + current_cost
        return projected_cost

    def get_total_budget(self, user: str):
        """Return the ``total_budget`` stored for ``user``."""
        return self.user_dict[user]["total_budget"]

    def update_cost(
        self,
        user: str,
        completion_obj: Optional["ModelResponse"] = None,
        model: Optional[str] = None,
        input_text: Optional[str] = None,
        output_text: Optional[str] = None,
    ):
        """Add the cost of one call to the totals kept for ``user``.

        The cost comes from ``model`` + ``input_text`` + ``output_text`` when
        all three are truthy, otherwise from ``completion_obj``.

        Returns:
            ``{"user": <entry for user>}`` after the update.

        Raises:
            ValueError: if neither form of input is supplied.
        """
        if model and input_text and output_text:
            prompt_tokens = litellm.token_counter(
                model=model, messages=[{"role": "user", "content": input_text}]
            )
            completion_tokens = litellm.token_counter(
                model=model, messages=[{"role": "user", "content": output_text}]
            )
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = litellm.cost_per_token(
                model=model,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            )
            cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
        elif completion_obj:
            cost = litellm.completion_cost(completion_response=completion_obj)
            # The model name is read from the response itself.
            model = completion_obj["model"]
        else:
            raise ValueError(
                "Either a chat completion object or the text response needs to be passed in. Learn more - https://docs.litellm.ai/docs/budget_manager"
            )

        self.user_dict[user]["current_cost"] = cost + self.user_dict[user].get(
            "current_cost", 0
        )
        if "model_cost" in self.user_dict[user]:
            self.user_dict[user]["model_cost"][model] = cost + self.user_dict[user][
                "model_cost"
            ].get(model, 0)
        else:
            self.user_dict[user]["model_cost"] = {model: cost}

        self._save_data_thread()  # [Non-Blocking] Update persistent storage without blocking execution
        return {"user": self.user_dict[user]}

    def get_current_cost(self, user):
        """Return the accumulated cost for ``user`` (0 when none is recorded)."""
        return self.user_dict[user].get("current_cost", 0)

    def get_model_cost(self, user):
        """Return the per-model cost mapping for ``user``, or 0 when none is recorded."""
        return self.user_dict[user].get("model_cost", 0)

    def is_valid_user(self, user: str) -> bool:
        """Return whether ``user`` has an entry in ``self.user_dict``."""
        return user in self.user_dict

    def get_users(self):
        """Return the list of users that have an entry."""
        return list(self.user_dict.keys())

    def reset_cost(self, user):
        """Zero the current cost and clear the per-model costs of ``user``."""
        self.user_dict[user]["current_cost"] = 0
        self.user_dict[user]["model_cost"] = {}
        return {"user": self.user_dict[user]}

    def reset_on_duration(self, user: str):
        """Reset the cost of ``user`` once the budget duration has elapsed.

        Reads ``last_updated_at`` and ``duration`` (in days) from the user's
        entry; the data is saved on every call.
        """
        # Get current and creation time
        last_updated_at = self.user_dict[user]["last_updated_at"]
        current_time = time.time()

        # Convert duration from days to seconds
        duration_in_seconds = self.user_dict[user]["duration"] * 24 * 60 * 60

        # Check if duration has elapsed
        if current_time - last_updated_at >= duration_in_seconds:
            # Reset cost if duration has elapsed and update the creation time
            self.reset_cost(user)
            self.user_dict[user]["last_updated_at"] = current_time
            self._save_data_thread()  # Save the data

    def update_budget_all_users(self):
        """Run ``reset_on_duration`` for every user that has a ``duration``."""
        for user in self.get_users():
            if "duration" in self.user_dict[user]:
                self.reset_on_duration(user)

    def _save_data_thread(self):
        """Start a thread that runs ``save_data``."""
        thread = threading.Thread(
            target=self.save_data
        )  # [Non-Blocking]: saves data without blocking execution
        thread.start()

    def save_data(self):
        """Persist ``self.user_dict`` to the configured storage.

        Local mode writes ``user_cost.json`` and returns
        ``{"status": "success"}``; hosted mode POSTs to ``/set_budget`` and
        returns the decoded reply.  Any other ``client_type`` does nothing.
        """
        if self.client_type == "local":
            # save the user dict
            with open("user_cost.json", "w") as json_file:
                json.dump(
                    self.user_dict, json_file, indent=4
                )  # Indent for pretty formatting
            return {"status": "success"}
        elif self.client_type == "hosted":
            url = self.api_base + "/set_budget"
            headers = {"Content-Type": "application/json"}
            data = {"project_name": self.project_name, "user_dict": self.user_dict}
            response = requests.post(url, headers=headers, json=data)
            response = response.json()
            return response
litellm/caching.py ADDED
@@ -0,0 +1,678 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ import litellm
11
+ import time, logging
12
+ import json, traceback, ast, hashlib
13
+ from typing import Optional, Literal, List, Union, Any
14
+ from openai._models import BaseModel as OpenAIObject
15
+
16
+
17
def print_verbose(print_statement):
    """
    Print `print_statement` when `litellm.set_verbose` is truthy.

    Printing is best-effort: any ordinary exception raised while checking the
    flag or printing is swallowed so that logging can never break a request.
    KeyboardInterrupt / SystemExit are deliberately NOT swallowed.
    """
    try:
        if litellm.set_verbose:
            print(print_statement)  # noqa
    except Exception:
        pass
23
+
24
+
25
class BaseCache:
    """Minimal interface shared by the cache backends in this module."""

    def set_cache(self, key, value, **kwargs):
        """Store `value` under `key`; backends must override this."""
        raise NotImplementedError

    def get_cache(self, key, **kwargs):
        """Return the value stored under `key`; backends must override this."""
        raise NotImplementedError


class InMemoryCache(BaseCache):
    """Process-local cache kept in two dicts: values and optional expiry times."""

    def __init__(self):
        # if users don't provide one, use the default litellm cache
        self.cache_dict = {}
        # key -> absolute expiry time (seconds since epoch); only for keys set with a ttl
        self.ttl_dict = {}

    def set_cache(self, key, value, **kwargs):
        """
        Store `value` under `key`.

        Keyword Args:
            ttl: lifetime in seconds. When omitted or None the entry never
                expires, and any expiry left over from an earlier write of
                the same key is discarded.
        """
        self.cache_dict[key] = value
        ttl = kwargs.get("ttl")
        if ttl is not None:
            self.ttl_dict[key] = time.time() + ttl
        else:
            # a stale expiry must not apply to the freshly written value
            self.ttl_dict.pop(key, None)

    def get_cache(self, key, **kwargs):
        """
        Return the value stored under `key`, or None if absent or expired.

        Expired entries are evicted from both dicts. The stored value is run
        through `json.loads`; if that fails for any reason (e.g. the value is
        not a JSON string) the value is returned unchanged.
        """
        if key not in self.cache_dict:
            return None

        expires_at = self.ttl_dict.get(key)
        if expires_at is not None and time.time() > expires_at:
            self.cache_dict.pop(key, None)
            self.ttl_dict.pop(key, None)
            return None

        original_cached_response = self.cache_dict[key]
        try:
            return json.loads(original_cached_response)
        except Exception:
            return original_cached_response

    def flush_cache(self):
        """Remove every entry and every expiry."""
        self.cache_dict.clear()
        self.ttl_dict.clear()
61
+
62
+
63
class RedisCache(BaseCache):
    """Cache backend that stores values in Redis through a client from `._redis`."""

    def __init__(self, host=None, port=None, password=None, **kwargs):
        """
        Build the Redis client.

        Only the connection settings that are not None are forwarded to
        `get_redis_client`; `kwargs` are merged in last and therefore win on
        key collisions.
        """
        # imported but not otherwise used here; presumably an early check
        # that the redis package is installed
        import redis

        # if users don't provide one, use the default litellm cache
        from ._redis import get_redis_client

        redis_kwargs = {}
        if host is not None:
            redis_kwargs["host"] = host
        if port is not None:
            redis_kwargs["port"] = port
        if password is not None:
            redis_kwargs["password"] = password

        redis_kwargs.update(kwargs)

        self.redis_client = get_redis_client(**redis_kwargs)

    def set_cache(self, key, value, **kwargs):
        """
        Store ``str(value)`` under `key`, with optional expiry ``ttl`` (passed as `ex`).

        Redis errors are logged at debug level and never propagated.
        """
        ttl = kwargs.get("ttl", None)
        print_verbose(f"Set Redis Cache: key: {key}\nValue {value}")
        try:
            self.redis_client.set(name=key, value=str(value), ex=ttl)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            logging.debug(
                "LiteLLM Caching: set() - Got exception from REDIS : %s", e
            )

    def get_cache(self, key, **kwargs):
        """
        Return the decoded value stored under `key`, or None.

        The stored bytes are decoded as UTF-8 and parsed as JSON; since
        `set_cache` writes ``str(value)``, a Python-literal parse is used as
        the fallback. Any failure is logged and results in None.
        """
        try:
            print_verbose(f"Get Redis Cache: key: {key}")
            cached_response = self.redis_client.get(key)
            print_verbose(
                f"Got Redis Cache: key: {key}, cached_response {cached_response}"
            )
            if cached_response is not None:
                # cached_response is bytes; convert it to a Python object
                cached_response = cached_response.decode("utf-8")
                try:
                    cached_response = json.loads(cached_response)
                except Exception:
                    cached_response = ast.literal_eval(cached_response)
            return cached_response
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            traceback.print_exc()
            logging.debug(
                "LiteLLM Caching: get() - Got exception from REDIS: %s", e
            )

    def flush_cache(self):
        """Delete every key in the Redis server (`flushall`)."""
        self.redis_client.flushall()
117
+
118
+
119
class S3Cache(BaseCache):
    """Cache backend that stores each entry as a JSON object in an S3 bucket."""

    def __init__(
        self,
        s3_bucket_name,
        s3_region_name=None,
        s3_api_version=None,
        s3_use_ssl=True,
        s3_verify=None,
        s3_endpoint_url=None,
        s3_aws_access_key_id=None,
        s3_aws_secret_access_key=None,
        s3_aws_session_token=None,
        s3_config=None,
        **kwargs,
    ):
        """Create the boto3 S3 client; every ``s3_*`` argument maps to the matching `boto3.client` option."""
        import boto3

        self.bucket_name = s3_bucket_name
        # Create an S3 client with custom endpoint URL
        self.s3_client = boto3.client(
            "s3",
            region_name=s3_region_name,
            endpoint_url=s3_endpoint_url,
            api_version=s3_api_version,
            use_ssl=s3_use_ssl,
            verify=s3_verify,
            aws_access_key_id=s3_aws_access_key_id,
            aws_secret_access_key=s3_aws_secret_access_key,
            aws_session_token=s3_aws_session_token,
            config=s3_config,
            **kwargs,
        )

    def set_cache(self, key, value, **kwargs):
        """
        Upload `value`, serialized as JSON, to the bucket under `key`.

        Keyword Args:
            ttl: lifetime in seconds (a `datetime.timedelta` is also
                accepted). Sets the object's `Expires` and `Cache-Control`
                headers. Without a ttl only a one-year `Cache-Control` is set.

        S3 / serialization errors are reported via `print_verbose` and never
        propagated.
        """
        try:
            print_verbose(f"LiteLLM SET Cache - S3. Key={key}. Value={value}")
            ttl = kwargs.get("ttl", None)
            # Convert value to JSON before storing in S3
            put_kwargs = {
                "Bucket": self.bucket_name,
                "Key": key,
                "Body": json.dumps(value),
                "ContentType": "application/json",
                "ContentLanguage": "en",
                "ContentDisposition": f'inline; filename="{key}.json"',
            }
            if ttl is not None:
                import datetime

                # ttl is a number of seconds; it has to become a timedelta
                # before it can be added to a datetime
                if isinstance(ttl, datetime.timedelta):
                    lifetime = ttl
                    max_age = int(ttl.total_seconds())
                else:
                    lifetime = datetime.timedelta(seconds=ttl)
                    max_age = ttl
                put_kwargs["Expires"] = datetime.datetime.now() + lifetime
                put_kwargs["CacheControl"] = (
                    f"immutable, max-age={max_age}, s-maxage={max_age}"
                )
            else:
                put_kwargs["CacheControl"] = (
                    "immutable, max-age=31536000, s-maxage=31536000"
                )
            self.s3_client.put_object(**put_kwargs)
        except Exception as e:
            # NON blocking - notify users S3 is throwing an exception
            print_verbose(f"S3 Caching: set_cache() - Got exception from S3: {e}")

    def get_cache(self, key, **kwargs):
        """
        Download and decode the object stored under `key`.

        The body is decoded as UTF-8 and parsed as JSON, falling back to a
        Python-literal parse; a non-dict result is passed through `dict()`.

        Returns:
            The decoded dict, or None when the key does not exist or any
            error occurs (errors are reported, never propagated).
        """
        import boto3, botocore

        try:
            print_verbose(f"Get S3 Cache: key: {key}")
            # Download the data from S3
            cached_response = self.s3_client.get_object(
                Bucket=self.bucket_name, Key=key
            )

            if cached_response is not None:
                # the body is bytes; convert it to a Python object
                cached_response = cached_response["Body"].read().decode("utf-8")
                try:
                    cached_response = json.loads(cached_response)
                except Exception:
                    cached_response = ast.literal_eval(cached_response)
            if not isinstance(cached_response, dict):
                cached_response = dict(cached_response)
            print_verbose(
                f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
            )

            return cached_response
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "NoSuchKey":
                print_verbose(
                    f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
                )
            else:
                print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
            return None

        except Exception as e:
            # NON blocking - notify users S3 is throwing an exception
            traceback.print_exc()
            print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")

    def flush_cache(self):
        """No-op: flushing is not implemented for the S3 backend."""
        pass
234
+
235
+
236
class DualCache(BaseCache):
    """
    Write-through pair of an in-memory cache and an optional Redis cache.

    Writes go to the in-memory cache and, unless `local_only` is set, to Redis
    as well. Reads try the in-memory cache first and fall back to Redis,
    copying any Redis hit back into memory.
    """

    def __init__(
        self,
        in_memory_cache: Optional[InMemoryCache] = None,
        redis_cache: Optional[RedisCache] = None,
    ) -> None:
        super().__init__()
        # A fresh InMemoryCache is created when none (or a falsy one) is given
        self.in_memory_cache = in_memory_cache if in_memory_cache else InMemoryCache()
        # Redis stays disabled (None) unless a RedisCache is passed in
        self.redis_cache = redis_cache

    def set_cache(self, key, value, local_only: bool = False, **kwargs):
        """Write `value` to memory and, when allowed, to Redis; errors are only reported via `print_verbose`."""
        try:
            print_verbose(f"set cache: key: {key}; value: {value}")

            memory_store = self.in_memory_cache
            if memory_store is not None:
                memory_store.set_cache(key, value, **kwargs)

            redis_store = self.redis_cache
            if redis_store is not None and local_only == False:
                redis_store.set_cache(key, value, **kwargs)
        except Exception as e:
            print_verbose(e)

    def get_cache(self, key, local_only: bool = False, **kwargs):
        """Return the cached value for `key` (memory first, then Redis), or None; exceptions are printed and yield None."""
        try:
            print_verbose(f"get cache: cache key: {key}; local_only: {local_only}")

            found = None
            if self.in_memory_cache is not None:
                found = self.in_memory_cache.get_cache(key, **kwargs)
                print_verbose(f"in_memory_result: {found}")

            if found is None and self.redis_cache is not None and local_only == False:
                # Memory miss: consult Redis and, on a hit, warm the memory cache
                from_redis = self.redis_cache.get_cache(key, **kwargs)
                if from_redis is not None:
                    self.in_memory_cache.set_cache(key, from_redis, **kwargs)
                found = from_redis

            print_verbose(f"get cache: cache result: {found}")
            return found
        except Exception as e:
            traceback.print_exc()

    def flush_cache(self):
        """Flush whichever of the two caches are present."""
        for store in (self.in_memory_cache, self.redis_cache):
            if store is not None:
                store.flush_cache()
298
+
299
+
300
+ #### LiteLLM.Completion / Embedding Cache ####
301
class Cache:
    """Response cache for LiteLLM completion / embedding calls on top of a pluggable backend."""

    def __init__(
        self,
        type: Optional[Literal["local", "redis", "s3"]] = "local",
        host: Optional[str] = None,
        port: Optional[str] = None,
        password: Optional[str] = None,
        supported_call_types: Optional[
            List[Literal["completion", "acompletion", "embedding", "aembedding"]]
        ] = ["completion", "acompletion", "embedding", "aembedding"],
        # s3 Bucket, boto3 configuration
        s3_bucket_name: Optional[str] = None,
        s3_region_name: Optional[str] = None,
        s3_api_version: Optional[str] = None,
        s3_use_ssl: Optional[bool] = True,
        s3_verify: Optional[Union[bool, str]] = None,
        s3_endpoint_url: Optional[str] = None,
        s3_aws_access_key_id: Optional[str] = None,
        s3_aws_secret_access_key: Optional[str] = None,
        s3_aws_session_token: Optional[str] = None,
        s3_config: Optional[Any] = None,
        **kwargs,
    ):
        """
        Initializes the cache based on the given type.

        Args:
            type (str, optional): The type of cache to initialize. Can be "local", "redis" or "s3". Defaults to "local".
            host (str, optional): The host address for the Redis cache. Used if type is "redis".
            port (int, optional): The port number for the Redis cache. Used if type is "redis".
            password (str, optional): The password for the Redis cache. Used if type is "redis".
            supported_call_types (list, optional): List of call types to cache for. Defaults to cache == on for all call types.
            s3_*: Bucket name and boto3 client options. Used if type is "s3".
            **kwargs: Additional keyword arguments forwarded to the Redis or S3 backend.

        Returns:
            None. "cache" is registered in litellm's input / success / async-success callbacks.

        Note:
            No exception is raised for an unrecognized `type`; in that case
            no backend is created and `self.cache` is left unset.
        """
        if type == "redis":
            self.cache: BaseCache = RedisCache(host, port, password, **kwargs)
        elif type == "local":
            self.cache = InMemoryCache()
        elif type == "s3":
            self.cache = S3Cache(
                s3_bucket_name=s3_bucket_name,
                s3_region_name=s3_region_name,
                s3_api_version=s3_api_version,
                s3_use_ssl=s3_use_ssl,
                s3_verify=s3_verify,
                s3_endpoint_url=s3_endpoint_url,
                s3_aws_access_key_id=s3_aws_access_key_id,
                s3_aws_secret_access_key=s3_aws_secret_access_key,
                s3_aws_session_token=s3_aws_session_token,
                s3_config=s3_config,
                **kwargs,
            )
        if "cache" not in litellm.input_callback:
            litellm.input_callback.append("cache")
        if "cache" not in litellm.success_callback:
            litellm.success_callback.append("cache")
        if "cache" not in litellm._async_success_callback:
            litellm._async_success_callback.append("cache")
        self.supported_call_types = supported_call_types  # default to ["completion", "acompletion", "embedding", "aembedding"]
        self.type = type

    def _resolve_model_param(self, kwargs):
        """
        Return the value that stands in for `model` in the cache key.

        Preference order: a caching group containing the model group, then
        the model group, then ``kwargs["model"]``. Metadata is read from
        ``kwargs["metadata"]`` first and ``kwargs["litellm_params"]["metadata"]``
        second; the later one overwrites `model_group` whenever it is present.
        """
        model_group = None
        caching_group = None
        litellm_params = kwargs.get("litellm_params") or {}
        metadata_sources = (
            kwargs.get("metadata", None),
            litellm_params.get("metadata", None),
        )
        for metadata in metadata_sources:
            if metadata is None:
                continue
            model_group = metadata.get("model_group", None)
            for group in metadata.get("caching_groups", None) or []:
                if model_group in group:
                    caching_group = group
                    break
        return caching_group or model_group or kwargs["model"]

    def get_cache_key(self, *args, **kwargs):
        """
        Get the cache key for the given arguments.

        Args:
            *args: args to litellm.completion() or embedding()
            **kwargs: kwargs to litellm.completion() or embedding()

        Returns:
            str: ``litellm_params["preset_cache_key"]`` when one is supplied,
            otherwise the SHA-256 hex digest of the relevant call parameters.
        """
        cache_key = ""
        print_verbose(f"\nGetting Cache key. Kwargs: {kwargs}")

        # for streaming, we use preset_cache_key. It's created in wrapper(), we do this because optional params like max_tokens, get transformed for bedrock -> max_new_tokens
        preset_cache_key = (kwargs.get("litellm_params") or {}).get(
            "preset_cache_key", None
        )
        if preset_cache_key is not None:
            print_verbose(f"\nReturning preset cache key: {preset_cache_key}")
            return preset_cache_key

        # parameters are visited in this fixed order, so the order in which the caller passed kwargs does not change the key
        completion_kwargs = [
            "model",
            "messages",
            "temperature",
            "top_p",
            "n",
            "stop",
            "max_tokens",
            "presence_penalty",
            "frequency_penalty",
            "logit_bias",
            "user",
            "response_format",
            "seed",
            "tools",
            "tool_choice",
        ]
        embedding_only_kwargs = [
            "input",
            "encoding_format",
        ]  # embedding kwargs = model, input, user, encoding_format. Model, user are checked in completion_kwargs

        # combined_kwargs - NEEDS to be ordered across get_cache_key(). Do not use a set()
        combined_kwargs = completion_kwargs + embedding_only_kwargs
        for param in combined_kwargs:
            # ignore litellm params here
            if param not in kwargs:
                continue
            if param == "model":
                # model may be replaced by its model group / caching group
                param_value = self._resolve_model_param(kwargs)
            elif kwargs[param] is None:
                continue  # ignore None params
            else:
                param_value = kwargs[param]
            cache_key += f"{str(param)}: {str(param_value)}"
        print_verbose(f"\nCreated cache key: {cache_key}")
        # Use hashlib to create a sha256 hash of the cache key
        hash_hex = hashlib.sha256(cache_key.encode()).hexdigest()
        print_verbose(f"Hashed cache key (SHA-256): {hash_hex}")
        return hash_hex

    def generate_streaming_content(self, content):
        """Yield `content` in 5-character assistant delta chunks, sleeping 20 ms after each one."""
        chunk_size = 5  # Adjust the chunk size as needed
        for i in range(0, len(content), chunk_size):
            yield {
                "choices": [
                    {
                        "delta": {
                            "role": "assistant",
                            "content": content[i : i + chunk_size],
                        }
                    }
                ]
            }
            time.sleep(0.02)

    def get_cache(self, *args, **kwargs):
        """
        Retrieves the cached result for the given arguments.

        Args:
            *args: args to litellm.completion() or embedding()
            **kwargs: kwargs to litellm.completion() or embedding(). An
                explicit ``cache_key`` skips key generation; a ``cache`` dict
                may carry ``s-max-age`` / ``s-maxage`` (seconds) to reject
                entries older than that.

        Returns:
            The cached result if it exists and is fresh enough, otherwise
            None. Never raises: failures are reported and treated as a miss.
        """
        try:  # never block execution
            if "cache_key" in kwargs:
                cache_key = kwargs["cache_key"]
            else:
                cache_key = self.get_cache_key(*args, **kwargs)
            if cache_key is not None:
                # a missing or non-dict `cache` (e.g. cache=None) means "no cache controls"
                cache_control_args = kwargs.get("cache")
                if not isinstance(cache_control_args, dict):
                    cache_control_args = {}
                max_age = cache_control_args.get(
                    "s-max-age", cache_control_args.get("s-maxage", float("inf"))
                )
                cached_result = self.cache.get_cache(cache_key)
                # Check if a timestamp was stored with the cached response
                if (
                    cached_result is not None
                    and isinstance(cached_result, dict)
                    and "timestamp" in cached_result
                    and max_age is not None
                ):
                    # Calculate age of the cached response
                    response_age = time.time() - cached_result["timestamp"]

                    # Check if the cached response is older than the max-age
                    if response_age > max_age:
                        print_verbose(
                            f"Cached response for key {cache_key} is too old. Max-age: {max_age}s, Age: {response_age}s"
                        )
                        return None  # Cached response is too old

                    # The response is fresh: unwrap it, decoding a serialized payload if needed
                    cached_response = cached_result.get("response")
                    try:
                        if not isinstance(cached_response, dict):
                            cached_response = json.loads(
                                cached_response
                            )  # Convert string to dictionary
                    except Exception:
                        cached_response = ast.literal_eval(cached_response)
                    return cached_response
                return cached_result
        except Exception:
            print_verbose(f"An exception occurred: {traceback.format_exc()}")
            return None

    def add_cache(self, result, *args, **kwargs):
        """
        Adds a result to the cache.

        Args:
            result: the response to store; OpenAI model objects are stored as their JSON dump.
            *args: args to litellm.completion() or embedding()
            **kwargs: kwargs to litellm.completion() or embedding(). A
                ``ttl`` inside the ``cache`` dict is forwarded to the backend.

        Returns:
            None. Never raises: failures are reported and ignored.
        """
        try:
            if "cache_key" in kwargs:
                cache_key = kwargs["cache_key"]
            else:
                cache_key = self.get_cache_key(*args, **kwargs)
            if cache_key is not None:
                if isinstance(result, OpenAIObject):
                    result = result.model_dump_json()

                ## Get Cache-Controls ##
                cache_controls = kwargs.get("cache", None)
                if isinstance(cache_controls, dict) and "ttl" in cache_controls:
                    kwargs["ttl"] = cache_controls["ttl"]
                # the timestamp lets get_cache() enforce s-maxage later
                cached_data = {"timestamp": time.time(), "response": result}
                self.cache.set_cache(cache_key, cached_data, **kwargs)
        except Exception as e:
            print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
            traceback.print_exc()

    async def _async_add_cache(self, result, *args, **kwargs):
        """Async wrapper that calls the synchronous `add_cache`."""
        self.add_cache(result, *args, **kwargs)
567
+
568
+
569
def enable_cache(
    type: Optional[Literal["local", "redis", "s3"]] = "local",
    host: Optional[str] = None,
    port: Optional[str] = None,
    password: Optional[str] = None,
    supported_call_types: Optional[
        List[Literal["completion", "acompletion", "embedding", "aembedding"]]
    ] = ["completion", "acompletion", "embedding", "aembedding"],
    **kwargs,
):
    """
    Enable cache with the specified configuration.

    Registers "cache" in litellm's input / success / async-success callbacks
    and, only if `litellm.cache` is not already set, creates a new `Cache`.
    An existing cache is left untouched.

    Args:
        type (Optional[Literal["local", "redis", "s3"]]): The type of cache to enable. Defaults to "local".
        host (Optional[str]): The host address of the cache server. Defaults to None.
        port (Optional[str]): The port number of the cache server. Defaults to None.
        password (Optional[str]): The password for the cache server. Defaults to None.
        supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
            The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
        **kwargs: Additional keyword arguments forwarded to `Cache`.

    Returns:
        None
    """
    print_verbose("LiteLLM: Enabling Cache")
    if "cache" not in litellm.input_callback:
        litellm.input_callback.append("cache")
    if "cache" not in litellm.success_callback:
        litellm.success_callback.append("cache")
    if "cache" not in litellm._async_success_callback:
        litellm._async_success_callback.append("cache")

    if litellm.cache is None:
        litellm.cache = Cache(
            type=type,
            host=host,
            port=port,
            password=password,
            supported_call_types=supported_call_types,
            **kwargs,
        )
    print_verbose(f"LiteLLM: Cache enabled, litellm.cache={litellm.cache}")
    print_verbose(f"LiteLLM Cache: {vars(litellm.cache)}")
616
+
617
+
618
def update_cache(
    type: Optional[Literal["local", "redis", "s3"]] = "local",
    host: Optional[str] = None,
    port: Optional[str] = None,
    password: Optional[str] = None,
    supported_call_types: Optional[
        List[Literal["completion", "acompletion", "embedding", "aembedding"]]
    ] = ["completion", "acompletion", "embedding", "aembedding"],
    **kwargs,
):
    """
    Update the cache for LiteLLM.

    Unconditionally replaces `litellm.cache` with a newly built `Cache`.

    Args:
        type (Optional[Literal["local", "redis", "s3"]]): The type of cache. Defaults to "local".
        host (Optional[str]): The host of the cache. Defaults to None.
        port (Optional[str]): The port of the cache. Defaults to None.
        password (Optional[str]): The password for the cache. Defaults to None.
        supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
            The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
        **kwargs: Additional keyword arguments forwarded to `Cache`.

    Returns:
        None

    """
    print_verbose("LiteLLM: Updating Cache")
    litellm.cache = Cache(
        type=type,
        host=host,
        port=port,
        password=password,
        supported_call_types=supported_call_types,
        **kwargs,
    )
    print_verbose(f"LiteLLM: Cache Updated, litellm.cache={litellm.cache}")
    print_verbose(f"LiteLLM Cache: {vars(litellm.cache)}")
655
+
656
+
657
def disable_cache():
    """
    Disable the cache used by LiteLLM.

    Removes "cache" from litellm's input_callback, success_callback and
    _async_success_callback lists, then sets `litellm.cache` to None.

    Returns:
        None
    """
    from contextlib import suppress

    print_verbose("LiteLLM: Disabling Cache")
    callback_lists = (
        litellm.input_callback,
        litellm.success_callback,
        litellm._async_success_callback,
    )
    for callbacks in callback_lists:
        # suppress per list: a list that lacks "cache" must not prevent
        # removal from the lists that follow it
        with suppress(ValueError):
            callbacks.remove("cache")

    litellm.cache = None
    print_verbose(f"LiteLLM: Cache disabled, litellm.cache={litellm.cache}")
litellm/cost.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "gpt-3.5-turbo-0613": 0.00015000000000000001,
3
+ "claude-2": 0.00016454,
4
+ "gpt-4-0613": 0.015408
5
+ }
litellm/deprecated_litellm_server/.env.template ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
2
+ # AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
3
+
4
+ # OPENAI_API_KEY = ""
5
+
6
+ # HUGGINGFACE_API_KEY=""
7
+
8
+ # TOGETHERAI_API_KEY=""
9
+
10
+ # REPLICATE_API_KEY=""
11
+
12
+ # ## bedrock / sagemaker
13
+ # AWS_ACCESS_KEY_ID = ""
14
+ # AWS_SECRET_ACCESS_KEY = ""
15
+
16
+ # AZURE_API_KEY = ""
17
+ # AZURE_API_BASE = ""
18
+ # AZURE_API_VERSION = ""
19
+
20
+ # ANTHROPIC_API_KEY = ""
21
+
22
+ # COHERE_API_KEY = ""
23
+
24
+ # ## CONFIG FILE ##
25
+ # # CONFIG_FILE_PATH = "" # uncomment to point to config file
26
+
27
+ # ## LOGGING ##
28
+
29
+ # SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
30
+
31
+ # ### LANGFUSE
32
+ # LANGFUSE_PUBLIC_KEY = ""
33
+ # LANGFUSE_SECRET_KEY = ""
34
+ # # Optional, defaults to https://cloud.langfuse.com
35
+ # LANGFUSE_HOST = "" # optional
36
+
37
+
38
+ # ## CACHING ##
39
+
40
+ # ### REDIS
41
+ # REDIS_HOST = ""
42
+ # REDIS_PORT = ""
43
+ # REDIS_PASSWORD = ""
litellm/deprecated_litellm_server/Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # FROM python:3.10
2
+
3
+ # ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
4
+ # COPY . /app
5
+ # WORKDIR /app
6
+ # RUN pip install -r requirements.txt
7
+
8
+ # EXPOSE $PORT
9
+
10
+ # CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
litellm/deprecated_litellm_server/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # litellm-server [experimental]
2
+
3
+ Deprecated. See litellm/proxy
litellm/deprecated_litellm_server/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # from .main import *
2
+ # from .server_utils import *
litellm/deprecated_litellm_server/main.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os, traceback
2
+ # from fastapi import FastAPI, Request, HTTPException
3
+ # from fastapi.routing import APIRouter
4
+ # from fastapi.responses import StreamingResponse, FileResponse
5
+ # from fastapi.middleware.cors import CORSMiddleware
6
+ # import json, sys
7
+ # from typing import Optional
8
+ # sys.path.insert(
9
+ # 0, os.path.abspath("../")
10
+ # ) # Adds the parent directory to the system path - for litellm local dev
11
+ # import litellm
12
+
13
+ # try:
14
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
15
+ # except ImportError:
16
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
17
+ # import dotenv
18
+ # dotenv.load_dotenv() # load env variables
19
+
20
+ # app = FastAPI(docs_url="/", title="LiteLLM API")
21
+ # router = APIRouter()
22
+ # origins = ["*"]
23
+
24
+ # app.add_middleware(
25
+ # CORSMiddleware,
26
+ # allow_origins=origins,
27
+ # allow_credentials=True,
28
+ # allow_methods=["*"],
29
+ # allow_headers=["*"],
30
+ # )
31
+ # #### GLOBAL VARIABLES ####
32
+ # llm_router: Optional[litellm.Router] = None
33
+ # llm_model_list: Optional[list] = None
34
+ # server_settings: Optional[dict] = None
35
+
36
+ # set_callbacks() # sets litellm callbacks for logging if they exist in the environment
37
+
38
+ # if "CONFIG_FILE_PATH" in os.environ:
39
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
40
+ # else:
41
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
42
+ # #### API ENDPOINTS ####
43
+ # @router.get("/v1/models")
44
+ # @router.get("/models") # if project requires model list
45
+ # def model_list():
46
+ # all_models = litellm.utils.get_valid_models()
47
+ # if llm_model_list:
48
+ # all_models += llm_model_list
49
+ # return dict(
50
+ # data=[
51
+ # {
52
+ # "id": model,
53
+ # "object": "model",
54
+ # "created": 1677610602,
55
+ # "owned_by": "openai",
56
+ # }
57
+ # for model in all_models
58
+ # ],
59
+ # object="list",
60
+ # )
61
+ # # for streaming
62
+ # def data_generator(response):
63
+
64
+ # for chunk in response:
65
+
66
+ # yield f"data: {json.dumps(chunk)}\n\n"
67
+
68
+ # @router.post("/v1/completions")
69
+ # @router.post("/completions")
70
+ # async def completion(request: Request):
71
+ # data = await request.json()
72
+ # response = litellm.completion(
73
+ # **data
74
+ # )
75
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
76
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
77
+ # return response
78
+
79
+ # @router.post("/v1/embeddings")
80
+ # @router.post("/embeddings")
81
+ # async def embedding(request: Request):
82
+ # try:
83
+ # data = await request.json()
84
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
85
+ # if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
86
+ # api_key = request.headers.get("authorization")
87
+ # api_key = api_key.replace("Bearer", "").strip() # type: ignore
88
+ # if len(api_key.strip()) > 0:
89
+ # api_key = api_key
90
+ # data["api_key"] = api_key
91
+ # response = litellm.embedding(
92
+ # **data
93
+ # )
94
+ # return response
95
+ # except Exception as e:
96
+ # error_traceback = traceback.format_exc()
97
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
98
+ # return {"error": error_msg}
99
+
100
+ # @router.post("/v1/chat/completions")
101
+ # @router.post("/chat/completions")
102
+ # @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
103
+ # async def chat_completion(request: Request, model: Optional[str] = None):
104
+ # global llm_model_list, server_settings
105
+ # try:
106
+ # data = await request.json()
107
+ # server_model = server_settings.get("completion_model", None) if server_settings else None
108
+ # data["model"] = server_model or model or data["model"]
109
+ # ## CHECK KEYS ##
110
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
111
+ # # env_validation = litellm.validate_environment(model=data["model"])
112
+ # # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
113
+ # # if "authorization" in request.headers:
114
+ # # api_key = request.headers.get("authorization")
115
+ # # elif "api-key" in request.headers:
116
+ # # api_key = request.headers.get("api-key")
117
+ # # print(f"api_key in headers: {api_key}")
118
+ # # if " " in api_key:
119
+ # # api_key = api_key.split(" ")[1]
120
+ # # print(f"api_key split: {api_key}")
121
+ # # if len(api_key) > 0:
122
+ # # api_key = api_key
123
+ # # data["api_key"] = api_key
124
+ # # print(f"api_key in data: {api_key}")
125
+ # ## CHECK CONFIG ##
126
+ # if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
127
+ # for m in llm_model_list:
128
+ # if data["model"] == m["model_name"]:
129
+ # for key, value in m["litellm_params"].items():
130
+ # data[key] = value
131
+ # break
132
+ # response = litellm.completion(
133
+ # **data
134
+ # )
135
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
136
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
137
+ # return response
138
+ # except Exception as e:
139
+ # error_traceback = traceback.format_exc()
140
+
141
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
142
+ # # return {"error": error_msg}
143
+ # raise HTTPException(status_code=500, detail=error_msg)
144
+
145
+ # @router.post("/router/completions")
146
+ # async def router_completion(request: Request):
147
+ # global llm_router
148
+ # try:
149
+ # data = await request.json()
150
+ # if "model_list" in data:
151
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
152
+ # if llm_router is None:
153
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
154
+
155
+ # # openai.ChatCompletion.create replacement
156
+ # response = await llm_router.acompletion(model="gpt-3.5-turbo",
157
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
158
+
159
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
160
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
161
+ # return response
162
+ # except Exception as e:
163
+ # error_traceback = traceback.format_exc()
164
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
165
+ # return {"error": error_msg}
166
+
167
+ # @router.post("/router/embedding")
168
+ # async def router_embedding(request: Request):
169
+ # global llm_router
170
+ # try:
171
+ # data = await request.json()
172
+ # if "model_list" in data:
173
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
174
+ # if llm_router is None:
175
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
176
+
177
+ # response = await llm_router.aembedding(model="gpt-3.5-turbo", # type: ignore
178
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
179
+
180
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
181
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
182
+ # return response
183
+ # except Exception as e:
184
+ # error_traceback = traceback.format_exc()
185
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
186
+ # return {"error": error_msg}
187
+
188
+ # @router.get("/")
189
+ # async def home(request: Request):
190
+ # return "LiteLLM: RUNNING"
191
+
192
+
193
+ # app.include_router(router)
litellm/deprecated_litellm_server/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # openai
2
+ # fastapi
3
+ # uvicorn
4
+ # boto3
5
+ # litellm
6
+ # python-dotenv
7
+ # redis
litellm/deprecated_litellm_server/server_utils.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os, litellm
2
+ # import pkg_resources
3
+ # import dotenv
4
+ # dotenv.load_dotenv() # load env variables
5
+
6
+ # def print_verbose(print_statement):
7
+ # pass
8
+
9
+ # def get_package_version(package_name):
10
+ # try:
11
+ # package = pkg_resources.get_distribution(package_name)
12
+ # return package.version
13
+ # except pkg_resources.DistributionNotFound:
14
+ # return None
15
+
16
+ # # Usage example
17
+ # package_name = "litellm"
18
+ # version = get_package_version(package_name)
19
+ # if version:
20
+ # print_verbose(f"The version of {package_name} is {version}")
21
+ # else:
22
+ # print_verbose(f"{package_name} is not installed")
23
+ # import yaml
24
+ # import dotenv
25
+ # from typing import Optional
26
+ # dotenv.load_dotenv() # load env variables
27
+
28
+ # def set_callbacks():
29
+ # ## LOGGING
30
+ # if len(os.getenv("SET_VERBOSE", "")) > 0:
31
+ # if os.getenv("SET_VERBOSE") == "True":
32
+ # litellm.set_verbose = True
33
+ # print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
34
+ # else:
35
+ # litellm.set_verbose = False
36
+
37
+ # ### LANGFUSE
38
+ # if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
39
+ # litellm.success_callback = ["langfuse"]
40
+ # print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
41
+
42
+ # ## CACHING
43
+ # ### REDIS
44
+ # # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
45
+ # # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
46
+ # # from litellm.caching import Cache
47
+ # # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
48
+ # # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
49
+
50
+
51
+ # def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
52
+ # config = {}
53
+ # server_settings = {}
54
+ # try:
55
+ # if os.path.exists(config_file_path): # type: ignore
56
+ # with open(config_file_path, 'r') as file: # type: ignore
57
+ # config = yaml.safe_load(file)
58
+ # else:
59
+ # pass
60
+ # except:
61
+ # pass
62
+
63
+ # ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
64
+ # server_settings = config.get("server_settings", None)
65
+ # if server_settings:
66
+ # server_settings = server_settings
67
+
68
+ # ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
69
+ # litellm_settings = config.get('litellm_settings', None)
70
+ # if litellm_settings:
71
+ # for key, value in litellm_settings.items():
72
+ # setattr(litellm, key, value)
73
+
74
+ # ## MODEL LIST
75
+ # model_list = config.get('model_list', None)
76
+ # if model_list:
77
+ # router = litellm.Router(model_list=model_list)
78
+
79
+ # ## ENVIRONMENT VARIABLES
80
+ # environment_variables = config.get('environment_variables', None)
81
+ # if environment_variables:
82
+ # for key, value in environment_variables.items():
83
+ # os.environ[key] = value
84
+
85
+ # return router, model_list, server_settings
litellm/exceptions.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ ## LiteLLM versions of the OpenAI Exception Types
11
+
12
+ from openai import (
13
+ AuthenticationError,
14
+ BadRequestError,
15
+ NotFoundError,
16
+ RateLimitError,
17
+ APIStatusError,
18
+ OpenAIError,
19
+ APIError,
20
+ APITimeoutError,
21
+ APIConnectionError,
22
+ APIResponseValidationError,
23
+ UnprocessableEntityError,
24
+ )
25
+ import httpx
26
+
27
+
28
class AuthenticationError(AuthenticationError):  # type: ignore
    """LiteLLM version of the OpenAI authentication error, tagged with provider and model."""

    def __init__(self, message, llm_provider, model, response: httpx.Response):
        # LiteLLM-specific fields are set first, then the OpenAI base class
        # is initialised from the message and HTTP response.
        self.model = model
        self.llm_provider = llm_provider
        self.message = message
        self.status_code = 401
        super().__init__(message, response=response, body=None)
37
+
38
+
39
+ # raise when invalid models passed, example gpt-8
40
class NotFoundError(NotFoundError):  # type: ignore
    """LiteLLM version of the OpenAI not-found error, tagged with model and provider."""

    def __init__(self, message, model, llm_provider, response: httpx.Response):
        # LiteLLM-specific fields are set first, then the OpenAI base class
        # is initialised from the message and HTTP response.
        self.llm_provider = llm_provider
        self.model = model
        self.message = message
        self.status_code = 404
        super().__init__(message, response=response, body=None)
49
+
50
+
51
class BadRequestError(BadRequestError):  # type: ignore
    """LiteLLM version of the OpenAI bad-request error, tagged with model and provider."""

    def __init__(self, message, model, llm_provider, response: httpx.Response):
        # LiteLLM-specific fields are set first, then the OpenAI base class
        # is initialised from the message and HTTP response.
        self.llm_provider = llm_provider
        self.model = model
        self.message = message
        self.status_code = 400
        super().__init__(message, response=response, body=None)
60
+
61
+
62
class UnprocessableEntityError(UnprocessableEntityError):  # type: ignore
    """LiteLLM version of the OpenAI unprocessable-entity error, tagged with model and provider."""

    def __init__(self, message, model, llm_provider, response: httpx.Response):
        # LiteLLM-specific fields are set first, then the OpenAI base class
        # is initialised from the message and HTTP response.
        self.llm_provider = llm_provider
        self.model = model
        self.message = message
        self.status_code = 422
        super().__init__(message, response=response, body=None)
71
+
72
+
73
class Timeout(APITimeoutError):  # type: ignore
    """Request-timeout error (status 408) tagged with the model and provider.

    Args:
        message: Human-readable description of the timeout.
        model: Name of the model that was being called.
        llm_provider: Name of the provider that was being called.
    """

    def __init__(self, message, model, llm_provider):
        # The base constructor needs a request object; none is available
        # here, so a placeholder request is synthesised.
        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
        super().__init__(request=request)
        # Assign AFTER the base constructor: openai-python's timeout error
        # initialises its own generic ``message``, which would otherwise
        # replace the one supplied by the caller.
        self.status_code = 408
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
83
+
84
+
85
class RateLimitError(RateLimitError):  # type: ignore
    """LiteLLM version of the OpenAI rate-limit error, tagged with provider and model.

    Args:
        message: Human-readable error description.
        llm_provider: Name of the provider that returned the error.
        model: Name of the model that was being called.
        response: HTTP response forwarded to the OpenAI base class.
    """

    def __init__(self, message, llm_provider, model, response: httpx.Response):
        self.status_code = 429
        self.message = message
        self.llm_provider = llm_provider
        # Previously misspelled as ``self.modle``, which left ``.model`` unset
        # on this class only.
        self.model = model
        super().__init__(self.message, response=response, body=None)
94
+
95
+
96
+ # subclass of BadRequestError - meant to give more granularity for handling context-window-exceeded errors
97
class ContextWindowExceededError(BadRequestError):  # type: ignore
    """Bad-request error specialised for context-window-exceeded failures."""

    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.llm_provider = llm_provider
        self.model = model
        self.message = message
        self.status_code = 400
        # Delegate to this module's BadRequestError with the same fields.
        super().__init__(
            message=message,
            model=model,  # type: ignore
            llm_provider=llm_provider,  # type: ignore
            response=response,
        )
109
+
110
+
111
class ContentPolicyViolationError(BadRequestError):  # type: ignore
    """Bad-request error specialised for content-policy rejections."""

    # Example upstream payload:
    # Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.llm_provider = llm_provider
        self.model = model
        self.message = message
        self.status_code = 400
        # Delegate to this module's BadRequestError with the same fields.
        super().__init__(
            message=message,
            model=model,  # type: ignore
            llm_provider=llm_provider,  # type: ignore
            response=response,
        )
124
+
125
+
126
class ServiceUnavailableError(APIStatusError):  # type: ignore
    """Status error for service-unavailable responses, tagged with provider and model."""

    def __init__(self, message, llm_provider, model, response: httpx.Response):
        # LiteLLM-specific fields are set first, then the OpenAI base class
        # is initialised from the message and HTTP response.
        self.model = model
        self.llm_provider = llm_provider
        self.message = message
        self.status_code = 503
        super().__init__(message, response=response, body=None)
135
+
136
+
137
+ # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
138
class APIError(APIError):  # type: ignore
    """LiteLLM version of the OpenAI API error with a caller-supplied status code."""

    def __init__(self, status_code, message, llm_provider, model, request: httpx.Request):
        self.model = model
        self.llm_provider = llm_provider
        self.message = message
        self.status_code = status_code
        # The base class is initialised from the message and originating request.
        super().__init__(message, request=request, body=None)  # type: ignore
147
+
148
+
149
+ # raised if an invalid request (not get, delete, put, post) is made
150
class APIConnectionError(APIConnectionError):  # type: ignore
    """LiteLLM version of the OpenAI connection error; always reports status 500."""

    def __init__(self, message, llm_provider, model, request: httpx.Request):
        self.status_code = 500
        self.model = model
        self.llm_provider = llm_provider
        self.message = message
        super().__init__(message=message, request=request)
157
+
158
+
159
+ # raised when the API returns a response that fails validation
160
class APIResponseValidationError(APIResponseValidationError):  # type: ignore
    """LiteLLM version of the OpenAI response-validation error, tagged with provider and model."""

    def __init__(self, message, llm_provider, model):
        self.model = model
        self.llm_provider = llm_provider
        self.message = message
        # No real HTTP exchange is passed in, so a placeholder 500 response
        # (with its request) is built for the base constructor.
        placeholder_request = httpx.Request(
            method="POST", url="https://api.openai.com/v1"
        )
        placeholder_response = httpx.Response(
            status_code=500, request=placeholder_request
        )
        super().__init__(response=placeholder_response, body=None, message=message)
168
+
169
+
170
class OpenAIError(OpenAIError):  # type: ignore
    """Re-wrap an OpenAI exception, copying its HTTP details and tagging the provider as "openai"."""

    def __init__(self, original_exception):
        source = original_exception
        self.status_code = source.http_status
        # NOTE(review): these keyword arguments look like the legacy (pre-1.0)
        # openai error constructor - confirm the imported base class accepts them.
        super().__init__(
            http_body=source.http_body,
            http_status=source.http_status,
            json_body=source.json_body,
            headers=source.headers,
            code=source.code,
        )
        self.llm_provider = "openai"
181
+
182
+
183
class BudgetExceededError(Exception):
    """Error carrying the current cost and the budget it exceeded."""

    def __init__(self, current_cost, max_budget):
        self.max_budget = max_budget
        self.current_cost = current_cost
        super().__init__(
            "Budget has been exceeded! "
            f"Current cost: {current_cost}, Max budget: {max_budget}"
        )
189
+
190
+
191
+ ## DEPRECATED ##
192
class InvalidRequestError(BadRequestError):  # type: ignore
    """Deprecated bad-request error that takes no HTTP response.

    Args:
        message: Human-readable error description.
        model: Name of the model that was being called.
        llm_provider: Name of the provider that returned the error.
    """

    def __init__(self, message, model, llm_provider):
        self.status_code = 400
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        # The parent constructor in this module requires ``llm_provider`` and
        # an HTTP response; the previous two-positional-argument call raised
        # TypeError. Build a placeholder 400 response the same way the other
        # response-less errors in this module do.
        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
        response = httpx.Response(status_code=400, request=request)
        super().__init__(
            message=self.message,
            model=self.model,
            llm_provider=self.llm_provider,
            response=response,
        )
litellm/integrations/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from . import *
litellm/integrations/aispend.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to aispend.io
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
# Per-model context size and per-token prices (US dollars) used for cost estimates.
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        # was 0.000003: ten times too low, and inconsistent with gpt-4-32k
        # (whose input price is twice this, matching the 2x output ratio)
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00003,  # same correction as gpt-4
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 32000,  # was 8000, copied from the 8k entry
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}
87
+
88
+
89
class AISpendLogger:
    """Build AISpend usage records for LLM calls.

    The account id and API key are read from the ``AISPEND_ACCOUNT_ID`` and
    ``AISPEND_API_KEY`` environment variables when the logger is created.
    """

    def __init__(self):
        self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
        self.api_key = os.getenv("AISPEND_API_KEY")

    def price_calculator(self, model, response_obj, start_time, end_time):
        """Return ``(prompt_cost, completion_cost)`` in US dollars.

        Models listed in ``model_cost`` are priced per token. Models whose
        name contains "replicate" are priced by run time, split evenly between
        prompt and completion. Any other model falls back to the average
        per-token price across ``model_cost``.

        Args:
            model: Model name used for the call.
            response_obj: Mapping with ``usage.prompt_tokens`` and
                ``usage.completion_tokens`` (not read for replicate models).
            start_time: Call start, as a number of seconds or a datetime.
            end_time: Call end, same type as ``start_time``.
        """
        if model in model_cost:
            usage = response_obj["usage"]
            rates = model_cost[model]
            return (
                rates["input_cost_per_token"] * usage["prompt_tokens"],
                rates["output_cost_per_token"] * usage["completion_tokens"],
            )

        if "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            run_time = end_time - start_time
            # datetime inputs give a timedelta; convert so the cost is a plain number
            if isinstance(run_time, datetime.timedelta):
                run_time = run_time.total_seconds()
            half_cost = (run_time * 0.0032) / 2
            return half_cost, half_cost

        # Unknown model: price with the average rates. The previous code
        # computed these averages but then priced with the last table entry,
        # because its loop variable shadowed ``model``.
        entry_count = len(model_cost)
        avg_input_cost = (
            sum(rates["input_cost_per_token"] for rates in model_cost.values())
            / entry_count
        )
        avg_output_cost = (
            sum(rates["output_cost_per_token"] for rates in model_cost.values())
            / entry_count
        )
        usage = response_obj["usage"]
        return (
            avg_input_cost * usage["prompt_tokens"],
            avg_output_cost * usage["completion_tokens"],
        )

    def log_event(self, model, response_obj, start_time, end_time, print_verbose):
        """Assemble the AISpend record for one call and report it via ``print_verbose``.

        Any failure is reported through ``print_verbose`` and never raised.
        """
        try:
            print_verbose(
                f"AISpend Logging - Enters logging function for model {model}"
            )

            # NOTE(review): the endpoint and headers are prepared but the
            # payload is never sent from this method - confirm whether a
            # POST is intended.
            url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }

            response_timestamp = datetime.datetime.fromtimestamp(
                int(response_obj["created"])
            ).strftime("%Y-%m-%d")

            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
            completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
            data = [
                {
                    "requests": 1,
                    "requests_context": 1,
                    "context_tokens": response_obj["usage"]["prompt_tokens"],
                    "requests_generated": 1,
                    "generated_tokens": response_obj["usage"]["completion_tokens"],
                    "recorded_date": response_timestamp,
                    "model_id": response_obj["model"],
                    # context == prompt, generated == completion (these two
                    # cost fields were previously swapped)
                    "generated_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
                    "context_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
                }
            ]

            print_verbose(f"AISpend Logging - final data object: {data}")
        except Exception:
            # Logging is best-effort: report the failure, never propagate it.
            print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
litellm/integrations/berrispend.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to BerriSpend (berrispend.berri.ai)
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
# Per-model context size and per-token prices (US dollars) used for cost estimates.
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        # was 0.000003: ten times too low, and inconsistent with gpt-4-32k
        # (whose input price is twice this, matching the 2x output ratio)
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00003,  # same correction as gpt-4
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 32000,  # was 8000, copied from the 8k entry
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}
87
+
88
+
89
class BerriSpendLogger:
    """Send per-call spend records to the BerriSpend endpoint.

    The account id is read from the ``BERRISPEND_ACCOUNT_ID`` environment
    variable when the logger is created.
    """

    def __init__(self):
        self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")

    def price_calculator(self, model, response_obj, start_time, end_time):
        """Return ``(prompt_cost, completion_cost)`` in US dollars.

        Models listed in ``model_cost`` are priced per token. Models whose
        name contains "replicate" are priced by run time, split evenly between
        prompt and completion. Any other model falls back to the average
        per-token price across ``model_cost``.

        Args:
            model: Model name used for the call.
            response_obj: Mapping with ``usage.prompt_tokens`` and
                ``usage.completion_tokens`` (not read for replicate models).
            start_time: Call start, as a number of seconds or a datetime.
            end_time: Call end, same type as ``start_time``.
        """
        if model in model_cost:
            usage = response_obj["usage"]
            rates = model_cost[model]
            return (
                rates["input_cost_per_token"] * usage["prompt_tokens"],
                rates["output_cost_per_token"] * usage["completion_tokens"],
            )

        if "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            run_time = end_time - start_time
            # log_event passes datetimes, so this is normally a timedelta;
            # convert it so the cost is a JSON-serialisable number.
            if isinstance(run_time, datetime.timedelta):
                run_time = run_time.total_seconds()
            half_cost = (run_time * 0.0032) / 2
            return half_cost, half_cost

        # Unknown model: price with the average rates. The previous code
        # computed these averages but then priced with the last table entry,
        # because its loop variable shadowed ``model``.
        entry_count = len(model_cost)
        avg_input_cost = (
            sum(rates["input_cost_per_token"] for rates in model_cost.values())
            / entry_count
        )
        avg_output_cost = (
            sum(rates["output_cost_per_token"] for rates in model_cost.values())
            / entry_count
        )
        usage = response_obj["usage"]
        return (
            avg_input_cost * usage["prompt_tokens"],
            avg_output_cost * usage["completion_tokens"],
        )

    def log_event(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        """Build the spend record for one call and POST it to BerriSpend.

        ``start_time`` and ``end_time`` must support subtraction yielding an
        object with ``total_seconds()``. Any failure is reported through
        ``print_verbose`` and never raised.
        """
        try:
            print_verbose(
                f"BerriSpend Logging - Enters logging function for model {model}"
            )

            url = "https://berrispend.berri.ai/spend"
            headers = {"Content-Type": "application/json"}

            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = (
                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            )

            response_time = (end_time - start_time).total_seconds()
            # Fields shared by the success and error records.
            record = {
                "response_time": response_time,
                "model_id": response_obj["model"],
                "total_cost": total_cost,
                "messages": messages,
            }
            if "response" in response_obj:
                record["response"] = response_obj["choices"][0]["message"]["content"]
            elif "error" in response_obj:
                record["error"] = response_obj["error"]
            else:
                # Previously this case failed on an unbound ``data`` variable;
                # report it explicitly and send nothing.
                print_verbose(
                    "BerriSpend Logging Error - response object has neither 'response' nor 'error'"
                )
                return
            record["account_id"] = self.account_id
            data = [record]

            print_verbose(f"BerriSpend Logging - final data object: {data}")
            requests.post(url, headers=headers, json=data)
        except Exception:
            # Logging is best-effort: report the failure, never propagate it.
            print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
litellm/integrations/custom_logger.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # Defines the CustomLogger callback class and helpers that invoke user-supplied callback functions
3
+ import dotenv, os
4
+ import requests
5
+ from litellm.proxy._types import UserAPIKeyAuth
6
+ from litellm.caching import DualCache
7
+ from typing import Literal
8
+
9
+ dotenv.load_dotenv() # Loading env variables using dotenv
10
+ import traceback
11
+
12
+
13
class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
    """Callback class whose hooks all do nothing by default.

    The ``log_*_event`` helpers at the bottom invoke a caller-supplied
    ``callback_func`` and report any failure through ``print_verbose``
    instead of raising.
    """

    def __init__(self):
        """Nothing to initialise."""

    def log_pre_api_call(self, model, messages, kwargs):
        """No-op placeholder."""

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder."""

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder."""

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder."""

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder."""

    #### ASYNC ####

    async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder (async)."""

    async def async_log_pre_api_call(self, model, messages, kwargs):
        """No-op placeholder (async)."""

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder (async)."""

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """No-op placeholder (async)."""

    #### CALL HOOKS - proxy only ####
    # Control / modify incoming and outgoing data before calling the model

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal["completion", "embeddings"],
    ):
        """No-op placeholder (async)."""

    async def async_post_call_failure_hook(
        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
    ):
        """No-op placeholder (async)."""

    #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function

    def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
        """Add model, messages and event type to ``kwargs``, then call ``callback_func(kwargs)``."""
        try:
            kwargs.update(
                model=model, messages=messages, log_event_type="pre_api_call"
            )
            callback_func(kwargs)
            print_verbose(f"Custom Logger - model call details: {kwargs}")
        except BaseException:  # broad catch kept: callback failures never propagate
            traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    async def async_log_input_event(
        self, model, messages, kwargs, print_verbose, callback_func
    ):
        """Async variant of ``log_input_event``; awaits ``callback_func(kwargs)``."""
        try:
            kwargs.update(
                model=model, messages=messages, log_event_type="pre_api_call"
            )
            await callback_func(kwargs)
            print_verbose(f"Custom Logger - model call details: {kwargs}")
        except BaseException:  # broad catch kept: callback failures never propagate
            traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    def log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func
    ):
        """Mark ``kwargs`` as a post-call event and pass the call details to ``callback_func``."""
        try:
            kwargs["log_event_type"] = "post_api_call"
            callback_func(kwargs, response_obj, start_time, end_time)
            print_verbose(f"Custom Logger - final response object: {response_obj}")
        except BaseException:  # broad catch kept: callback failures never propagate
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    async def async_log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func
    ):
        """Async variant of ``log_event``; awaits ``callback_func``."""
        try:
            kwargs["log_event_type"] = "post_api_call"
            await callback_func(kwargs, response_obj, start_time, end_time)
            print_verbose(f"Custom Logger - final response object: {response_obj}")
        except BaseException:  # broad catch kept: callback failures never propagate
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
litellm/integrations/dynamodb.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to DynamoDB
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm, uuid
11
+ from litellm._logging import print_verbose
12
+
13
+
14
class DyanmoDBLogger:
    """Write one stringified item per LLM call to a DynamoDB table."""

    def __init__(self):
        """Create the boto3 DynamoDB resource and resolve the table name.

        Raises:
            KeyError: if the ``AWS_REGION_NAME`` environment variable is unset.
            ValueError: if ``litellm.dynamodb_table_name`` is ``None``.
        """
        # Imported lazily so boto3 is only needed when this logger is used.
        import boto3

        self.dynamodb = boto3.resource(
            "dynamodb", region_name=os.environ["AWS_REGION_NAME"]
        )
        if litellm.dynamodb_table_name is None:
            raise ValueError(
                "LiteLLM Error, trying to use DynamoDB but not table name passed. Create a table and set `litellm.dynamodb_table_name=<your-table>`"
            )
        self.table_name = litellm.dynamodb_table_name

    async def _async_log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose
    ):
        """Async entry point; delegates to the synchronous :meth:`log_event`."""
        self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        """Build a payload for the call and ``put_item`` it into the table.

        Every payload value is converted with ``str()`` before it is sent.
        Failures are reported through ``traceback``/``print_verbose`` and do
        not propagate.

        Args:
            kwargs: Call details; ``litellm_params``, ``messages``,
                ``optional_params``, ``call_type``, ``model`` and ``user``
                are read from it.
            response_obj: Response mapping; ``usage`` is required, ``id`` is
                optional (a random UUID is used when it is missing).
            start_time: Stored as ``startTime``.
            end_time: Stored as ``endTime``.
            print_verbose: Callable taking a single string, for debug output.

        Returns:
            The ``put_item`` response, or ``None`` if logging failed.
        """
        try:
            print_verbose(
                f"DynamoDB Logging - Enters logging function for model {kwargs}"
            )

            # construct payload to send to DynamoDB
            # follows the same params as langfuse.py
            litellm_params = kwargs.get("litellm_params", {}) or {}
            # `or {}` covers litellm_params["metadata"] being None
            metadata = litellm_params.get("metadata", {}) or {}
            record_id = response_obj.get("id", str(uuid.uuid4()))

            raw_payload = {
                "id": record_id,
                "call_type": kwargs.get("call_type", "litellm.completion"),
                "startTime": start_time,
                "endTime": end_time,
                "model": kwargs.get("model", ""),
                "user": kwargs.get("user", ""),
                "modelParameters": kwargs.get("optional_params", {}),
                "messages": kwargs.get("messages"),
                "response": response_obj,
                "usage": response_obj["usage"],
                "metadata": metadata,
            }

            # Ensure everything in the payload is converted to str
            payload = {}
            for key, value in raw_payload.items():
                try:
                    payload[key] = str(value)
                except Exception:
                    # non blocking if it can't cast to a str: keep the raw value
                    payload[key] = value

            print_verbose(f"\nDynamoDB Logger - Logging payload = {payload}")

            # put data in DynamoDB
            table = self.dynamodb.Table(self.table_name)
            response = table.put_item(Item=payload)

            print_verbose(f"Response from DynamoDB:{str(response)}")

            print_verbose(
                f"DynamoDB Layer Logging - final response object: {response_obj}"
            )
            return response
        except Exception:
            # Best-effort logging; `Exception` (not a bare except) so
            # KeyboardInterrupt/SystemExit still propagate.
            traceback.print_exc()
            print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
litellm/integrations/helicone.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Helicone
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+
9
+
10
class HeliconeLogger:
    """Report successful LLM calls to Helicone's logging endpoint."""

    # A model name containing none of these substrings is reported to
    # Helicone as "gpt-3.5-turbo" (see log_success).
    helicone_model_list = ["gpt", "claude"]

    def __init__(self):
        self.provider_url = "https://api.openai.com/v1"
        self.key = os.getenv("HELICONE_API_KEY")

    @staticmethod
    def _split_timestamp(moment):
        """Split ``moment.timestamp()`` into whole seconds and milliseconds."""
        timestamp = moment.timestamp()
        seconds = int(timestamp)
        return {
            "seconds": seconds,
            "milliseconds": int((timestamp - seconds) * 1000),
        }

    def claude_mapping(self, model, messages, response_obj):
        """Convert chat messages/response into Anthropic prompt/completion form.

        Args:
            model: Model name placed in both returned dicts.
            messages: Chat messages; each must have ``content`` and may have
                ``role``.
            response_obj: Response whose first choice's message content
                becomes the ``completion``.

        Returns:
            ``(provider_request, provider_response)`` as two dicts.
        """
        from anthropic import HUMAN_PROMPT, AI_PROMPT

        # NOTE(review): the prompt is seeded with HUMAN_PROMPT and every
        # non-assistant message prepends it again, so a conversation starting
        # with a user message begins with the marker twice - confirm intended.
        prompt = f"{HUMAN_PROMPT}"
        for message in messages:
            # Messages without a role are treated like user messages.
            if "role" in message and message["role"] != "user":
                prompt += f"{AI_PROMPT}{message['content']}"
            else:
                prompt += f"{HUMAN_PROMPT}{message['content']}"
        prompt += f"{AI_PROMPT}"
        claude_provider_request = {"model": model, "prompt": prompt}

        claude_response_obj = {
            "completion": response_obj["choices"][0]["message"]["content"],
            "model": model,
            "stop_reason": "stop_sequence",
        }

        return claude_provider_request, claude_response_obj

    def log_success(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        """POST one request/response pair with timing data to Helicone.

        Failures are reported through ``print_verbose`` and never propagate.

        Args:
            model: Model name; replaced by ``"gpt-3.5-turbo"`` when it matches
                nothing in ``helicone_model_list``.
            messages: Chat messages sent to the model.
            response_obj: Model response (re-mapped for "claude" models).
            start_time: Object with ``.timestamp()`` marking the call start.
            end_time: Object with ``.timestamp()`` marking the call end.
            print_verbose: Callable taking a single string, for debug output.
        """
        try:
            print_verbose(
                f"Helicone Logging - Enters logging function for model {model}"
            )
            if not any(
                accepted_model in model
                for accepted_model in self.helicone_model_list
            ):
                model = "gpt-3.5-turbo"
            provider_request = {"model": model, "messages": messages}

            if "claude" in model:
                provider_request, response_obj = self.claude_mapping(
                    model=model, messages=messages, response_obj=response_obj
                )

            provider_response = {
                "json": response_obj,
                "headers": {"openai-version": "2020-10-01"},
                "status": 200,
            }

            url = "https://api.hconeai.com/oai/v1/log"
            headers = {
                "Authorization": f"Bearer {self.key}",
                "Content-Type": "application/json",
            }
            data = {
                "providerRequest": {
                    "url": self.provider_url,
                    "json": provider_request,
                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
                },
                "providerResponse": provider_response,
                "timing": {
                    "startTime": self._split_timestamp(start_time),
                    "endTime": self._split_timestamp(end_time),
                },  # {"seconds": .., "milliseconds": ..}
            }
            response = requests.post(url, headers=headers, json=data)
            if response.status_code == 200:
                print_verbose("Helicone Logging - Success!")
            else:
                print_verbose(
                    f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
                )
                print_verbose(f"Helicone Logging - Error {response.text}")
        except Exception:
            # Best-effort logging; `Exception` (not a bare except) so
            # KeyboardInterrupt/SystemExit still propagate.
            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
litellm/integrations/langfuse.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Langfuse
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+ from packaging.version import Version
11
+
12
+
13
class LangFuseLogger:
    """Send successful LLM calls to Langfuse as a trace plus one generation."""

    def __init__(self):
        """Create the Langfuse client from ``LANGFUSE_*`` environment variables.

        Raises:
            Exception: if the ``langfuse`` package cannot be imported.
        """
        try:
            from langfuse import Langfuse
        except Exception as e:
            raise Exception(
                f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\033[0m"
            ) from e
        self.secret_key = os.getenv("LANGFUSE_SECRET_KEY")
        self.public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
        self.langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
        self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
        self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
        self.Langfuse = Langfuse(
            public_key=self.public_key,
            secret_key=self.secret_key,
            host=self.langfuse_host,
            release=self.langfuse_release,
            debug=self.langfuse_debug,
        )

    def log_event(
        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
    ):
        """Log one completed call to Langfuse and flush the client.

        Failures are reported through ``traceback``/``print_verbose`` and do
        not propagate.

        Args:
            kwargs: Call details; ``litellm_params``, ``messages``,
                ``optional_params`` and ``model`` are read. Not modified.
            response_obj: Response; the first choice's message must provide
                ``.json()`` and ``usage`` must hold the token counts.
            start_time: Generation start time.
            end_time: Generation end time.
            user_id: Default user id for the trace.
            print_verbose: Callable taking a single string, for debug output.
        """
        try:
            print_verbose(
                f"Langfuse Logging - Enters logging function for model {kwargs}"
            )
            litellm_params = kwargs.get("litellm_params", {})
            # `or {}` covers litellm_params["metadata"] being None
            metadata = litellm_params.get("metadata", {}) or {}
            prompt = [kwargs.get("messages")]

            # Work on a copy: the pops and str() conversions below must not
            # alter the caller's optional_params dict.
            optional_params = dict(kwargs.get("optional_params") or {})
            optional_params.pop("functions", None)
            optional_params.pop("tools", None)

            # langfuse only accepts str, int, bool, float for logging
            for param, value in optional_params.items():
                if not isinstance(value, (str, int, bool, float)):
                    try:
                        optional_params[param] = str(value)
                    except Exception:
                        # if casting value to str fails don't block logging
                        pass

            output = response_obj["choices"][0]["message"].json()
            print_verbose(
                f"OUTPUT IN LANGFUSE: {output}; original: {response_obj['choices'][0]['message']}"
            )

            # Pick the client API matching the installed langfuse version.
            if self._is_langfuse_v2():
                log_to_langfuse = self._log_langfuse_v2
            else:
                log_to_langfuse = self._log_langfuse_v1
            log_to_langfuse(
                user_id,
                metadata,
                output,
                start_time,
                end_time,
                kwargs,
                optional_params,
                prompt,
                response_obj,
            )

            self.Langfuse.flush()
            print_verbose(
                f"Langfuse Layer Logging - final response object: {response_obj}"
            )
        except Exception:
            # Best-effort logging; `Exception` (not a bare except) so
            # KeyboardInterrupt/SystemExit still propagate.
            traceback.print_exc()
            print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")

    async def _async_log_event(
        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
    ):
        """Async entry point; delegates to the synchronous :meth:`log_event`."""
        self.log_event(
            kwargs, response_obj, start_time, end_time, user_id, print_verbose
        )

    def _is_langfuse_v2(self):
        """Return True when the installed langfuse version is >= 2.0.0."""
        import langfuse

        return Version(langfuse.version.__version__) >= Version("2.0.0")

    def _log_langfuse_v1(
        self,
        user_id,
        metadata,
        output,
        start_time,
        end_time,
        kwargs,
        optional_params,
        input,
        response_obj,
    ):
        """Log through the pre-2.0 API (``CreateTrace``/``CreateGeneration``)."""
        from langfuse.model import CreateTrace, CreateGeneration

        print(
            "Please upgrade langfuse to v2.0.0 or higher: https://github.com/langfuse/langfuse-python/releases/tag/v2.0.1"
        )

        generation_name = metadata.get("generation_name", "litellm-completion")
        trace = self.Langfuse.trace(
            CreateTrace(
                name=generation_name,
                input=input,
                output=output,
                userId=user_id,
            )
        )

        trace.generation(
            CreateGeneration(
                name=generation_name,
                startTime=start_time,
                endTime=end_time,
                model=kwargs["model"],
                modelParameters=optional_params,
                input=input,
                output=output,
                usage={
                    "prompt_tokens": response_obj["usage"]["prompt_tokens"],
                    "completion_tokens": response_obj["usage"]["completion_tokens"],
                },
                metadata=metadata,
            )
        )

    def _log_langfuse_v2(
        self,
        user_id,
        metadata,
        output,
        start_time,
        end_time,
        kwargs,
        optional_params,
        input,
        response_obj,
    ):
        """Log through the 2.x keyword-argument API.

        ``metadata`` may override the trace user (``trace_user_id``) and
        supply explicit ``trace_id``/``generation_id`` values.
        """
        generation_name = metadata.get("generation_name", "litellm-completion")
        trace = self.Langfuse.trace(
            name=generation_name,
            input=input,
            output=output,
            user_id=metadata.get("trace_user_id", user_id),
            id=metadata.get("trace_id", None),
        )

        trace.generation(
            name=generation_name,
            id=metadata.get("generation_id", None),
            startTime=start_time,
            endTime=end_time,
            model=kwargs["model"],
            modelParameters=optional_params,
            input=input,
            output=output,
            usage={
                "prompt_tokens": response_obj["usage"]["prompt_tokens"],
                "completion_tokens": response_obj["usage"]["completion_tokens"],
            },
            metadata=metadata,
        )
litellm/integrations/langsmith.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Langsmith
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+
11
+
12
class LangsmithLogger:
    """Log successful LLM calls to Langsmith through its HTTP runs endpoint."""

    def __init__(self):
        self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        """POST one "llm" run describing the call to Langsmith.

        Users can pass ``project_name`` and ``run_name`` through the call
        metadata, e.g.
        ``litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})``.
        Defaults are ``litellm-completion`` and ``LLMRun``.

        Failures are reported through ``print_verbose`` and never propagate.

        Args:
            kwargs: Call details. Non-dict values (except ``start_time`` and
                ``end_time``) are sent as the run inputs.
            response_obj: Response object; its ``.json()`` result is sent as
                the run outputs.
            start_time: Unused; the timestamp is taken from
                ``kwargs["start_time"]``.
            end_time: Unused; the timestamp is taken from
                ``kwargs["end_time"]``.
            print_verbose: Callable taking a single string, for debug output.
        """
        # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
        # `or {}` guards against litellm_params / metadata being None, which
        # would otherwise raise here, outside the try block below.
        metadata = (kwargs.get("litellm_params") or {}).get("metadata") or {}
        project_name = metadata.get("project_name", "litellm-completion")
        run_name = metadata.get("run_name", "LLMRun")
        print_verbose(
            f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
        )
        try:
            print_verbose(
                f"Langsmith Logging - Enters logging function for model {kwargs}"
            )
            import requests
            from datetime import datetime, timezone

            try:
                start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
                end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
            except Exception:
                # Fall back to "now", timezone-aware so the format matches the
                # primary path (and avoids the deprecated utcnow()).
                start_time = datetime.now(timezone.utc).isoformat()
                end_time = datetime.now(timezone.utc).isoformat()

            # filter out kwargs to not include any dicts, langsmith throws an
            # error when trying to log kwargs
            new_kwargs = {
                key: value
                for key, value in kwargs.items()
                if key not in ("start_time", "end_time")
                and not isinstance(value, dict)
            }

            requests.post(
                "https://api.smith.langchain.com/runs",
                json={
                    "name": run_name,
                    "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
                    "inputs": {**new_kwargs},
                    "outputs": response_obj.json(),
                    "session_name": project_name,
                    "start_time": start_time,
                    "end_time": end_time,
                },
                headers={"x-api-key": self.langsmith_api_key},
            )
            print_verbose(
                f"Langsmith Layer Logging - final response object: {response_obj}"
            )
        except Exception:
            # Best-effort logging; `Exception` (not a bare except) so
            # KeyboardInterrupt/SystemExit still propagate.
            print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
litellm/integrations/litedebugger.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests, traceback, json, os
2
+ import types
3
+
4
+
5
class LiteDebugger:
    """Report call lifecycle events to the LiteLLM debugger API."""

    user_email = None
    dashboard_url = None

    def __init__(self, email=None):
        self.api_url = "https://api.litellm.ai/debugger"
        self.validate_environment(email)

    def validate_environment(self, email):
        """Resolve the user identity and print the dashboard URL.

        The identity is ``email``, else ``LITELLM_TOKEN``, else
        ``LITELLM_EMAIL`` from the environment.

        Raises:
            ValueError: if no identity is available (or setup fails for any
                other reason); the original error is attached as the cause.
        """
        try:
            self.user_email = (
                email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL")
            )
            if (
                self.user_email is None
            ):  # if users are trying to use_client=True but token not set
                raise ValueError(
                    "litellm.use_client = True but no token or email passed. Please set it in litellm.token"
                )
            self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
            try:
                print(
                    f"\033[92mHere's your LiteLLM Dashboard 👉 \033[94m\033[4m{self.dashboard_url}\033[0m"
                )
            except Exception:
                # Fall back to a message without ANSI colour codes.
                print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
        except Exception as e:
            raise ValueError(
                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
            ) from e

    def _post(self, payload):
        """POST ``payload`` as JSON to the debugger API and return the response."""
        return requests.post(
            url=self.api_url,
            headers={"content-type": "application/json"},
            data=json.dumps(payload),
        )

    def input_log_event(
        self,
        model,
        messages,
        end_user,
        litellm_call_id,
        call_type,
        print_verbose,
        litellm_params,
        optional_params,
    ):
        """Log the start of a call (status ``initiated``).

        For ``call_type == "embedding"`` one event is posted per input item;
        for ``"completion"`` a single event is posted. Other call types post
        nothing. Failures are reported through ``print_verbose`` only.
        """
        print_verbose(
            f"LiteDebugger: Pre-API Call Logging for call id {litellm_call_id}"
        )
        try:
            print_verbose(
                f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
            )

            # Send a copy without "logger_fn"; a missing key is fine.
            updated_litellm_params = {
                key: value
                for key, value in litellm_params.items()
                if key != "logger_fn"
            }

            def build_event(event_messages):
                return {
                    "model": model,
                    "messages": event_messages,
                    "end_user": end_user,
                    "status": "initiated",
                    "litellm_call_id": litellm_call_id,
                    "user_email": self.user_email,
                    "litellm_params": updated_litellm_params,
                    "optional_params": optional_params,
                }

            if call_type == "embedding":
                response = None
                # assuming the input is a list as required by the embedding function
                for message in messages:
                    litellm_data_obj = build_event(
                        [{"role": "user", "content": message}]
                    )
                    print_verbose(
                        f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
                    )
                    response = self._post(litellm_data_obj)
                if response is not None:  # nothing was posted for empty input
                    print_verbose(
                        f"LiteDebugger: embedding api response - {response.text}"
                    )
            elif call_type == "completion":
                litellm_data_obj = build_event(
                    messages
                    if isinstance(messages, list)
                    else [{"role": "user", "content": messages}]
                )
                print_verbose(
                    f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
                )
                response = self._post(litellm_data_obj)
                print_verbose(
                    f"LiteDebugger: completion api response - {response.text}"
                )
        except Exception:
            print_verbose(
                f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
            )

    def post_call_log_event(
        self, original_response, litellm_call_id, print_verbose, call_type, stream
    ):
        """Log the raw provider response (status ``received``).

        Embedding responses are truncated to the first five values of the
        first vector; streamed generator responses are replaced by the text
        ``"Streamed response"``. Other call types post nothing. Failures are
        reported through ``print_verbose`` only.
        """
        print_verbose(
            f"LiteDebugger: Post-API Call Logging for call id {litellm_call_id}"
        )
        try:
            if call_type == "embedding":
                # don't store the entire vector
                details = {
                    "original_response": str(
                        original_response["data"][0]["embedding"][:5]
                    )
                }
            elif call_type == "completion" and not stream:
                details = {"original_response": original_response}
            elif call_type == "completion" and stream:
                details = {
                    "original_response": "Streamed response"
                    if isinstance(original_response, types.GeneratorType)
                    else original_response
                }
            else:
                return  # unknown call type: nothing to report

            litellm_data_obj = {
                "status": "received",
                "additional_details": details,
                "litellm_call_id": litellm_call_id,
                "user_email": self.user_email,
            }
            print_verbose(f"litedebugger post-call data object - {litellm_data_obj}")
            response = self._post(litellm_data_obj)
            print_verbose(f"LiteDebugger: api response - {response.text}")
        except Exception:
            print_verbose(
                f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
            )

    def log_event(
        self,
        end_user,
        response_obj,
        start_time,
        end_time,
        litellm_call_id,
        print_verbose,
        call_type,
        stream=False,
    ):
        """Log the final outcome of a call (status ``success``/``failure``).

        Nothing is posted when no branch applies (e.g. a streamed chunk with
        empty content). Failures are reported through ``print_verbose`` only.

        Args:
            end_user: End-user identifier (sent with failures only).
            response_obj: Response or error mapping for the call.
            start_time: ``datetime`` the call started.
            end_time: ``datetime`` the call ended.
            litellm_call_id: Identifier tying the events of one call together.
            print_verbose: Callable taking a single string, for debug output.
            call_type: ``"completion"`` or ``"embedding"``.
            stream: Whether the completion was streamed.
        """
        print_verbose(
            f"LiteDebugger: Success/Failure Call Logging for call id {litellm_call_id}"
        )
        try:
            print_verbose(
                f"LiteLLMDebugger: Success/Failure Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
            )
            total_cost = 0  # [TODO] implement cost tracking
            response_time = (end_time - start_time).total_seconds()

            def success_event(response_text):
                return {
                    "response_time": response_time,
                    "total_cost": total_cost,
                    "response": response_text,
                    "litellm_call_id": litellm_call_id,
                    "status": "success",
                }

            litellm_data_obj = None
            # `== False` / `== True` kept on purpose: a stream value of None
            # must match neither completion branch.
            if call_type == "completion" and stream == False:
                litellm_data_obj = success_event(
                    response_obj["choices"][0]["message"]["content"]
                )
            elif call_type == "embedding":
                # don't store the entire vector
                litellm_data_obj = success_event(
                    str(response_obj["data"][0]["embedding"][:5])
                )
            elif call_type == "completion" and stream == True:
                if len(response_obj["content"]) > 0:  # don't log the empty strings
                    litellm_data_obj = success_event(response_obj["content"])
            elif "error" in response_obj:
                litellm_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "error": response_obj["error"],
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
                    "status": "failure",
                    "user_email": self.user_email,
                }

            if litellm_data_obj is None:
                return  # nothing to report for this event

            print_verbose(
                f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
            )
            response = self._post(litellm_data_obj)
            print_verbose(f"LiteDebugger: api response - {response.text}")
        except Exception:
            print_verbose(
                f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
            )
litellm/integrations/llmonitor.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to LLMonitor
3
+ import datetime
4
+ import traceback
5
+ import dotenv
6
+ import os
7
+ import requests
8
+
9
+ dotenv.load_dotenv() # Loading env variables using dotenv
10
+
11
+
12
+ # convert to {completion: xx, prompt: xx}
13
def parse_usage(usage):
    """Return ``{"completion": n, "prompt": m}`` from a token-usage mapping.

    ``completion_tokens`` and ``prompt_tokens`` are read from ``usage``; a
    missing key counts as 0.
    """
    completion = 0
    if "completion_tokens" in usage:
        completion = usage["completion_tokens"]

    prompt = 0
    if "prompt_tokens" in usage:
        prompt = usage["prompt_tokens"]

    return {"completion": completion, "prompt": prompt}
18
+
19
+
20
def parse_messages(input):
    """Normalise chat messages into ``{"role", "text"}`` records.

    Args:
        input: ``None``, a single message, or a list of messages. A message is
            a string (returned as is), a mapping with ``role`` and optionally
            ``content``/``function_call``, or a wrapper holding the real
            message under ``"message"`` (as in a response choice).

    Returns:
        ``None`` for ``None``; the single cleaned message for a one-element
        list or a bare message; otherwise a list of cleaned messages.
    """
    if input is None:
        return None

    def clean_message(message):
        # if is string, return as is
        if isinstance(message, str):
            return message

        # unwrap choice-style objects that nest the message
        if "message" in message:
            return clean_message(message["message"])

        # `.get` so messages without a "content" key (e.g. function calls)
        # fall through to the function_call payload.
        text = message.get("content")
        if text is None:
            text = message.get("function_call", None)

        return {
            "role": message["role"],
            "text": text,
        }

    if not isinstance(input, list):
        return clean_message(input)
    if len(input) == 1:
        return clean_message(input[0])
    return [clean_message(msg) for msg in input]
47
+
48
+
49
class LLMonitorLogger:
    """Report LLM runs to the LLMonitor ``/api/report`` endpoint."""

    def __init__(self):
        self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
        self.app_id = os.getenv("LLMONITOR_APP_ID")

    def log_event(
        self,
        type,
        event,
        run_id,
        model,
        print_verbose,
        input=None,
        user_id=None,
        response_obj=None,
        start_time=None,
        end_time=None,
        error=None,
    ):
        """POST a ``start`` event and a closing event for one run.

        Failures are reported through ``print_verbose`` and never propagate.

        Args:
            type: Run type sent with both events.
            event: Name of the closing event.
            run_id: Identifier shared by both events.
            model: Model name, sent as the run name.
            print_verbose: Callable taking a single string, for debug output.
            input: Messages sent to the model (see ``parse_messages``).
            user_id: Optional end-user identifier.
            response_obj: Optional response; ``usage`` and ``choices`` are
                read when present.
            start_time: Run start; defaults to the current time at call time.
            end_time: Run end; defaults to the current time at call time.
            error: Optional error details, sent as ``{"stack": error}``.
        """
        # Resolve defaults per call: a `datetime.now()` default in the
        # signature would be evaluated once, when the function is defined.
        if start_time is None:
            start_time = datetime.datetime.now()
        if end_time is None:
            end_time = datetime.datetime.now()

        try:
            print_verbose(f"LLMonitor Logging - Logging request for model {model}")

            usage = None
            output = None
            if response_obj:
                if "usage" in response_obj:
                    usage = parse_usage(response_obj["usage"])
                if "choices" in response_obj:
                    output = response_obj["choices"]

            error_obj = {"stack": error} if error else None

            data = [
                {
                    "type": type,
                    "name": model,
                    "runId": run_id,
                    "app": self.app_id,
                    "event": "start",
                    "timestamp": start_time.isoformat(),
                    "userId": user_id,
                    "input": parse_messages(input),
                },
                {
                    "type": type,
                    "runId": run_id,
                    "app": self.app_id,
                    "event": event,
                    "error": error_obj,
                    "timestamp": end_time.isoformat(),
                    "userId": user_id,
                    "output": parse_messages(output),
                    "tokensUsage": usage,
                },
            ]

            print_verbose(f"LLMonitor Logging - final data object: {data}")

            response = requests.post(
                self.api_url + "/api/report",
                headers={"Content-Type": "application/json"},
                json={"events": data},
            )

            print_verbose(f"LLMonitor Logging - response: {response}")
        except Exception:
            # Best-effort logging; `Exception` (not a bare except) so
            # KeyboardInterrupt/SystemExit still propagate.
            print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
litellm/integrations/prompt_layer.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Promptlayer
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+
10
+
11
class PromptLayerLogger:
    """Log successful LLM calls to PromptLayer's REST tracking endpoints."""

    def __init__(self):
        self.key = os.getenv("PROMPTLAYER_API_KEY")

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        """Track one request and, if present, its metadata on PromptLayer.

        Failures are reported through ``print_verbose`` and never propagate.

        Args:
            kwargs: Call details; ``model``, ``messages`` and
                ``optional_params`` are required, ``litellm_params`` (with
                its ``metadata``) is optional.
            response_obj: Response, sent as ``dict(response_obj)``.
            start_time: Object with ``.timestamp()`` marking the call start.
            end_time: Object with ``.timestamp()`` marking the call end.
            print_verbose: Callable taking a single string, for debug output.
        """
        try:
            # model + messages first, then every optional param on top
            new_kwargs = {
                "model": kwargs["model"],
                "messages": kwargs["messages"],
            }
            new_kwargs.update(kwargs["optional_params"])

            print_verbose(
                f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
            )

            request_response = requests.post(
                "https://api.promptlayer.com/rest/track-request",
                json={
                    "function_name": "openai.ChatCompletion.create",
                    "kwargs": new_kwargs,
                    "tags": ["hello", "world"],
                    "request_response": dict(response_obj),
                    "request_start_time": int(start_time.timestamp()),
                    "request_end_time": int(end_time.timestamp()),
                    "api_key": self.key,
                    # Optional params for PromptLayer
                    # "prompt_id": "<PROMPT ID>",
                    # "prompt_input_variables": "<Dictionary of variables for prompt>",
                    # "prompt_version":1,
                },
            )
            print_verbose(
                f"Prompt Layer Logging: success - final response object: {request_response.text}"
            )
            response_json = request_response.json()  # parse once, reuse below
            if "success" not in response_json:
                raise Exception("Promptlayer did not successfully log the response!")

            if "request_id" in response_json:
                # litellm_params / metadata may be absent or None
                metadata = (kwargs.get("litellm_params") or {}).get("metadata")
                # debug output goes through print_verbose, not stdout
                print_verbose(f"Prompt Layer Logging - metadata: {metadata}")
                if metadata is not None:
                    response = requests.post(
                        "https://api.promptlayer.com/rest/track-metadata",
                        json={
                            "request_id": response_json["request_id"],
                            "api_key": self.key,
                            "metadata": metadata,
                        },
                    )
                    print_verbose(
                        f"Prompt Layer Logging: success - metadata post response object: {response.text}"
                    )

        except Exception:
            # Best-effort logging; `Exception` (not a bare except) so
            # KeyboardInterrupt/SystemExit still propagate.
            print_verbose(f"error: Prompt Layer Error - {traceback.format_exc()}")
litellm/integrations/s3.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to an S3 bucket
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm, uuid
11
+ from litellm._logging import print_verbose
12
+
13
+
14
class S3Logger:
    """Write one JSON object per LiteLLM call to an S3 (or S3-compatible) bucket.

    Connection settings come from the constructor arguments, unless
    ``litellm.s3_callback_params`` is set, in which case that dict takes
    precedence for every ``s3_*`` setting.
    """

    def __init__(
        self,
        s3_bucket_name=None,
        s3_region_name=None,
        s3_api_version=None,
        s3_use_ssl=True,
        s3_verify=None,
        s3_endpoint_url=None,
        s3_aws_access_key_id=None,
        s3_aws_secret_access_key=None,
        s3_aws_session_token=None,
        s3_config=None,
        **kwargs,
    ):
        """Create the boto3 S3 client used by :meth:`log_event`.

        Any extra ``kwargs`` are forwarded to ``boto3.client``. Exceptions
        raised while building the client are logged and re-raised.
        """
        import boto3

        try:
            print_verbose("in init s3 logger")

            if litellm.s3_callback_params is not None:
                # read in .env variables - example os.environ/AWS_BUCKET_NAME
                # (only values of existing keys are replaced, so mutating the
                # dict while iterating over it is safe)
                for key, value in litellm.s3_callback_params.items():
                    if isinstance(value, str) and value.startswith("os.environ/"):
                        litellm.s3_callback_params[key] = litellm.get_secret(value)
                # now set s3 params from litellm.s3_callback_params
                params = litellm.s3_callback_params
                s3_bucket_name = params.get("s3_bucket_name")
                s3_region_name = params.get("s3_region_name")
                s3_api_version = params.get("s3_api_version")
                # Keep SSL on unless the caller explicitly turns it off; a
                # missing key must not silently override the True default.
                s3_use_ssl = params.get("s3_use_ssl", True)
                s3_verify = params.get("s3_verify")
                s3_endpoint_url = params.get("s3_endpoint_url")
                s3_aws_access_key_id = params.get("s3_aws_access_key_id")
                s3_aws_secret_access_key = params.get("s3_aws_secret_access_key")
                s3_aws_session_token = params.get("s3_aws_session_token")
                s3_config = params.get("s3_config")
                # done reading litellm.s3_callback_params

            self.bucket_name = s3_bucket_name
            # Create an S3 client with custom endpoint URL
            self.s3_client = boto3.client(
                "s3",
                region_name=s3_region_name,
                endpoint_url=s3_endpoint_url,
                api_version=s3_api_version,
                use_ssl=s3_use_ssl,
                verify=s3_verify,
                aws_access_key_id=s3_aws_access_key_id,
                aws_secret_access_key=s3_aws_secret_access_key,
                aws_session_token=s3_aws_session_token,
                config=s3_config,
                **kwargs,
            )
        except Exception as e:
            print_verbose(f"Got exception on init s3 client {str(e)}")
            raise e

    async def _async_log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose
    ):
        """Async entry point; delegates to the synchronous :meth:`log_event`."""
        self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        """Serialize one call to JSON and upload it to the configured bucket.

        Returns the ``put_object`` response on success. Logging is best
        effort: any exception is printed and reported through
        ``print_verbose``, and ``None`` is returned instead of raising.
        """
        import json

        try:
            print_verbose(f"s3 Logging - Enters logging function for model {kwargs}")

            # construct payload to send to s3
            # follows the same params as langfuse.py
            litellm_params = kwargs.get("litellm_params", {}) or {}
            metadata = (
                litellm_params.get("metadata", {}) or {}
            )  # if litellm_params['metadata'] == None
            messages = kwargs.get("messages")
            optional_params = kwargs.get("optional_params", {})
            call_type = kwargs.get("call_type", "litellm.completion")
            cache_hit = kwargs.get("cache_hit", False)
            usage = response_obj["usage"]
            event_id = response_obj.get("id", str(uuid.uuid4()))

            # Build the initial payload
            payload = {
                "id": event_id,
                "call_type": call_type,
                "cache_hit": cache_hit,
                "startTime": start_time,
                "endTime": end_time,
                "model": kwargs.get("model", ""),
                "user": kwargs.get("user", ""),
                "modelParameters": optional_params,
                "messages": messages,
                "response": response_obj,
                "usage": usage,
                "metadata": metadata,
            }

            # Ensure everything in the payload is converted to str
            for field, value in payload.items():
                try:
                    payload[field] = str(value)
                except Exception:
                    # non blocking if it can't cast to a str
                    pass

            s3_object_key = (
                payload["id"] + "-time=" + str(start_time)
            )  # we need the s3 key to include the time, so we log cache hits too

            payload = json.dumps(payload)

            print_verbose(f"\ns3 Logger - Logging payload = {payload}")

            response = self.s3_client.put_object(
                Bucket=self.bucket_name,
                Key=s3_object_key,
                Body=payload,
                ContentType="application/json",
                ContentLanguage="en",
                # Name the download after the object itself; the previous code
                # interpolated the leftover loop variable from the stringify
                # loop, so every object was labelled "metadata.json".
                ContentDisposition=f'inline; filename="{s3_object_key}.json"',
            )

            print_verbose(f"Response from s3:{str(response)}")

            print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
            return response
        except Exception as e:
            traceback.print_exc()
            print_verbose(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
litellm/integrations/supabase.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to Supabase
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm
11
+
12
+
13
class Supabase:
    """Record LiteLLM requests and their outcomes in a Supabase table.

    Rows go to the table named by ``supabase_table_name``. All logging is
    best effort: ordinary errors are reported through ``print_verbose`` and
    never raised to the caller.
    """

    # Class variables or attributes
    supabase_table_name = "request_logs"

    def __init__(self):
        """Build the Supabase client from SUPABASE_URL / SUPABASE_KEY."""
        # Instance variables
        self.supabase_url = os.getenv("SUPABASE_URL")
        self.supabase_key = os.getenv("SUPABASE_KEY")
        try:
            import supabase
        except ImportError:
            # NOTE(review): installs a package at runtime as a side effect;
            # kept for backward compatibility with existing deployments.
            subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
            import supabase
        self.supabase_client = supabase.create_client(
            self.supabase_url, self.supabase_key
        )

    def input_log_event(
        self, model, messages, end_user, litellm_call_id, print_verbose
    ):
        """Insert an "initiated" row for a call that is about to be made."""
        try:
            print_verbose(
                f"Supabase Logging - Enters input logging function for model {model}"
            )
            supabase_data_obj = {
                "model": model,
                "messages": messages,
                "end_user": end_user,
                "status": "initiated",
                "litellm_call_id": litellm_call_id,
            }
            data, count = (
                self.supabase_client.table(self.supabase_table_name)
                .insert(supabase_data_obj)
                .execute()
            )
            print_verbose(f"data: {data}")
        except Exception:
            # Best effort: swallow ordinary errors, but let KeyboardInterrupt
            # and SystemExit propagate (a bare ``except:`` trapped those too).
            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")

    def log_event(
        self,
        model,
        messages,
        end_user,
        response_obj,
        start_time,
        end_time,
        litellm_call_id,
        print_verbose,
    ):
        """Upsert the final "success" or "failure" row for a finished call.

        The row is keyed on ``litellm_call_id``. A ``response_obj`` holding
        neither "choices" nor "error" is ignored.
        """
        try:
            print_verbose(
                f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
            )

            total_cost = litellm.completion_cost(completion_response=response_obj)

            response_time = (end_time - start_time).total_seconds()
            if "choices" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj["choices"][0]["message"]["content"],
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
                    "status": "success",
                }
            elif "error" in response_obj:
                if "Unable to map your input to a model." in response_obj["error"]:
                    total_cost = 0
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj["error"],
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
                    "status": "failure",
                }
            else:
                # Nothing recognizable to record.
                return

            print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
            data, count = (
                self.supabase_client.table(self.supabase_table_name)
                .upsert(supabase_data_obj, on_conflict="litellm_call_id")
                .execute()
            )

        except Exception:
            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
litellm/integrations/traceloop.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class TraceloopLogger:
    """Emit an OpenTelemetry span per LiteLLM call through the Traceloop SDK."""

    def __init__(self):
        """Initialise the Traceloop SDK and keep a tracer wrapper for later use."""
        from traceloop.sdk.tracing.tracing import TracerWrapper
        from traceloop.sdk import Traceloop

        Traceloop.init(app_name="Litellm-Server", disable_batch=True)
        self.tracer_wrapper = TracerWrapper()

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        """Record request parameters, prompts, usage and completions on a span.

        Calls whose ``custom_llm_provider`` is "openai" are skipped. Errors
        raised while building the span are reported through ``print_verbose``
        and not re-raised.
        """
        from opentelemetry.trace import SpanKind
        from opentelemetry.semconv.ai import SpanAttributes

        try:
            tracer = self.tracer_wrapper.get_tracer()

            # LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
            litellm_params = kwargs.get("litellm_params") or {}
            if litellm_params.get("custom_llm_provider") == "openai":
                return

            optional_params = kwargs.get("optional_params", {})
            with tracer.start_as_current_span(
                "litellm.completion",
                kind=SpanKind.CLIENT,
            ) as span:
                if span.is_recording():
                    span.set_attribute(
                        SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
                    )
                    if "stop" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
                            optional_params.get("stop"),
                        )
                    if "frequency_penalty" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_FREQUENCY_PENALTY,
                            optional_params.get("frequency_penalty"),
                        )
                    if "presence_penalty" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_PRESENCE_PENALTY,
                            optional_params.get("presence_penalty"),
                        )
                    if "top_p" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
                        )
                    if "tools" in optional_params or "functions" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_REQUEST_FUNCTIONS,
                            optional_params.get(
                                "tools", optional_params.get("functions")
                            ),
                        )
                    if "user" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_USER, optional_params.get("user")
                        )
                    # max_tokens / temperature are found in optional_params,
                    # so they must be read from there as well; reading them
                    # from the top-level kwargs recorded None.
                    if "max_tokens" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_REQUEST_MAX_TOKENS,
                            optional_params.get("max_tokens"),
                        )
                    if "temperature" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_TEMPERATURE,
                            optional_params.get("temperature"),
                        )

                    for idx, prompt in enumerate(kwargs.get("messages") or []):
                        span.set_attribute(
                            f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
                            prompt.get("role"),
                        )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
                            prompt.get("content"),
                        )

                    span.set_attribute(
                        SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
                    )
                    usage = response_obj.get("usage")
                    if usage:
                        span.set_attribute(
                            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
                            usage.get("total_tokens"),
                        )
                        span.set_attribute(
                            SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
                            usage.get("completion_tokens"),
                        )
                        span.set_attribute(
                            SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
                            usage.get("prompt_tokens"),
                        )

                    for idx, choice in enumerate(response_obj.get("choices") or []):
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
                            choice.get("finish_reason"),
                        )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
                            choice.get("message").get("role"),
                        )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
                            choice.get("message").get("content"),
                        )

        except Exception as e:
            print_verbose(f"Traceloop Layer Error - {e}")
litellm/integrations/weights_biases.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Flag read by WeightsBiasesLogger: True only when the optional wandb
# trace-tree helpers below could be imported and defined.
imported_openAIResponse = True
try:
    import io
    import logging
    import sys
    from typing import Any, Dict, List, Optional, TypeVar

    from wandb.sdk.data_types import trace_tree

    if sys.version_info >= (3, 8):
        from typing import Literal, Protocol
    else:
        from typing_extensions import Literal, Protocol

    logger = logging.getLogger(__name__)

    K = TypeVar("K", bound=str)
    V = TypeVar("V")

    class OpenAIResponse(Protocol[K, V]):  # type: ignore
        """Structural type for a dict-like OpenAI response."""

        # contains a (known) object attribute
        object: Literal["chat.completion", "edit", "text_completion"]

        def __getitem__(self, key: K) -> V:
            ...  # pragma: no cover

        def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
            ...  # pragma: no cover

    class OpenAIRequestResponseResolver:
        """Turn an OpenAI-style request/response pair into a W&B trace tree."""

        def __call__(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> Optional[trace_tree.WBTraceTree]:
            """Dispatch on ``response["object"]``.

            Returns ``None`` for unknown object kinds and when resolution
            fails; failures are logged, not raised.
            """
            try:
                if response["object"] == "edit":
                    return self._resolve_edit(request, response, time_elapsed)
                elif response["object"] == "text_completion":
                    return self._resolve_completion(request, response, time_elapsed)
                elif response["object"] == "chat.completion":
                    return self._resolve_chat_completion(
                        request, response, time_elapsed
                    )
                else:
                    logger.info(f"Unknown OpenAI response object: {response['object']}")
            except Exception as e:
                logger.warning(f"Failed to resolve request/response: {e}")
            return None

        @staticmethod
        def results_to_trace_tree(
            request: Dict[str, Any],
            response: OpenAIResponse,
            results: List[trace_tree.Result],
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Converts the request, response, and results into a trace tree.

            params:
                request: The request dictionary
                response: The response object
                results: A list of results object
                time_elapsed: The time elapsed in seconds
            returns:
                A wandb trace tree object.
            """
            # The span starts at the response's "created" timestamp and lasts
            # for time_elapsed.
            start_time_ms = int(round(response["created"] * 1000))
            end_time_ms = start_time_ms + int(round(time_elapsed * 1000))
            span = trace_tree.Span(
                name=f"{response.get('model', 'openai')}_{response['object']}_{response.get('created')}",
                attributes=dict(response),  # type: ignore
                start_time_ms=start_time_ms,
                end_time_ms=end_time_ms,
                span_kind=trace_tree.SpanKind.LLM,
                results=results,
            )
            model_obj = {"request": request, "response": response, "_kind": "openai"}
            return trace_tree.WBTraceTree(root_span=span, model_dict=model_obj)

        def _resolve_edit(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Resolves the request and response objects for `openai.Edit`."""
            request_str = (
                f"\n\n**Instruction**: {request['instruction']}\n\n"
                f"**Input**: {request['input']}\n"
            )
            choices = [
                f"\n\n**Edited**: {choice['text']}\n" for choice in response["choices"]
            ]

            return self._request_response_result_to_trace(
                request=request,
                response=response,
                request_str=request_str,
                choices=choices,
                time_elapsed=time_elapsed,
            )

        def _resolve_completion(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Resolves the request and response objects for `openai.Completion`."""
            request_str = f"\n\n**Prompt**: {request['prompt']}\n"
            choices = [
                f"\n\n**Completion**: {choice['text']}\n"
                for choice in response["choices"]
            ]

            return self._request_response_result_to_trace(
                request=request,
                response=response,
                request_str=request_str,
                choices=choices,
                time_elapsed=time_elapsed,
            )

        def _resolve_chat_completion(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Resolves the request and response objects for `openai.ChatCompletion`."""
            prompt = io.StringIO()
            for message in request["messages"]:
                prompt.write(f"\n\n**{message['role']}**: {message['content']}\n")
            request_str = prompt.getvalue()

            choices = [
                f"\n\n**{choice['message']['role']}**: {choice['message']['content']}\n"
                for choice in response["choices"]
            ]

            return self._request_response_result_to_trace(
                request=request,
                response=response,
                request_str=request_str,
                choices=choices,
                time_elapsed=time_elapsed,
            )

        def _request_response_result_to_trace(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            request_str: str,
            choices: List[str],
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Wrap each rendered choice in a `Result` and build the trace tree."""
            results = [
                trace_tree.Result(
                    inputs={"request": request_str},
                    outputs={"response": choice},
                )
                for choice in choices
            ]
            trace = self.results_to_trace_tree(request, response, results, time_elapsed)
            return trace

except Exception:
    # wandb is an optional dependency: any ordinary failure while importing
    # or defining the helpers just disables the integration. Unlike a bare
    # ``except:``, this lets KeyboardInterrupt / SystemExit propagate.
    imported_openAIResponse = False
172
+
173
+
174
+ #### What this does ####
175
+ # On success, logs events to Weights & Biases
176
+ import dotenv, os
177
+ import requests
178
+ import requests
179
+ from datetime import datetime
180
+
181
+ dotenv.load_dotenv() # Loading env variables using dotenv
182
+ import traceback
183
+
184
+
185
class WeightsBiasesLogger:
    """Log successful LiteLLM calls to Weights & Biases as trace trees."""

    # Class variables or attributes
    def __init__(self):
        """Check that wandb and the trace resolver are usable.

        Raises:
            Exception: if ``wandb`` cannot be imported, or if the module-level
                trace-tree helpers failed to import.
        """
        try:
            import wandb  # noqa: F401 - imported only to check availability
        except Exception as err:
            raise Exception(
                "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"
            ) from err
        if not imported_openAIResponse:
            # wandb itself imported, so the failure is in the trace-tree
            # helpers; say so instead of repeating "not installed".
            raise Exception(
                "\033[91m wandb trace_tree helpers could not be imported, try running 'pip install --upgrade wandb' to fix this error\033[0m"
            )
        self.resolver = OpenAIRequestResponseResolver()

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        """Resolve the call into a trace and log it to a fresh W&B run.

        Best effort: ordinary errors are reported through ``print_verbose``
        and not re-raised.
        """
        # Method definition
        import wandb

        try:
            print_verbose(f"W&B Logging - Enters logging function for model {kwargs}")
            run = wandb.init()
            try:
                print_verbose(response_obj)

                trace = self.resolver(
                    kwargs, response_obj, (end_time - start_time).total_seconds()
                )

                if trace is not None:
                    run.log({"trace": trace})
            finally:
                # Always close the run, even when resolving or logging fails,
                # so a failed event does not leave a dangling W&B run.
                run.finish()
            print_verbose(
                f"W&B Logging Logging - final response object: {response_obj}"
            )
        except Exception:
            print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
litellm/llms/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from . import *
litellm/llms/ai21.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types, traceback
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, httpx
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message
8
+ import litellm
9
+
10
+
11
class AI21Error(Exception):
    """Error raised for a failed AI21 call.

    Carries the HTTP status code and message, plus a synthetic
    ``httpx`` request/response pair built from them.
    """

    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # Synthetic POST request against the AI21 studio base URL.
        ai21_request = httpx.Request(
            method="POST", url="https://api.ai21.com/studio/v1/"
        )
        self.request = ai21_request
        self.response = httpx.Response(status_code=status_code, request=ai21_request)
        # Call the base class constructor with the parameters it needs
        super().__init__(message)
22
+
23
+
24
class AI21Config:
    """
    Reference: https://docs.ai21.com/reference/j2-complete-ref

    Class-level defaults for AI21 completion parameters. Instantiating the
    class stores every non-None argument on the class itself, so the values
    are shared by all later `get_config()` calls.

    - `numResults` (int32): Number of completions to sample and return. Optional, default is 1. A value greater than 1 is only meaningful when the temperature is above 0 (non-greedy decoding).

    - `maxTokens` (int32): Maximum number of tokens to generate per result. Optional, default is 16. Without `stopSequences`, generation stops after `maxTokens` tokens.

    - `minTokens` (int32): Minimum number of tokens to generate per result. Optional, default is 0. `stopSequences` are ignored until `minTokens` tokens are generated.

    - `temperature` (float): Modifies the distribution tokens are sampled from. Optional, default is 0.7. A value of 0 essentially disables sampling (greedy decoding).

    - `topP` (float): Sample only from the top percentile of probability mass. Optional, default is 1. For instance, 0.9 considers only tokens comprising the top 90% probability mass.

    - `stopSequences` (array of strings): Stops decoding if any of these strings is generated. Optional.

    - `topKReturn` (int32): Between 0 and 10 inclusive. Optional, default is 0. Number of top-K alternative tokens (with log-probabilities) to return at each position.

    - `frequencyPenalty` (object): Placeholder for frequency penalty object.

    - `presencePenalty` (object): Placeholder for presence penalty object.

    - `countPenalty` (object): Placeholder for count penalty object.
    """

    numResults: Optional[int] = None
    maxTokens: Optional[int] = None
    minTokens: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    stopSequences: Optional[list] = None
    topKReturn: Optional[int] = None
    # NOTE(review): spelled `frequencePenalty` here but `frequencyPenalty` in
    # the docstring above - looks like a typo; confirm against the AI21 API
    # before renaming, since the name is part of the public interface.
    frequencePenalty: Optional[dict] = None
    presencePenalty: Optional[dict] = None
    countPenalty: Optional[dict] = None

    def __init__(
        self,
        numResults: Optional[int] = None,
        maxTokens: Optional[int] = None,
        minTokens: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
        stopSequences: Optional[list] = None,
        topKReturn: Optional[int] = None,
        frequencePenalty: Optional[dict] = None,
        presencePenalty: Optional[dict] = None,
        countPenalty: Optional[dict] = None,
    ) -> None:
        supplied = {
            "numResults": numResults,
            "maxTokens": maxTokens,
            "minTokens": minTokens,
            "temperature": temperature,
            "topP": topP,
            "stopSequences": stopSequences,
            "topKReturn": topKReturn,
            "frequencePenalty": frequencePenalty,
            "presencePenalty": presencePenalty,
            "countPenalty": countPenalty,
        }
        owner = self.__class__
        for name, value in supplied.items():
            if value is None:
                continue
            # Stored on the class, not the instance: the config is global.
            setattr(owner, name, value)

    @classmethod
    def get_config(cls):
        """Return the class attributes that are set (non-None) as a dict."""
        callable_kinds = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        config = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, callable_kinds):
                continue
            config[name] = value
        return config
97
+
98
+
99
def validate_environment(api_key):
    """Return the JSON request headers for AI21, authorised with ``api_key``.

    Raises:
        ValueError: if ``api_key`` is None.
    """
    if api_key is None:
        raise ValueError(
            "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params"
        )
    bearer_token = "Bearer " + api_key
    return {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": bearer_token,
    }
110
+
111
+
112
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """Call the AI21 ``/complete`` endpoint and fill in ``model_response``.

    The prompt is the concatenation of every message's ``content``. Values
    from ``litellm.AI21Config`` are added to ``optional_params`` for any key
    the caller did not set.

    Returns:
        ``response.iter_lines()`` when ``optional_params["stream"]`` is True,
        otherwise ``model_response`` with choices, created, model and usage
        filled in.

    Raises:
        ValueError: if ``api_key`` is None.
        AI21Error: on a non-200 response, or when the response body cannot
            be parsed into choices.
    """
    headers = validate_environment(api_key)
    if optional_params is None:
        # The default used to crash on the membership test below.
        optional_params = {}

    # Every role is handled identically: contents are concatenated as-is.
    prompt = "".join(f"{message['content']}" for message in messages)

    ## Load Config
    config = litellm.AI21Config.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > ai21_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        api_base + model + "/complete", headers=headers, data=json.dumps(data)
    )
    if response.status_code != 200:
        raise AI21Error(status_code=response.status_code, message=response.text)
    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        ## RESPONSE OBJECT
        completion_response = response.json()
        try:
            choices_list = []
            for idx, item in enumerate(completion_response["completions"]):
                if len(item["data"]["text"]) > 0:
                    message_obj = Message(content=item["data"]["text"])
                else:
                    message_obj = Message(content=None)
                choice_obj = Choices(
                    finish_reason=item["finishReason"]["reason"],
                    index=idx + 1,
                    message=message_obj,
                )
                choices_list.append(choice_obj)
            model_response["choices"] = choices_list
        except Exception as e:
            raise AI21Error(
                message=traceback.format_exc(), status_code=response.status_code
            ) from e

        ## CALCULATING USAGE
        prompt_tokens = len(encoding.encode(prompt))
        # An empty completion is stored as content=None; count it as zero
        # tokens instead of passing None to the tokenizer.
        first_content = model_response["choices"][0]["message"].get("content") or ""
        completion_tokens = len(encoding.encode(first_content))

        model_response["created"] = int(time.time())
        model_response["model"] = model
        model_response["usage"] = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }
        return model_response
208
+
209
+
210
def embedding():
    """Placeholder for AI21 embedding calls; does nothing and returns None."""
    # logic for parsing in - calling - parsing out model embedding calls
    return None
litellm/llms/aleph_alpha.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Choices, Message, Usage
9
+ import httpx
10
+
11
+
12
class AlephAlphaError(Exception):
    """Error raised when an Aleph Alpha completion call fails.

    Attributes:
        status_code: HTTP status code associated with the failure.
        message: Error text; also used as the exception message.
        request: Synthetic ``httpx.Request`` (POST to the Aleph Alpha
            ``/complete`` URL) attached to the synthetic response.
        response: Synthetic ``httpx.Response`` carrying ``status_code``.
    """

    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # Build a stand-in request first so the response can reference it.
        stub_request = httpx.Request(
            method="POST", url="https://api.aleph-alpha.com/complete"
        )
        self.request = stub_request
        self.response = httpx.Response(
            status_code=status_code, request=self.request
        )
        # Hand the message to the base Exception so str(err) shows it.
        super().__init__(self.message)
23
+
24
+
25
class AlephAlphaConfig:
    """
    Reference: https://docs.aleph-alpha.com/api/complete/

    The `AlephAlphaConfig` class represents the configuration for the Aleph Alpha API.
    Values passed to the constructor are stored on the CLASS (not the instance), so they
    act as process-wide defaults that `get_config()` later returns. Here are the properties:

    - `maximum_tokens` (integer, required): The maximum number of tokens to be generated by the completion. The sum of input tokens and maximum tokens may not exceed 2048.

    - `minimum_tokens` (integer, optional; default value: 0): Generate at least this number of tokens before an end-of-text token is generated.

    - `echo` (boolean, optional; default value: false): Whether to echo the prompt in the completion.

    - `temperature` (number, nullable; default value: 0): Adjusts how creatively the model generates outputs. Use combinations of temperature, top_k, and top_p sensibly.

    - `top_k` (integer, nullable; default value: 0): Introduces randomness into token generation by considering the top k most likely options.

    - `top_p` (number, nullable; default value: 0): Adds randomness by considering the smallest set of tokens whose cumulative probability exceeds top_p.

    - `presence_penalty`, `frequency_penalty`, `sequence_penalty` (number, nullable; default value: 0): Various penalties that can reduce repetition.

    - `sequence_penalty_min_length` (integer; default value: 2): Minimum number of tokens to be considered as a sequence.

    - `repetition_penalties_include_prompt`, `repetition_penalties_include_completion`, `use_multiplicative_presence_penalty`,`use_multiplicative_frequency_penalty`,`use_multiplicative_sequence_penalty` (boolean, nullable; default value: false): Various settings that adjust how the repetition penalties are applied.

    - `penalty_bias` (string, nullable): Text used in addition to the penalized tokens for repetition penalties.

    - `penalty_exceptions` (string[], nullable): Strings that may be generated without penalty.

    - `penalty_exceptions_include_stop_sequences` (boolean, nullable; default value: true): Include all stop_sequences in penalty_exceptions.

    - `best_of` (integer, nullable; default value: 1): The number of completions will be generated on the server side.

    - `n` (integer, nullable; default value: 1): The number of completions to return.

    - `logit_bias` (object, nullable): Adjust the logit scores before sampling.

    - `log_probs` (integer, nullable): Number of top log probabilities for each token generated.

    - `stop_sequences` (string[], nullable): List of strings that will stop generation if they're generated.

    - `tokens` (boolean, nullable; default value: false): Flag indicating whether individual tokens of the completion should be returned or not.

    - `raw_completion` (boolean; default value: false): if True, the raw completion of the model will be returned.

    - `disable_optimizations` (boolean, nullable; default value: false): Disables any applied optimizations to both your prompt and completion.

    - `completion_bias_inclusion`, `completion_bias_exclusion` (string[], default value: []): Set of strings to bias the generation of tokens.

    - `completion_bias_inclusion_first_token_only`, `completion_bias_exclusion_first_token_only` (boolean; default value: false): Consider only the first token for the completion_bias_inclusion/exclusion.

    - `contextual_control_threshold` (number, nullable): Control over how similar tokens are controlled.

    - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores.
    """

    maximum_tokens: Optional[
        int
    ] = litellm.max_tokens  # aleph alpha requires max tokens
    minimum_tokens: Optional[int] = None
    echo: Optional[bool] = None
    # "number" parameters accept fractional values, hence float rather than int.
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_p: Optional[float] = None
    presence_penalty: Optional[float] = None
    frequency_penalty: Optional[float] = None
    sequence_penalty: Optional[float] = None
    sequence_penalty_min_length: Optional[int] = None
    repetition_penalties_include_prompt: Optional[bool] = None
    repetition_penalties_include_completion: Optional[bool] = None
    use_multiplicative_presence_penalty: Optional[bool] = None
    use_multiplicative_frequency_penalty: Optional[bool] = None
    use_multiplicative_sequence_penalty: Optional[bool] = None
    penalty_bias: Optional[str] = None
    penalty_exceptions: Optional[list] = None
    penalty_exceptions_include_stop_sequences: Optional[bool] = None
    best_of: Optional[int] = None
    n: Optional[int] = None
    logit_bias: Optional[dict] = None
    log_probs: Optional[int] = None
    stop_sequences: Optional[list] = None
    tokens: Optional[bool] = None
    raw_completion: Optional[bool] = None
    disable_optimizations: Optional[bool] = None
    completion_bias_inclusion: Optional[list] = None
    completion_bias_exclusion: Optional[list] = None
    completion_bias_inclusion_first_token_only: Optional[bool] = None
    completion_bias_exclusion_first_token_only: Optional[bool] = None
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = None

    def __init__(
        self,
        maximum_tokens: Optional[int] = None,
        minimum_tokens: Optional[int] = None,
        echo: Optional[bool] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        sequence_penalty: Optional[float] = None,
        sequence_penalty_min_length: Optional[int] = None,
        repetition_penalties_include_prompt: Optional[bool] = None,
        repetition_penalties_include_completion: Optional[bool] = None,
        use_multiplicative_presence_penalty: Optional[bool] = None,
        use_multiplicative_frequency_penalty: Optional[bool] = None,
        use_multiplicative_sequence_penalty: Optional[bool] = None,
        penalty_bias: Optional[str] = None,
        penalty_exceptions_include_stop_sequences: Optional[bool] = None,
        best_of: Optional[int] = None,
        n: Optional[int] = None,
        logit_bias: Optional[dict] = None,
        log_probs: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        tokens: Optional[bool] = None,
        raw_completion: Optional[bool] = None,
        disable_optimizations: Optional[bool] = None,
        completion_bias_inclusion: Optional[list] = None,
        completion_bias_exclusion: Optional[list] = None,
        completion_bias_inclusion_first_token_only: Optional[bool] = None,
        completion_bias_exclusion_first_token_only: Optional[bool] = None,
        contextual_control_threshold: Optional[float] = None,
        control_log_additive: Optional[bool] = None,
        # Appended last so existing positional callers keep working.
        penalty_exceptions: Optional[list] = None,
    ) -> None:
        """Store every non-``None`` argument as a class-level default."""
        # Snapshot taken before any other local is bound, so it holds only
        # ``self`` and the constructor arguments.
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Written to the class on purpose: settings are shared globally.
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return a dict of the class-level settings whose value is not ``None``.

        Dunder attributes, functions, classmethods and staticmethods defined
        directly on the class are excluded.
        """
        non_setting_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(v, non_setting_types)
            and v is not None
        }
170
+
171
+
172
def validate_environment(api_key):
    """Build the HTTP headers for an Aleph Alpha request.

    Args:
        api_key: API key; when truthy it is sent as a bearer token.

    Returns:
        dict: JSON accept/content-type headers, plus ``Authorization`` when
        ``api_key`` is truthy.
    """
    # An empty or missing key simply yields no Authorization header.
    auth_header = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    return {
        "accept": "application/json",
        "content-type": "application/json",
        **auth_header,
    }
180
+
181
+
182
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
    default_max_tokens_to_sample=None,
):
    """Run a completion request against the Aleph Alpha API.

    Args:
        model: Model name; names containing ``"control"`` get the
            ``###Instruction`` / ``###Input`` / ``###Response`` prompt format.
        messages: Chat messages (dicts with ``content`` and optionally ``role``).
        api_base: URL the request is POSTed to.
        model_response: Response object that is filled in and returned.
        print_verbose: Callable used to emit debug output.
        encoding: Tokenizer exposing ``encode``; used only for usage counts.
        api_key: API key sent as a bearer token when truthy.
        logging_obj: Object exposing ``pre_call`` / ``post_call`` hooks.
        optional_params: Extra request parameters. Values set on
            ``litellm.AlephAlphaConfig`` are merged in for keys not already
            present; a dict passed by the caller is updated in place.
        litellm_params: Unused here.
        logger_fn: Unused here.
        default_max_tokens_to_sample: Unused here.

    Returns:
        ``response.iter_lines()`` when streaming is requested, otherwise the
        populated ``model_response``.

    Raises:
        AlephAlphaError: On a non-200 streaming response, a non-JSON body, an
            ``error`` field in the body, or an unparseable completion payload.
    """
    headers = validate_environment(api_key)

    # The signature defaults to None; normalise so the merge below cannot crash.
    if optional_params is None:
        optional_params = {}

    ## Load Config
    # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
    for k, v in litellm.AlephAlphaConfig.get_config().items():
        optional_params.setdefault(k, v)

    if "control" in model:  # follow the ###Instruction / ###Response format
        prompt_parts = []
        for idx, message in enumerate(messages):
            content = message["content"]
            if "role" not in message:
                prompt_parts.append(f"{content}")
            elif idx == 0 or message["role"] == "system":
                # first message is always the instruction (required)
                prompt_parts.append(f"###Instruction: {content}")
            elif message["role"] == "user":
                prompt_parts.append(f"###Input: {content}")
            else:
                prompt_parts.append(f"###Response: {content}")
        prompt = "".join(prompt_parts)
    else:
        prompt = " ".join(message["content"] for message in messages)

    data = {
        "model": model,
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )

    ## COMPLETION CALL
    stream = optional_params.get("stream", False)
    response = requests.post(
        api_base,
        headers=headers,
        data=json.dumps(data),
        stream=stream,
    )
    if stream == True:  # noqa: E712 - keep the original equality semantics
        # Surface HTTP failures now instead of handing back an error body
        # disguised as a stream.
        if response.status_code != 200:
            raise AlephAlphaError(
                status_code=response.status_code, message=response.text
            )
        return response.iter_lines()

    ## LOGGING
    logging_obj.post_call(
        input=prompt,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    print_verbose(f"raw model_response: {response.text}")

    ## RESPONSE OBJECT
    try:
        completion_response = response.json()
    except ValueError as err:  # body was not valid JSON
        raise AlephAlphaError(
            message=response.text, status_code=response.status_code
        ) from err

    if "error" in completion_response:
        raise AlephAlphaError(
            message=completion_response["error"],
            status_code=response.status_code,
        )

    try:
        choices_list = []
        for idx, item in enumerate(completion_response["completions"]):
            # Empty completions are represented as content=None.
            text = item["completion"]
            message_obj = Message(content=text if len(text) > 0 else None)
            choices_list.append(
                Choices(
                    finish_reason=item["finish_reason"],
                    index=idx + 1,
                    message=message_obj,
                )
            )
        model_response["choices"] = choices_list
    except Exception as err:
        raise AlephAlphaError(
            message=json.dumps(completion_response),
            status_code=response.status_code,
        ) from err

    ## CALCULATING USAGE - token counts come from the supplied encoding
    prompt_tokens = len(encoding.encode(prompt))
    # Content is None for an empty completion; count that as zero tokens.
    first_content = model_response["choices"][0]["message"]["content"] or ""
    completion_tokens = len(encoding.encode(first_content))

    model_response["created"] = int(time.time())
    model_response["model"] = model
    model_response.usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response
300
+
301
+
302
def embedding():
    """Placeholder for embedding support; does nothing and returns ``None``."""
    # logic for parsing in - calling - parsing out model embedding calls
    return None
litellm/llms/anthropic.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Usage
8
+ import litellm
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+ import httpx
11
+
12
+
13
class AnthropicConstants(Enum):
    """Turn markers used when building text prompts for Anthropic."""

    # Prefix for a human turn.
    HUMAN_PROMPT = "\n\nHuman: "
    # Prefix for an assistant turn.
    AI_PROMPT = "\n\nAssistant: "
16
+
17
+
18
class AnthropicError(Exception):
    """Error raised when an Anthropic completion call fails.

    Attributes:
        status_code: HTTP status code associated with the failure.
        message: Error text; also used as the exception message.
        request: Synthetic ``httpx.Request`` (POST to the Anthropic
            ``/v1/complete`` URL) attached to the synthetic response.
        response: Synthetic ``httpx.Response`` carrying ``status_code``.
    """

    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # Build a stand-in request first so the response can reference it.
        stub_request = httpx.Request(
            method="POST", url="https://api.anthropic.com/v1/complete"
        )
        self.request = stub_request
        self.response = httpx.Response(
            status_code=status_code, request=self.request
        )
        # Hand the message to the base Exception so str(err) shows it.
        super().__init__(self.message)
29
+
30
+
31
class AnthropicConfig:
    """
    Reference: https://docs.anthropic.com/claude/reference/complete_post

    to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}

    Values passed to the constructor are stored on the CLASS (not the
    instance), so they act as process-wide defaults returned by `get_config()`.
    """

    max_tokens_to_sample: Optional[
        int
    ] = litellm.max_tokens  # anthropic requires a default
    stop_sequences: Optional[list] = None
    # temperature / top_p take fractional values, hence float rather than int.
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    metadata: Optional[dict] = None

    def __init__(
        self,
        max_tokens_to_sample: Optional[int] = 256,  # anthropic requires a default
        stop_sequences: Optional[list] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        metadata: Optional[dict] = None,
    ) -> None:
        """Store every non-``None`` argument as a class-level default.

        NOTE: because ``max_tokens_to_sample`` defaults to 256 rather than
        ``None``, every instantiation that omits it resets the class-level
        value to 256.
        """
        # Snapshot taken before any other local is bound, so it holds only
        # ``self`` and the constructor arguments.
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Written to the class on purpose: settings are shared globally.
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return a dict of the class-level settings whose value is not ``None``.

        Dunder attributes, functions, classmethods and staticmethods defined
        directly on the class are excluded.
        """
        non_setting_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(v, non_setting_types)
            and v is not None
        }
78
+
79
+
80
+ # makes headers for API call
81
def validate_environment(api_key):
    """Build the HTTP headers for an Anthropic request.

    Args:
        api_key: Anthropic API key; must not be ``None``.

    Returns:
        dict: Accept, version, content-type and ``x-api-key`` headers.

    Raises:
        ValueError: If ``api_key`` is ``None``.
    """
    if api_key is not None:
        return {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": api_key,
        }
    raise ValueError(
        "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
    )
93
+
94
+
95
def completion(
    model: str,
    messages: list,
    api_base: str,
    custom_prompt_dict: dict,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """Run a text-completion request against the Anthropic API.

    Args:
        model: Model name sent in the request body.
        messages: Chat messages converted into a single prompt string.
        api_base: URL the request is POSTed to.
        custom_prompt_dict: Per-model prompt templates; when ``model`` is a
            key, its roles / initial / final values drive prompt building.
        model_response: Response object that is filled in and returned.
        print_verbose: Callable used to emit debug output.
        encoding: Tokenizer exposing ``encode``; used only for usage counts.
        api_key: Anthropic API key (required).
        logging_obj: Object exposing ``pre_call`` / ``post_call`` hooks.
        optional_params: Extra request parameters. Values set on
            ``litellm.AnthropicConfig`` are merged in for keys not already
            present; a dict passed by the caller is updated in place.
        litellm_params: Unused here.
        logger_fn: Unused here.

    Returns:
        ``response.iter_lines()`` when streaming is requested, otherwise the
        populated ``model_response``.

    Raises:
        ValueError: If ``api_key`` is ``None``.
        AnthropicError: On a non-200 response, a non-JSON body, or an
            ``error`` field in the body.
    """
    headers = validate_environment(api_key)

    # The signature defaults to None; normalise so the merge below cannot crash.
    if optional_params is None:
        optional_params = {}

    if model in custom_prompt_dict:
        # check if the model has a registered custom prompt
        model_prompt_details = custom_prompt_dict[model]
        prompt = custom_prompt(
            role_dict=model_prompt_details["roles"],
            initial_prompt_value=model_prompt_details["initial_prompt_value"],
            final_prompt_value=model_prompt_details["final_prompt_value"],
            messages=messages,
        )
    else:
        prompt = prompt_factory(
            model=model, messages=messages, custom_llm_provider="anthropic"
        )

    ## Load Config
    # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
    for k, v in litellm.AnthropicConfig.get_config().items():
        optional_params.setdefault(k, v)

    data = {
        "model": model,
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data, "api_base": api_base},
    )

    ## COMPLETION CALL
    # One request/status check serves both the streaming and blocking paths.
    is_streaming = optional_params.get("stream", False) == True  # noqa: E712
    response = requests.post(
        api_base,
        headers=headers,
        data=json.dumps(data),
        stream=is_streaming,
    )
    if response.status_code != 200:
        raise AnthropicError(status_code=response.status_code, message=response.text)
    if is_streaming:
        return response.iter_lines()

    ## LOGGING
    logging_obj.post_call(
        input=prompt,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    print_verbose(f"raw model_response: {response.text}")

    ## RESPONSE OBJECT
    try:
        completion_response = response.json()
    except ValueError as err:  # body was not valid JSON
        raise AnthropicError(
            message=response.text, status_code=response.status_code
        ) from err
    if "error" in completion_response:
        raise AnthropicError(
            message=str(completion_response["error"]),
            status_code=response.status_code,
        )

    # Leave the pre-existing message content untouched for an empty completion.
    if len(completion_response["completion"]) > 0:
        model_response["choices"][0]["message"]["content"] = completion_response[
            "completion"
        ]
    model_response.choices[0].finish_reason = completion_response["stop_reason"]

    ## CALCULATING USAGE
    prompt_tokens = len(
        encoding.encode(prompt)
    )  ##[TODO] use the anthropic tokenizer here
    # ``or ""`` also covers a content key that is present but None.
    completion_text = model_response["choices"][0]["message"].get("content", "") or ""
    completion_tokens = len(
        encoding.encode(completion_text)
    )  ##[TODO] use the anthropic tokenizer here

    model_response["created"] = int(time.time())
    model_response["model"] = model
    model_response.usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response
211
+
212
+
213
def embedding():
    """Placeholder for embedding support; does nothing and returns ``None``."""
    # logic for parsing in - calling - parsing out model embedding calls
    return None
litellm/llms/azure.py ADDED
@@ -0,0 +1,799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Union, Any
2
+ import types, requests
3
+ from .base import BaseLLM
4
+ from litellm.utils import (
5
+ ModelResponse,
6
+ Choices,
7
+ Message,
8
+ CustomStreamWrapper,
9
+ convert_to_model_response_object,
10
+ )
11
+ from typing import Callable, Optional
12
+ from litellm import OpenAIConfig
13
+ import litellm, json
14
+ import httpx
15
+ from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
16
+ from openai import AzureOpenAI, AsyncAzureOpenAI
17
+
18
+
19
class AzureOpenAIError(Exception):
    """Error raised when an Azure OpenAI call fails.

    Attributes:
        status_code: HTTP status code associated with the failure.
        message: Error text; also used as the exception message.
        request: The supplied ``httpx.Request`` when truthy, otherwise a
            synthetic POST request to ``https://api.openai.com/v1``.
        response: The supplied ``httpx.Response`` when truthy, otherwise a
            synthetic response carrying ``status_code``.
    """

    def __init__(
        self,
        status_code,
        message,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
    ):
        self.status_code = status_code
        self.message = message
        # Fall back to a stand-in request when the caller has none to offer.
        self.request = (
            request
            if request
            else httpx.Request(method="POST", url="https://api.openai.com/v1")
        )
        # Likewise for the response; it is tied to whichever request was chosen.
        self.response = (
            response
            if response
            else httpx.Response(status_code=status_code, request=self.request)
        )
        # Hand the message to the base Exception so str(err) shows it.
        super().__init__(self.message)
42
+
43
+
44
class AzureOpenAIConfig(OpenAIConfig):
    """
    Reference: https://platform.openai.com/docs/api-reference/chat/create

    The class `AzureOpenAIConfig` provides configuration for the OpenAI's Chat API interface, for use with Azure. It inherits from `OpenAIConfig`. Below are the parameters:

    - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.

    - `function_call` (string or object): This optional parameter controls how the model calls functions.

    - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.

    - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.

    - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.

    - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.

    - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on if they appear in the text so far, hence increasing the model's likelihood to talk about new topics.

    - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.

    - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.

    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
    """

    def __init__(
        self,
        # "number" parameters accept fractional values, hence float.
        frequency_penalty: Optional[float] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> None:
        """Forward every argument, positionally and in order, to ``OpenAIConfig``."""
        super().__init__(
            frequency_penalty,
            function_call,
            functions,
            logit_bias,
            max_tokens,
            n,
            presence_penalty,
            stop,
            temperature,
            top_p,
        )
96
+
97
+
98
+ class AzureChatCompletion(BaseLLM):
99
def __init__(self) -> None:
    """Initialise the handler by delegating to the base class constructor."""
    super().__init__()
101
+
102
def validate_environment(self, api_key, azure_ad_token):
    """Build the HTTP headers for an Azure OpenAI request.

    Args:
        api_key: Azure API key; takes precedence when not ``None``.
        azure_ad_token: Azure AD token, used as a bearer token only when
            ``api_key`` is ``None``.

    Returns:
        dict: ``content-type`` plus at most one credential header.
    """
    if api_key is not None:
        credential = {"api-key": api_key}
    elif azure_ad_token is not None:
        credential = {"Authorization": f"Bearer {azure_ad_token}"}
    else:
        # Neither credential supplied: send no auth header at all.
        credential = {}
    return {"content-type": "application/json", **credential}
111
+
112
def completion(
    self,
    model: str,
    messages: list,
    model_response: ModelResponse,
    api_key: str,
    api_base: str,
    api_version: str,
    api_type: str,
    azure_ad_token: str,
    print_verbose: Callable,
    timeout,
    logging_obj,
    optional_params,
    litellm_params,
    logger_fn,
    acompletion: bool = False,
    headers: Optional[dict] = None,
    client=None,
):
    """Dispatch an Azure OpenAI chat completion.

    Depending on ``acompletion`` and ``optional_params["stream"]`` this either
    returns the coroutine / stream produced by ``self.acompletion``,
    ``self.async_streaming`` or ``self.streaming``, or performs a blocking
    call itself and returns the converted model response.

    When ``api_base`` points at a Cloudflare AI Gateway, the model is appended
    to the base URL and a client is built here (unless one was supplied); the
    request body then carries ``"model": None``.

    Raises:
        AzureOpenAIError: For missing model/messages (422), a non-int
            ``max_retries`` (422), or any other failure (the exception's own
            ``status_code`` when it has one, else 500).
    """
    super().completion()
    try:
        if model is None or messages is None:
            raise AzureOpenAIError(
                status_code=422, message="Missing model or messages"
            )

        # Popped so it is not forwarded inside the request body.
        max_retries = optional_params.pop("max_retries", 2)

        ### CHECK IF CLOUDFLARE AI GATEWAY ###
        ### if so - set the model as part of the base url
        if "gateway.ai.cloudflare.com" in api_base:
            ## build base url - assume api base includes resource name
            if client is None:
                # Validate before the value is handed to the client constructor.
                if not isinstance(max_retries, int):
                    raise AzureOpenAIError(
                        status_code=422, message="max retries must be an int"
                    )
                if not api_base.endswith("/"):
                    api_base += "/"
                api_base += f"{model}"

                azure_client_params = {
                    "api_version": api_version,
                    "base_url": f"{api_base}",
                    "http_client": litellm.client_session,
                    "max_retries": max_retries,
                    "timeout": timeout,
                }
                if api_key is not None:
                    azure_client_params["api_key"] = api_key
                elif azure_ad_token is not None:
                    azure_client_params["azure_ad_token"] = azure_ad_token

                if acompletion is True:
                    client = AsyncAzureOpenAI(**azure_client_params)
                else:
                    client = AzureOpenAI(**azure_client_params)

            data = {"model": None, "messages": messages, **optional_params}
        else:
            data = {
                "model": model,  # type: ignore
                "messages": messages,
                **optional_params,
            }

        # NOTE(review): max_retries was popped above, so the helpers below
        # will not find it in ``data`` - confirm whether that is intended.
        if acompletion is True:
            if optional_params.get("stream", False):
                return self.async_streaming(
                    logging_obj=logging_obj,
                    api_base=api_base,
                    data=data,
                    model=model,
                    api_key=api_key,
                    api_version=api_version,
                    azure_ad_token=azure_ad_token,
                    timeout=timeout,
                    client=client,
                )
            return self.acompletion(
                api_base=api_base,
                data=data,
                model_response=model_response,
                api_key=api_key,
                api_version=api_version,
                model=model,
                azure_ad_token=azure_ad_token,
                timeout=timeout,
                client=client,
                logging_obj=logging_obj,
            )

        if "stream" in optional_params and optional_params["stream"] == True:
            return self.streaming(
                logging_obj=logging_obj,
                api_base=api_base,
                data=data,
                model=model,
                api_key=api_key,
                api_version=api_version,
                azure_ad_token=azure_ad_token,
                timeout=timeout,
                client=client,
            )

        ## LOGGING
        logging_obj.pre_call(
            input=messages,
            api_key=api_key,
            additional_args={
                "headers": {
                    "api_key": api_key,
                    "azure_ad_token": azure_ad_token,
                },
                "api_version": api_version,
                "api_base": api_base,
                "complete_input_dict": data,
            },
        )
        if not isinstance(max_retries, int):
            raise AzureOpenAIError(
                status_code=422, message="max retries must be an int"
            )

        if client is None:
            # init AzureOpenAI Client
            azure_client_params = {
                "api_version": api_version,
                "azure_endpoint": api_base,
                "azure_deployment": model,
                "http_client": litellm.client_session,
                "max_retries": max_retries,
                "timeout": timeout,
            }
            if api_key is not None:
                azure_client_params["api_key"] = api_key
            elif azure_ad_token is not None:
                azure_client_params["azure_ad_token"] = azure_ad_token
            azure_client = AzureOpenAI(**azure_client_params)
        else:
            azure_client = client

        response = azure_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
        stringified_response = response.model_dump()
        ## LOGGING
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=stringified_response,
            additional_args={
                "headers": headers,
                "api_version": api_version,
                "api_base": api_base,
            },
        )
        return convert_to_model_response_object(
            response_object=stringified_response,
            model_response_object=model_response,
        )
    except AzureOpenAIError:
        # Already in the provider's error type; propagate unchanged.
        raise
    except Exception as e:
        if hasattr(e, "status_code"):
            raise AzureOpenAIError(status_code=e.status_code, message=str(e))
        else:
            raise AzureOpenAIError(status_code=500, message=str(e))
275
+
276
async def acompletion(
    self,
    api_key: str,
    api_version: str,
    model: str,
    api_base: str,
    data: dict,
    timeout: Any,
    model_response: ModelResponse,
    azure_ad_token: Optional[str] = None,
    client=None,  # this is the AsyncAzureOpenAI
    logging_obj=None,
):
    """Perform a non-streaming chat completion with the async Azure client.

    Args:
        api_key: Azure API key; preferred over ``azure_ad_token`` when set.
        api_version: Azure API version passed to a newly built client.
        model: Deployment name passed to a newly built client.
        api_base: Azure endpoint passed to a newly built client.
        data: Request body; ``max_retries`` is popped from it (default 2).
        timeout: Timeout for the client and the request.
        model_response: Response object to populate.
        azure_ad_token: Azure AD token used when ``api_key`` is ``None``.
        client: Pre-built async client; when given, no new client is created.
        logging_obj: Optional object exposing ``pre_call``; skipped when ``None``.

    Returns:
        The result of ``convert_to_model_response_object`` for the response.

    Raises:
        AzureOpenAIError: For a non-int ``max_retries`` (422) or any failure
            lacking a ``status_code`` (500). Exceptions that already carry a
            ``status_code`` propagate unchanged.
    """
    try:
        max_retries = data.pop("max_retries", 2)
        if not isinstance(max_retries, int):
            raise AzureOpenAIError(
                status_code=422, message="max retries must be an int"
            )

        if client is None:
            # init AzureOpenAI Client
            azure_client_params = {
                "api_version": api_version,
                "azure_endpoint": api_base,
                "azure_deployment": model,
                "http_client": litellm.client_session,
                "max_retries": max_retries,
                "timeout": timeout,
            }
            if api_key is not None:
                azure_client_params["api_key"] = api_key
            elif azure_ad_token is not None:
                azure_client_params["azure_ad_token"] = azure_ad_token
            azure_client = AsyncAzureOpenAI(**azure_client_params)
        else:
            azure_client = client

        ## LOGGING
        # logging_obj defaults to None; without this guard a missing logger
        # would surface as a misleading 500 error.
        if logging_obj is not None:
            logging_obj.pre_call(
                input=data["messages"],
                api_key=azure_client.api_key,
                additional_args={
                    "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                    "api_base": azure_client._base_url._uri_reference,
                    "acompletion": True,
                    "complete_input_dict": data,
                },
            )
        response = await azure_client.chat.completions.create(
            **data, timeout=timeout
        )
        return convert_to_model_response_object(
            response_object=response.model_dump(),
            model_response_object=model_response,
        )
    except AzureOpenAIError:
        # Already in the provider's error type; propagate unchanged.
        raise
    except Exception as e:
        if hasattr(e, "status_code"):
            raise e
        else:
            raise AzureOpenAIError(status_code=500, message=str(e))
340
+
341
def streaming(
    self,
    logging_obj,
    api_base: str,
    api_key: str,
    api_version: str,
    data: dict,
    model: str,
    timeout: Any,
    azure_ad_token: Optional[str] = None,
    client=None,
):
    """Start a synchronous streaming Azure chat completion.

    ``max_retries`` is popped from ``data`` (default 2) and must be an int.
    A supplied ``client`` is used as-is; otherwise an ``AzureOpenAI`` client
    is built, preferring ``api_key`` over ``azure_ad_token``.

    Returns:
        A ``CustomStreamWrapper`` around the provider's response stream.

    Raises:
        AzureOpenAIError: 422 when ``max_retries`` is not an int.
    """
    retry_limit = data.pop("max_retries", 2)
    if not isinstance(retry_limit, int):
        raise AzureOpenAIError(
            status_code=422, message="max retries must be an int"
        )

    if client is not None:
        azure_client = client
    else:
        # No client supplied: assemble the constructor arguments ourselves.
        client_kwargs = {
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "http_client": litellm.client_session,
            "max_retries": retry_limit,
            "timeout": timeout,
        }
        if api_key is not None:
            client_kwargs["api_key"] = api_key
        elif azure_ad_token is not None:
            client_kwargs["azure_ad_token"] = azure_ad_token
        azure_client = AzureOpenAI(**client_kwargs)

    ## LOGGING
    request_messages = data["messages"]
    effective_key = azure_client.api_key
    logging_obj.pre_call(
        input=request_messages,
        api_key=effective_key,
        additional_args={
            "headers": {"Authorization": f"Bearer {effective_key}"},
            "api_base": azure_client._base_url._uri_reference,
            # Flag value is logged exactly as the original code logged it.
            "acompletion": True,
            "complete_input_dict": data,
        },
    )

    provider_stream = azure_client.chat.completions.create(**data, timeout=timeout)
    return CustomStreamWrapper(
        completion_stream=provider_stream,
        model=model,
        custom_llm_provider="azure",
        logging_obj=logging_obj,
    )
394
+
395
async def async_streaming(
    self,
    logging_obj,
    api_base: str,
    api_key: str,
    api_version: str,
    data: dict,
    model: str,
    timeout: Any,
    azure_ad_token: Optional[str] = None,
    client=None,
):
    """Start an asynchronous streaming Azure chat completion.

    ``max_retries`` is popped from ``data`` (default 2) and validated as an
    int, matching the other completion/embedding entry points in this class.
    A supplied ``client`` is used as-is; otherwise an ``AsyncAzureOpenAI``
    client is built, preferring ``api_key`` over ``azure_ad_token``.

    Returns:
        A ``CustomStreamWrapper`` around the provider's async response stream.

    Raises:
        AzureOpenAIError: always - every failure is re-wrapped, keeping the
            original ``status_code`` when present and using 500 otherwise.
    """
    try:
        max_retries = data.pop("max_retries", 2)
        if not isinstance(max_retries, int):
            raise AzureOpenAIError(
                status_code=422, message="max retries must be an int"
            )

        # init AzureOpenAI Client
        azure_client_params = {
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "http_client": litellm.client_session,
            "max_retries": max_retries,
            "timeout": timeout,
        }
        if api_key is not None:
            azure_client_params["api_key"] = api_key
        elif azure_ad_token is not None:
            azure_client_params["azure_ad_token"] = azure_ad_token
        if client is None:
            azure_client = AsyncAzureOpenAI(**azure_client_params)
        else:
            azure_client = client

        ## LOGGING
        logging_obj.pre_call(
            input=data["messages"],
            api_key=azure_client.api_key,
            additional_args={
                "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                "api_base": azure_client._base_url._uri_reference,
                "acompletion": True,
                "complete_input_dict": data,
            },
        )
        response = await azure_client.chat.completions.create(
            **data, timeout=timeout
        )
        streamwrapper = CustomStreamWrapper(
            completion_stream=response,
            model=model,
            custom_llm_provider="azure",
            logging_obj=logging_obj,
        )
        ## DO NOT make this into an async for ... loop, it will yield an async
        ## generator, which won't raise errors if the response fails
        return streamwrapper
    except Exception as e:
        if hasattr(e, "status_code"):
            raise AzureOpenAIError(status_code=e.status_code, message=str(e))
        else:
            raise AzureOpenAIError(status_code=500, message=str(e))
452
+
453
async def aembedding(
    self,
    data: dict,
    model_response: ModelResponse,
    azure_client_params: dict,
    api_key: str,
    input: list,
    client=None,
    logging_obj=None,
    timeout=None,
):
    """Create embeddings asynchronously and log the outcome.

    A supplied ``client`` is used as-is; otherwise an ``AsyncAzureOpenAI``
    client is built from ``azure_client_params``. ``logging_obj.post_call``
    is invoked on success with the dumped response and on failure with the
    stringified exception, after which the exception is re-raised.

    Returns:
        ``model_response`` populated as an ``"embedding"`` response.
    """
    try:
        aclient = (
            client if client is not None else AsyncAzureOpenAI(**azure_client_params)
        )
        raw_response = await aclient.embeddings.create(**data, timeout=timeout)
        dumped = raw_response.model_dump()
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=dumped,
        )
        return convert_to_model_response_object(
            response_object=dumped,
            model_response_object=model_response,
            response_type="embedding",
        )
    except Exception as err:
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=str(err),
        )
        raise err
493
+
494
def embedding(
    self,
    model: str,
    input: list,
    api_key: str,
    api_base: str,
    api_version: str,
    timeout: float,
    logging_obj=None,
    model_response=None,
    optional_params=None,
    azure_ad_token: Optional[str] = None,
    client=None,
    aembedding=None,
):
    """Create embeddings against an Azure OpenAI deployment.

    Builds the request payload from ``model``, ``input`` and
    ``optional_params`` (``None`` is treated as no extra parameters), pops
    ``max_retries`` (default 2, must be an int) and logs the call. When
    ``aembedding == True`` the work is delegated to ``self.aembedding`` and
    its un-awaited coroutine is returned; the caller-supplied ``client`` is
    forwarded so a pre-built async client is reused instead of discarded.

    Returns:
        ``model_response`` populated as an ``"embedding"`` response, or the
        coroutine from ``self.aembedding`` on the async path.

    Raises:
        AzureOpenAIError: 422 for a non-int ``max_retries``; otherwise any
            failure is re-wrapped, keeping ``status_code`` when present and
            using 500 when not.
    """
    super().embedding()
    if self._client_session is None:
        self._client_session = self.create_client_session()
    try:
        # optional_params defaults to None, which cannot be **-unpacked.
        data = {"model": model, "input": input, **(optional_params or {})}
        max_retries = data.pop("max_retries", 2)
        if not isinstance(max_retries, int):
            raise AzureOpenAIError(
                status_code=422, message="max retries must be an int"
            )

        # init AzureOpenAI Client
        azure_client_params = {
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "http_client": litellm.client_session,
            "max_retries": max_retries,
            "timeout": timeout,
        }
        if api_key is not None:
            azure_client_params["api_key"] = api_key
        elif azure_ad_token is not None:
            azure_client_params["azure_ad_token"] = azure_ad_token

        ## LOGGING
        logging_obj.pre_call(
            input=input,
            api_key=api_key,
            additional_args={
                "complete_input_dict": data,
                "headers": {"api_key": api_key, "azure_ad_token": azure_ad_token},
            },
        )

        if aembedding == True:  # noqa: E712 - keep the original equality test
            return self.aembedding(
                data=data,
                input=input,
                logging_obj=logging_obj,
                api_key=api_key,
                model_response=model_response,
                azure_client_params=azure_client_params,
                timeout=timeout,
                client=client,
            )

        if client is None:
            azure_client = AzureOpenAI(**azure_client_params)  # type: ignore
        else:
            azure_client = client
        ## COMPLETION CALL
        response = azure_client.embeddings.create(**data, timeout=timeout)  # type: ignore
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data, "api_base": api_base},
            original_response=response,
        )

        return convert_to_model_response_object(
            response_object=response.model_dump(),
            model_response_object=model_response,
            response_type="embedding",
        )  # type: ignore
    except AzureOpenAIError as e:
        raise e
    except Exception as e:
        if hasattr(e, "status_code"):
            raise AzureOpenAIError(status_code=e.status_code, message=str(e))
        else:
            raise AzureOpenAIError(status_code=500, message=str(e))
579
+
580
async def aimage_generation(
    self,
    data: dict,
    model_response: ModelResponse,
    azure_client_params: dict,
    api_key: str,
    input: list,
    client=None,
    logging_obj=None,
    timeout=None,
):
    """Generate images asynchronously and log the outcome.

    A supplied ``client`` is used as-is. Otherwise an ``AsyncAzureOpenAI``
    client is built from ``azure_client_params`` on top of
    ``litellm.aclient_session`` or, when that is falsy, a new
    ``httpx.AsyncClient`` using ``AsyncCustomHTTPTransport``.
    ``logging_obj.post_call`` is invoked on success with the dumped response
    and on failure with the stringified exception, which is then re-raised.

    Returns:
        ``model_response`` populated as an ``"image_generation"`` response.
    """
    try:
        if client is not None:
            aclient = client
        else:
            http_session = litellm.aclient_session or httpx.AsyncClient(
                transport=AsyncCustomHTTPTransport(),
            )
            aclient = AsyncAzureOpenAI(
                http_client=http_session, **azure_client_params
            )
        raw_response = await aclient.images.generate(**data, timeout=timeout)
        dumped = raw_response.model_dump()
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=dumped,
        )
        return convert_to_model_response_object(
            response_object=dumped,
            model_response_object=model_response,
            response_type="image_generation",
        )
    except Exception as err:
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=str(err),
        )
        raise err
625
+
626
def image_generation(
    self,
    prompt: str,
    timeout: float,
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    model_response: Optional[litellm.utils.ImageResponse] = None,
    azure_ad_token: Optional[str] = None,
    logging_obj=None,
    optional_params=None,
    client=None,
    aimg_generation=None,
):
    """Generate images against an Azure OpenAI deployment.

    Builds the request payload from ``model`` (an empty value becomes
    ``None``), ``prompt`` and ``optional_params`` (``None`` is treated as no
    extra parameters), and pops ``max_retries`` (default 2, must be an int).
    When ``aimg_generation == True`` the work is delegated to
    ``self.aimage_generation`` and its un-awaited coroutine is returned.

    The prompt is what gets reported to the logger as ``input``; this
    function has no ``input`` parameter, so the bare name would resolve to
    the ``input`` builtin.

    Returns:
        ``model_response`` populated as an ``"image_generation"`` response,
        or the coroutine from ``self.aimage_generation`` on the async path.

    Raises:
        AzureOpenAIError: 422 for a non-int ``max_retries``; otherwise any
            failure is re-wrapped, keeping ``status_code`` when present and
            using 500 when not.
    """
    try:
        model = model or None  # empty model name -> None
        # optional_params defaults to None, which cannot be **-unpacked.
        data = {"model": model, "prompt": prompt, **(optional_params or {})}
        max_retries = data.pop("max_retries", 2)
        if not isinstance(max_retries, int):
            raise AzureOpenAIError(
                status_code=422, message="max retries must be an int"
            )

        # init AzureOpenAI Client
        azure_client_params = {
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "max_retries": max_retries,
            "timeout": timeout,
        }
        if api_key is not None:
            azure_client_params["api_key"] = api_key
        elif azure_ad_token is not None:
            azure_client_params["azure_ad_token"] = azure_ad_token

        if aimg_generation == True:  # noqa: E712 - keep the original equality test
            return self.aimage_generation(
                data=data,
                input=prompt,
                logging_obj=logging_obj,
                model_response=model_response,
                api_key=api_key,
                client=client,
                azure_client_params=azure_client_params,
                timeout=timeout,
            )  # type: ignore

        if client is None:
            client_session = litellm.client_session or httpx.Client(
                transport=CustomHTTPTransport(),
            )
            azure_client = AzureOpenAI(http_client=client_session, **azure_client_params)  # type: ignore
        else:
            azure_client = client

        ## LOGGING
        logging_obj.pre_call(
            input=prompt,
            api_key=azure_client.api_key,
            additional_args={
                "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                "api_base": azure_client._base_url._uri_reference,
                "acompletion": False,
                "complete_input_dict": data,
            },
        )

        ## COMPLETION CALL
        response = azure_client.images.generate(**data, timeout=timeout)  # type: ignore
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=response,
        )
        return convert_to_model_response_object(
            response_object=response.model_dump(),
            model_response_object=model_response,
            response_type="image_generation",
        )  # type: ignore
    except AzureOpenAIError as e:
        raise e
    except Exception as e:
        if hasattr(e, "status_code"):
            raise AzureOpenAIError(status_code=e.status_code, message=str(e))
        else:
            raise AzureOpenAIError(status_code=500, message=str(e))
710
+
711
async def ahealth_check(
    self,
    model: Optional[str],
    api_key: str,
    api_base: str,
    api_version: str,
    timeout: float,
    mode: str,
    messages: Optional[list] = None,
    input: Optional[list] = None,
    prompt: Optional[str] = None,
):
    """Issue one raw request in the given ``mode`` and report rate-limit headers.

    ``mode`` selects the endpoint: ``"completion"``, ``"chat"``,
    ``"embedding"`` or ``"image_generation"``. When ``api_base`` points at a
    Cloudflare AI gateway the model name is appended to the base URL and the
    request itself is sent with ``model=None``.

    Returns:
        A dict holding whichever of ``x-ratelimit-remaining-requests`` and
        ``x-ratelimit-remaining-tokens`` the response headers provided.

    Raises:
        Exception: for a missing model (non-gateway path), a missing input
            for the chosen mode, an unknown mode, or a response without
            ``headers``.
    """
    http_session = litellm.aclient_session or httpx.AsyncClient(
        transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
    )
    if "gateway.ai.cloudflare.com" in api_base:
        ## build base url - assume api base includes resource name
        if not api_base.endswith("/"):
            api_base += "/"
        api_base += f"{model}"
        client = AsyncAzureOpenAI(
            base_url=api_base,
            api_version=api_version,
            api_key=api_key,
            timeout=timeout,
            http_client=http_session,
        )
        # cloudflare ai gateway, needs model=None
        model = None
    else:
        client = AsyncAzureOpenAI(
            api_version=api_version,
            azure_endpoint=api_base,
            api_key=api_key,
            timeout=timeout,
            http_client=http_session,
        )

        # only run this check if it's not cloudflare ai gateway
        if model is None and mode != "image_generation":
            raise Exception("model is not set")

    raw = None

    if mode == "completion":
        raw = await client.completions.with_raw_response.create(
            model=model,  # type: ignore
            prompt=prompt,  # type: ignore
        )
    elif mode == "chat":
        if messages is None:
            raise Exception("messages is not set")
        raw = await client.chat.completions.with_raw_response.create(
            model=model,  # type: ignore
            messages=messages,  # type: ignore
        )
    elif mode == "embedding":
        if input is None:
            raise Exception("input is not set")
        raw = await client.embeddings.with_raw_response.create(
            model=model,  # type: ignore
            input=input,  # type: ignore
        )
    elif mode == "image_generation":
        if prompt is None:
            raise Exception("prompt is not set")
        raw = await client.images.with_raw_response.generate(
            model=model,  # type: ignore
            prompt=prompt,  # type: ignore
        )
    else:
        raise Exception("mode not set")

    limits = {}

    if raw is None or not hasattr(raw, "headers"):
        raise Exception("invalid completion response")

    # The request counter is not provided for dall-e requests, so each header
    # is copied only when present.
    for header_name in (
        "x-ratelimit-remaining-requests",
        "x-ratelimit-remaining-tokens",
    ):
        if raw.headers.get(header_name, None) is not None:
            limits[header_name] = raw.headers[header_name]
    return limits
litellm/llms/base.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## This is a template base class to be used for adding new LLM providers via API calls
2
+ import litellm
3
+ import httpx
4
+ from typing import Optional
5
+
6
+
7
class BaseLLM:
    """Template base class for adding new LLM providers via API calls.

    Subclasses override ``validate_environment``, ``completion`` and
    ``embedding``; the versions here do nothing and return ``None``.
    """

    # Sync HTTP session; None until a subclass assigns one.
    _client_session: Optional["httpx.Client"] = None

    def create_client_session(self):
        """Return ``litellm.client_session`` if set, else a new ``httpx.Client``."""
        if litellm.client_session:
            _client_session = litellm.client_session
        else:
            _client_session = httpx.Client()

        return _client_session

    def create_aclient_session(self):
        """Return ``litellm.aclient_session`` if set, else a new ``httpx.AsyncClient``."""
        if litellm.aclient_session:
            _aclient_session = litellm.aclient_session
        else:
            _aclient_session = httpx.AsyncClient()

        return _aclient_session

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """Close the sync session, if one was ever created.

        The three exception arguments are accepted (and ignored) so the
        method matches the context-manager protocol; they default to ``None``
        so a bare ``obj.__exit__()`` call keeps working.
        """
        # The class attribute always exists, so test the value, not hasattr().
        session = getattr(self, "_client_session", None)
        if session is not None:
            session.close()

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the async session, if one was ever created."""
        session = getattr(self, "_aclient_session", None)
        if session is not None:
            await session.aclose()

    def validate_environment(self):  # set up the environment required to run the model
        pass

    def completion(
        self, *args, **kwargs
    ):  # logic for parsing in - calling - parsing out model completion calls
        pass

    def embedding(
        self, *args, **kwargs
    ):  # logic for parsing in - calling - parsing out model embedding calls
        pass
litellm/llms/baseten.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable
7
+ from litellm.utils import ModelResponse, Usage
8
+
9
+
10
class BasetenError(Exception):
    """Error raised for Baseten failures, carrying an HTTP status code."""

    def __init__(self, status_code, message):
        # Initialise the base Exception with the message, then attach fields.
        super().__init__(message)
        self.message = message
        self.status_code = status_code
17
+
18
+
19
def validate_environment(api_key):
    """Build the JSON request headers, adding ``Authorization`` when a key is given."""
    auth_header = {"Authorization": f"Api-Key {api_key}"} if api_key else {}
    return {
        "accept": "application/json",
        "content-type": "application/json",
        **auth_header,
    }
27
+
28
+
29
def completion(
    model: str,
    messages: list,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """Send a prompt to a Baseten-hosted model and fill in ``model_response``.

    The prompt is the plain concatenation of every message's ``content``.
    ``optional_params`` (``None`` is treated as empty) is forwarded as the
    request's ``parameters``; its ``stream`` entry selects streaming.

    Returns:
        ``response.iter_lines()`` when streaming was requested or the server
        answered with ``text/event-stream``; otherwise ``model_response``
        with content, ``created``, ``model`` and token usage populated.

    Raises:
        BasetenError: when the response carries an ``error`` field or has a
            shape this function cannot parse.
    """
    # The declared default is None, which supports neither `in` nor lookup.
    if optional_params is None:
        optional_params = {}

    headers = validate_environment(api_key)
    url = "https://app.baseten.co/models/" + model + "/predict"
    # Roles are ignored: every message contributes just its content.
    prompt = "".join(f"{message['content']}" for message in messages)
    stream = optional_params.get("stream", False) == True  # noqa: E712 - original semantics
    data = {
        "inputs": prompt,
        "prompt": prompt,
        "parameters": optional_params,
        "stream": stream,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(data),
        stream=stream,
    )
    # .get() so a response without a Content-Type header is not a KeyError.
    if stream or "text/event-stream" in response.headers.get("Content-Type", ""):
        return response.iter_lines()

    ## LOGGING
    logging_obj.post_call(
        input=prompt,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    print_verbose(f"raw model_response: {response.text}")
    ## RESPONSE OBJECT
    completion_response = response.json()
    if "error" in completion_response:
        raise BasetenError(
            message=completion_response["error"],
            status_code=response.status_code,
        )

    unparseable = BasetenError(
        message=f"Unable to parse response. Original response: {response.text}",
        status_code=response.status_code,
    )
    message = model_response["choices"][0]["message"]
    if "model_output" in completion_response:
        model_output = completion_response["model_output"]
        if (
            isinstance(model_output, dict)
            and "data" in model_output
            and isinstance(model_output["data"], list)
        ):
            message["content"] = model_output["data"][0]
        elif isinstance(model_output, str):
            message["content"] = model_output
    elif "completion" in completion_response and isinstance(
        completion_response["completion"], str
    ):
        message["content"] = completion_response["completion"]
    elif isinstance(completion_response, list) and len(completion_response) > 0:
        first = completion_response[0]
        # The key lives on the first element, not on the list itself.
        if not isinstance(first, dict) or "generated_text" not in first:
            raise unparseable
        message["content"] = first["generated_text"]
        ## GETTING LOGPROBS
        if "details" in first and "tokens" in first["details"]:
            model_response.choices[0].finish_reason = first["details"][
                "finish_reason"
            ]
            message._logprobs = sum(
                token["logprob"] for token in first["details"]["tokens"]
            )
    else:
        raise unparseable

    ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
    prompt_tokens = len(encoding.encode(prompt))
    completion_tokens = len(
        encoding.encode(model_response["choices"][0]["message"]["content"])
    )

    model_response["created"] = int(time.time())
    model_response["model"] = model
    model_response.usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response
160
+
161
+
162
def embedding():
    """Placeholder for embedding calls: parse in, call the model, parse out."""
    return None
litellm/llms/bedrock.py ADDED
@@ -0,0 +1,799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json, copy, types
2
+ import os
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional, Any, Union
6
+ import litellm
7
+ from litellm.utils import ModelResponse, get_secret, Usage
8
+ from .prompt_templates.factory import prompt_factory, custom_prompt
9
+ import httpx
10
+
11
+
12
class BedrockError(Exception):
    """Error raised for Bedrock failures.

    Besides ``status_code`` and ``message`` it carries a placeholder
    ``httpx.Request`` and a matching ``httpx.Response`` built from the
    status code.
    """

    def __init__(self, status_code, message):
        # Initialise the base Exception with the message, then attach fields.
        super().__init__(message)
        self.status_code = status_code
        self.message = message
        placeholder_request = httpx.Request(
            method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock"
        )
        self.request = placeholder_request
        self.response = httpx.Response(
            status_code=status_code, request=placeholder_request
        )
23
+
24
+
25
class AmazonTitanConfig:
    """
    Class-level defaults for the Amazon Titan models.

    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1

    Supported params:

    - `maxTokenCount` (integer) max tokens,
    - `stopSequences` (string[]) list of stop sequence strings
    - `temperature` (float) temperature for model,
    - `topP` (int) top p for model

    Constructing an instance stores every non-None argument on the class
    itself, so the values are shared by all later `get_config()` calls.
    """

    maxTokenCount: Optional[int] = None
    stopSequences: Optional[list] = None
    temperature: Optional[float] = None
    topP: Optional[int] = None

    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        stopSequences: Optional[list] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
    ) -> None:
        supplied = {
            "maxTokenCount": maxTokenCount,
            "stopSequences": stopSequences,
            "temperature": temperature,
            "topP": topP,
        }
        owner = self.__class__
        for name, value in supplied.items():
            if value is not None:
                setattr(owner, name, value)

    @classmethod
    def get_config(cls):
        """Return the class's own non-None, non-dunder, non-callable attributes."""
        skipped_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        config = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, skipped_types):
                continue
            config[name] = value
        return config
71
+
72
+
73
class AmazonAnthropicConfig:
    """
    Class-level defaults for the Amazon / Anthropic models.

    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude

    Supported params:

    - `max_tokens_to_sample` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `top_k` (integer) top k,
    - `top_p` (integer) top p,
    - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
    - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"

    Constructing an instance stores every non-None argument on the class
    itself, so the values are shared by all later `get_config()` calls.
    """

    max_tokens_to_sample: Optional[int] = litellm.max_tokens
    stop_sequences: Optional[list] = None
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_p: Optional[int] = None
    anthropic_version: Optional[str] = None

    def __init__(
        self,
        max_tokens_to_sample: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[int] = None,
        anthropic_version: Optional[str] = None,
    ) -> None:
        supplied = {
            "max_tokens_to_sample": max_tokens_to_sample,
            "stop_sequences": stop_sequences,
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
            "anthropic_version": anthropic_version,
        }
        owner = self.__class__
        for name, value in supplied.items():
            if value is not None:
                setattr(owner, name, value)

    @classmethod
    def get_config(cls):
        """Return the class's own non-None, non-dunder, non-callable attributes."""
        skipped_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        config = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, skipped_types):
                continue
            config[name] = value
        return config
125
+
126
+
127
class AmazonCohereConfig:
    """
    Class-level defaults for the Amazon / Cohere models.

    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command

    Supported params:

    - `max_tokens` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `return_likelihood` (string) n/a

    Constructing an instance stores every non-None argument on the class
    itself, so the values are shared by all later `get_config()` calls.
    """

    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    return_likelihood: Optional[str] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        return_likelihood: Optional[str] = None,
    ) -> None:
        supplied = {
            "max_tokens": max_tokens,
            "temperature": temperature,
            "return_likelihood": return_likelihood,
        }
        owner = self.__class__
        for name, value in supplied.items():
            if value is not None:
                setattr(owner, name, value)

    @classmethod
    def get_config(cls):
        """Return the class's own non-None, non-dunder, non-callable attributes."""
        skipped_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        config = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, skipped_types):
                continue
            config[name] = value
        return config
170
+
171
+
172
class AmazonAI21Config:
    """
    Class-level defaults for the Amazon / AI21 models.

    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra

    Supported params:

    - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.

    - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.

    - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.

    - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.

    - `frequencePenalty` (object): Placeholder for frequency penalty object. (The attribute is spelled this way in the code.)

    - `presencePenalty` (object): Placeholder for presence penalty object.

    - `countPenalty` (object): Placeholder for count penalty object.

    Constructing an instance stores every non-None argument on the class
    itself, so the values are shared by all later `get_config()` calls.
    """

    maxTokens: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    stopSequences: Optional[list] = None
    # NOTE(review): spelling differs from "frequency" - confirm the key the
    # AI21 endpoint expects before renaming; callers use this name today.
    frequencePenalty: Optional[dict] = None
    presencePenalty: Optional[dict] = None
    countPenalty: Optional[dict] = None

    def __init__(
        self,
        maxTokens: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
        stopSequences: Optional[list] = None,
        frequencePenalty: Optional[dict] = None,
        presencePenalty: Optional[dict] = None,
        countPenalty: Optional[dict] = None,
    ) -> None:
        supplied = {
            "maxTokens": maxTokens,
            "temperature": temperature,
            "topP": topP,
            "stopSequences": stopSequences,
            "frequencePenalty": frequencePenalty,
            "presencePenalty": presencePenalty,
            "countPenalty": countPenalty,
        }
        owner = self.__class__
        for name, value in supplied.items():
            if value is not None:
                setattr(owner, name, value)

    @classmethod
    def get_config(cls):
        """Return the class's own non-None, non-dunder, non-callable attributes."""
        skipped_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        config = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, skipped_types):
                continue
            config[name] = value
        return config
233
+
234
+
235
class AnthropicConstants(Enum):
    """Turn markers used when building Anthropic-style prompts."""

    # Each marker starts with a blank line and ends with a trailing space.
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "
238
+
239
+
240
class AmazonLlamaConfig:
    """
    Class-level defaults for the Amazon / Meta Llama models.

    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

    Supported params:

    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model

    Constructing an instance stores every non-None argument on the class
    itself, so the values are shared by all later `get_config()` calls.

    The constructor originally only accepted the Titan-style names
    `maxTokenCount` and `topP`. Those are still accepted, but are stored
    under the documented Llama names `max_gen_len` and `top_p`, so
    `get_config()` only ever emits the documented parameter names. When both
    spellings are given, the Llama name wins.
    """

    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    # Legacy attribute, never populated; kept so existing reads do not raise.
    topP: Optional[float] = None

    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
        max_gen_len: Optional[int] = None,
        top_p: Optional[float] = None,
    ) -> None:
        supplied = {
            "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
            "temperature": temperature,
            "top_p": top_p if top_p is not None else topP,
        }
        for name, value in supplied.items():
            if value is not None:
                setattr(self.__class__, name, value)

    @classmethod
    def get_config(cls):
        """Return the class's own non-None, non-dunder, non-callable attributes."""
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
283
+
284
+
285
def init_bedrock_client(
    region_name=None,
    aws_access_key_id: Optional[str] = None,
    aws_secret_access_key: Optional[str] = None,
    aws_region_name: Optional[str] = None,
    aws_bedrock_runtime_endpoint: Optional[str] = None,
):
    """
    Create a boto3 `bedrock-runtime` client.

    Any of the four `aws_*` arguments may be given as an "os.environ/<NAME>"
    reference, in which case the value is looked up through `get_secret`.

    Region precedence: `region_name`, then `aws_region_name`, then the
    AWS_REGION_NAME secret, then the AWS_REGION secret.
    Endpoint precedence: `aws_bedrock_runtime_endpoint`, then the
    AWS_BEDROCK_RUNTIME_ENDPOINT secret, then the regional default URL.

    Raises:
        BedrockError: (status 401) when no region can be determined.
    """
    # check for custom AWS_REGION_NAME and use it if not passed to init_bedrock_client
    litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
    standard_aws_region_name = get_secret("AWS_REGION", None)

    ## CHECK IS 'os.environ/' passed in
    def _resolve(param):
        # Only "os.environ/..." references are looked up; other values pass through.
        if param and param.startswith("os.environ/"):
            return get_secret(param)
        return param

    aws_access_key_id = _resolve(aws_access_key_id)
    aws_secret_access_key = _resolve(aws_secret_access_key)
    aws_region_name = _resolve(aws_region_name)
    aws_bedrock_runtime_endpoint = _resolve(aws_bedrock_runtime_endpoint)

    region_name = (
        region_name
        or aws_region_name
        or litellm_aws_region_name
        or standard_aws_region_name
    )
    if not region_name:
        raise BedrockError(
            message="AWS region not set: set AWS_REGION_NAME or AWS_REGION env variable or in .env file",
            status_code=401,
        )

    # check for custom AWS_BEDROCK_RUNTIME_ENDPOINT and use it if not passed to init_bedrock_client
    env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
    endpoint_url = (
        aws_bedrock_runtime_endpoint
        or env_aws_bedrock_runtime_endpoint
        or f"https://bedrock-runtime.{region_name}.amazonaws.com"
    )

    import boto3

    client_kwargs = {
        "service_name": "bedrock-runtime",
        "region_name": region_name,
        "endpoint_url": endpoint_url,
    }
    if aws_access_key_id is not None:
        # uses auth params passed to completion
        # aws_access_key_id is not None, assume user is trying to auth using litellm.completion
        client_kwargs["aws_access_key_id"] = aws_access_key_id
        client_kwargs["aws_secret_access_key"] = aws_secret_access_key
    # else: aws_access_key_id is None, assume user is trying to auth using env variables
    # (boto3 automatically reads env variables)

    return boto3.client(**client_kwargs)
363
+
364
+
365
def convert_messages_to_prompt(model, messages, provider, custom_prompt_dict):
    """
    Turn a list of chat messages into a single prompt string.

    For the "anthropic" provider the prompt comes from the custom template
    registered for `model` in `custom_prompt_dict` if there is one, otherwise
    from `prompt_factory`. For every other provider the message contents are
    concatenated in order, with no separators and no regard to role.
    """
    if provider != "anthropic":
        return "".join(f"{message['content']}" for message in messages)

    if model not in custom_prompt_dict:
        # handle anthropic prompts using anthropic constants
        return prompt_factory(
            model=model, messages=messages, custom_llm_provider="anthropic"
        )

    # the model has a registered custom prompt
    template = custom_prompt_dict[model]
    return custom_prompt(
        role_dict=template["roles"],
        initial_prompt_value=template["initial_prompt_value"],
        final_prompt_value=template["final_prompt_value"],
        messages=messages,
    )
392
+
393
+
394
+ """
395
+ BEDROCK AUTH Keys/Vars
396
+ os.environ['AWS_ACCESS_KEY_ID'] = ""
397
+ os.environ['AWS_SECRET_ACCESS_KEY'] = ""
398
+ """
399
+
400
+
401
+ # set os.environ['AWS_REGION_NAME'] = <your-region_name>
402
+
403
+
404
def _apply_config_defaults(inference_params, config):
    """Copy each `config` entry into `inference_params` unless the caller already set that key."""
    for k, v in config.items():
        if k not in inference_params:
            # completion(top_k=3) > provider_config(top_k=3) <- allows for dynamic variables to be passed in
            inference_params[k] = v


def completion(
    model: str,
    messages: list,
    custom_prompt_dict: dict,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """
    Run a text completion against a Bedrock model.

    The provider is the part of `model` before the first "." and selects both
    the request body layout and how the response text is extracted.

    Args:
        model: Bedrock model name, e.g. "<provider>.<model>".
        messages: Chat messages; flattened into a prompt by `convert_messages_to_prompt`.
        custom_prompt_dict: Per-model custom prompt templates.
        model_response: Response object that is filled in and returned.
        print_verbose: Callable used for debug output.
        encoding: Tokenizer exposing `encode`, used to count tokens.
        logging_obj: Object exposing `pre_call` / `post_call`.
        optional_params: Per-call parameters. The AWS credential keys,
            "aws_bedrock_client" and "model_id" are popped from this dict
            (the caller's dict is mutated). `None` is treated as empty.
        litellm_params: Unused here.
        logger_fn: Unused here.

    Returns:
        `model_response` for non-streaming calls. When streaming is requested:
        the fully read response body for "ai21", otherwise the response's
        "body" object from `invoke_model_with_response_stream`.

    Raises:
        BedrockError: for every failure; unexpected exceptions are wrapped
            with status 500 and the formatted traceback as message.
    """
    if optional_params is None:
        optional_params = {}
    try:
        # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
        aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
        aws_access_key_id = optional_params.pop("aws_access_key_id", None)
        aws_region_name = optional_params.pop("aws_region_name", None)
        aws_bedrock_runtime_endpoint = optional_params.pop(
            "aws_bedrock_runtime_endpoint", None
        )

        # use passed in BedrockRuntime.Client if provided, otherwise create a new one
        client = optional_params.pop("aws_bedrock_client", None)

        # only init client, if user did not pass one
        if client is None:
            client = init_bedrock_client(
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
                aws_region_name=aws_region_name,
                aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
            )

        modelId = (
            optional_params.pop("model_id", None) or model
        )  # default to model if not passed
        provider = model.split(".")[0]
        prompt = convert_messages_to_prompt(
            model, messages, provider, custom_prompt_dict
        )
        inference_params = copy.deepcopy(optional_params)
        stream = inference_params.pop("stream", False)
        if provider == "anthropic":
            ## LOAD CONFIG
            _apply_config_defaults(
                inference_params, litellm.AmazonAnthropicConfig.get_config()
            )
            data = json.dumps({"prompt": prompt, **inference_params})
        elif provider == "ai21":
            ## LOAD CONFIG
            _apply_config_defaults(
                inference_params, litellm.AmazonAI21Config.get_config()
            )
            data = json.dumps({"prompt": prompt, **inference_params})
        elif provider == "cohere":
            ## LOAD CONFIG
            _apply_config_defaults(
                inference_params, litellm.AmazonCohereConfig.get_config()
            )
            if optional_params.get("stream", False) == True:
                inference_params[
                    "stream"
                ] = True  # cohere requires stream = True in inference params
            data = json.dumps({"prompt": prompt, **inference_params})
        elif provider == "meta":
            ## LOAD CONFIG
            _apply_config_defaults(
                inference_params, litellm.AmazonLlamaConfig.get_config()
            )
            data = json.dumps({"prompt": prompt, **inference_params})
        elif provider == "amazon":  # amazon titan
            ## LOAD CONFIG
            _apply_config_defaults(
                inference_params, litellm.AmazonTitanConfig.get_config()
            )
            data = json.dumps(
                {
                    "inputText": prompt,
                    "textGenerationConfig": inference_params,
                }
            )
        else:
            # Unrecognised provider prefix: an empty JSON body is sent as-is.
            data = json.dumps({})

        ## COMPLETION CALL
        accept = "application/json"
        contentType = "application/json"
        if stream == True:
            if provider == "ai21":
                # ai21 is called through the non-streaming API even when
                # streaming was requested; the whole body is read and returned.
                ## LOGGING
                request_str = f"""
                response = client.invoke_model(
                    body={data},
                    modelId={modelId},
                    accept=accept,
                    contentType=contentType
                )
                """
                logging_obj.pre_call(
                    input=prompt,
                    api_key="",
                    additional_args={
                        "complete_input_dict": data,
                        "request_str": request_str,
                    },
                )

                response = client.invoke_model(
                    body=data, modelId=modelId, accept=accept, contentType=contentType
                )

                response = response.get("body").read()
                return response
            else:
                ## LOGGING
                request_str = f"""
                response = client.invoke_model_with_response_stream(
                    body={data},
                    modelId={modelId},
                    accept=accept,
                    contentType=contentType
                )
                """
                logging_obj.pre_call(
                    input=prompt,
                    api_key="",
                    additional_args={
                        "complete_input_dict": data,
                        "request_str": request_str,
                    },
                )

                response = client.invoke_model_with_response_stream(
                    body=data, modelId=modelId, accept=accept, contentType=contentType
                )
                response = response.get("body")
                return response
        try:
            ## LOGGING
            request_str = f"""
            response = client.invoke_model(
                body={data},
                modelId={modelId},
                accept=accept,
                contentType=contentType
            )
            """
            logging_obj.pre_call(
                input=prompt,
                api_key="",
                additional_args={
                    "complete_input_dict": data,
                    "request_str": request_str,
                },
            )
            response = client.invoke_model(
                body=data, modelId=modelId, accept=accept, contentType=contentType
            )
        except client.exceptions.ValidationException as e:
            if "The provided model identifier is invalid" in str(e):
                raise BedrockError(status_code=404, message=str(e))
            raise BedrockError(status_code=400, message=str(e))
        except Exception as e:
            raise BedrockError(status_code=500, message=str(e))

        response_body = json.loads(response.get("body").read())

        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key="",
            original_response=json.dumps(response_body),
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response}")
        ## RESPONSE OBJECT
        outputText = "default"
        if provider == "ai21":
            outputText = response_body.get("completions")[0].get("data").get("text")
        elif provider == "anthropic":
            outputText = response_body["completion"]
            model_response["finish_reason"] = response_body["stop_reason"]
        elif provider == "cohere":
            outputText = response_body["generations"][0]["text"]
        elif provider == "meta":
            outputText = response_body["generation"]
        else:  # amazon titan
            outputText = response_body.get("results")[0].get("outputText")

        response_metadata = response.get("ResponseMetadata", {})
        if response_metadata.get("HTTPStatusCode", 500) >= 400:
            raise BedrockError(
                message=outputText,
                status_code=response_metadata.get("HTTPStatusCode", 500),
            )
        else:
            try:
                if len(outputText) > 0:
                    model_response["choices"][0]["message"]["content"] = outputText
            except Exception:
                raise BedrockError(
                    message=json.dumps(outputText),
                    status_code=response_metadata.get("HTTPStatusCode", 500),
                )

        ## CALCULATING USAGE - token counts are computed locally with `encoding`
        prompt_tokens = len(encoding.encode(prompt))
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = int(time.time())
        model_response["model"] = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response
    except BedrockError:
        # Already mapped to the module's error type; pass it through untouched.
        raise
    except Exception:
        import traceback

        raise BedrockError(status_code=500, message=traceback.format_exc())
653
+
654
+
655
def _embedding_func_single(
    model: str,
    input: str,
    client: Any,
    optional_params=None,
    encoding=None,
    logging_obj=None,
):
    """
    Embed one string with a Bedrock embedding model.

    Args:
        model: Bedrock model name; the text before the first "." is the provider.
        input: The text to embed.
        client: Object exposing `invoke_model` (a bedrock-runtime client).
        optional_params: Extra request parameters. "user" and "model_id" are
            stripped before the request body is built; "model_id", when set,
            overrides the model id used for the call. This dict is NOT
            modified, so the same dict can be reused for several inputs.
        encoding: Unused here.
        logging_obj: Object exposing `pre_call` / `post_call`.

    Returns:
        For "amazon": the "embedding" field of the response body.
        For "cohere": the response's "embeddings" lists flattened into one list.

    Raises:
        BedrockError: status 400 for an unsupported provider, status 500 when
            the call or response parsing fails.
    """
    # logic for parsing in - calling - parsing out model embedding calls
    ## FORMAT EMBEDDING INPUT ##
    provider = model.split(".")[0]
    inference_params = (
        copy.deepcopy(optional_params) if optional_params is not None else {}
    )
    inference_params.pop(
        "user", None
    )  # make sure user is not passed in for bedrock call
    # Pop from the private copy: this keeps `model_id` out of the request body
    # and leaves the caller's dict intact for the next input of a batch.
    modelId = (
        inference_params.pop("model_id", None) or model
    )  # default to model if not passed
    if provider == "amazon":
        input = input.replace(os.linesep, " ")
        data = {"inputText": input, **inference_params}
    elif provider == "cohere":
        inference_params["input_type"] = inference_params.get(
            "input_type", "search_document"
        )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
        data = {"texts": [input], **inference_params}  # type: ignore
    else:
        # Previously this fell through and failed with an unbound `data`.
        raise BedrockError(
            message=f"Embedding Error with model {model}: unsupported provider '{provider}'",
            status_code=400,
        )
    body = json.dumps(data).encode("utf-8")
    ## LOGGING
    request_str = f"""
    response = client.invoke_model(
        body={body},
        modelId={modelId},
        accept="*/*",
        contentType="application/json",
    )"""  # type: ignore
    logging_obj.pre_call(
        input=input,
        api_key="",  # boto3 is used for init.
        additional_args={
            "complete_input_dict": {"model": modelId, "texts": input},
            "request_str": request_str,
        },
    )
    try:
        response = client.invoke_model(
            body=body,
            modelId=modelId,
            accept="*/*",
            contentType="application/json",
        )
        response_body = json.loads(response.get("body").read())
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key="",
            additional_args={"complete_input_dict": data},
            original_response=json.dumps(response_body),
        )
        if provider == "cohere":
            response = response_body.get("embeddings")
            # flatten list
            response = [item for sublist in response for item in sublist]
            return response
        elif provider == "amazon":
            return response_body.get("embedding")
    except Exception as e:
        raise BedrockError(
            message=f"Embedding Error with model {model}: {e}", status_code=500
        ) from e
725
+
726
+
727
def embedding(
    model: str,
    input: Union[list, str],
    api_key: Optional[str] = None,
    logging_obj=None,
    model_response=None,
    optional_params=None,
    encoding=None,
):
    """
    Embed one string or a list of strings with a Bedrock embedding model.

    Args:
        model: Bedrock model name.
        input: A single string or a list of strings.
        api_key: Unused here.
        logging_obj: Forwarded to `_embedding_func_single`.
        model_response: Response object that is filled in and returned.
        optional_params: Per-call parameters; the AWS credential keys are
            popped from this dict (the caller's dict is mutated). `None` is
            treated as empty.
        encoding: Tokenizer exposing `encode`, used to count input tokens.

    Returns:
        `model_response` with "object", "data", "model" and `usage` set.
    """
    if optional_params is None:
        optional_params = {}

    ### BOTO3 INIT ###
    # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
    aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
    aws_access_key_id = optional_params.pop("aws_access_key_id", None)
    aws_region_name = optional_params.pop("aws_region_name", None)
    aws_bedrock_runtime_endpoint = optional_params.pop(
        "aws_bedrock_runtime_endpoint", None
    )

    # a new client is created on every call
    client = init_bedrock_client(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_region_name=aws_region_name,
        aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
    )

    # isinstance (not an exact type match) so str subclasses are embedded as
    # one text rather than character by character.
    texts = [input] if isinstance(input, str) else input

    ## Embedding Call
    embeddings = [
        _embedding_func_single(
            model,
            text,
            optional_params=optional_params,
            client=client,
            logging_obj=logging_obj,
        )
        for text in texts
    ]  # [TODO]: make these parallel calls

    ## Populate OpenAI compliant dictionary
    embedding_response = [
        {
            "object": "embedding",
            "index": idx,
            "embedding": vector,
        }
        for idx, vector in enumerate(embeddings)
    ]
    model_response["object"] = "list"
    model_response["data"] = embedding_response
    model_response["model"] = model

    # Token usage is counted over all inputs concatenated without separators.
    input_str = "".join(input)
    input_tokens = len(encoding.encode(input_str))

    usage = Usage(
        prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens + 0
    )
    model_response.usage = usage

    return model_response
litellm/llms/cloudflare.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ import httpx
9
+ from litellm.utils import ModelResponse, Usage
10
+ from .prompt_templates.factory import prompt_factory, custom_prompt
11
+
12
+
13
class CloudflareError(Exception):
    """
    Error raised for failed Cloudflare calls.

    Carries the HTTP status code and message, plus placeholder httpx
    request/response objects built from that status code.
    """

    def __init__(self, status_code, message):
        placeholder_request = httpx.Request(
            method="POST", url="https://api.cloudflare.com"
        )
        self.status_code = status_code
        self.message = message
        self.request = placeholder_request
        self.response = httpx.Response(
            status_code=status_code, request=placeholder_request
        )
        # Call the base class constructor with the parameters it needs
        super().__init__(self.message)
22
+
23
+
24
class CloudflareConfig:
    """
    Default request parameters for Cloudflare calls.

    Constructor arguments that are not None are written onto the class itself,
    so they become shared defaults that `get_config()` reports afterwards.
    """

    max_tokens: Optional[int] = None
    stream: Optional[bool] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
        stream: Optional[bool] = None,
    ) -> None:
        supplied = {"max_tokens": max_tokens, "stream": stream}
        for name, setting in supplied.items():
            if setting is None:
                continue
            setattr(self.__class__, name, setting)

    @classmethod
    def get_config(cls):
        """Return the class attributes that hold a value: no dunders, no callables, no None."""
        skipped_types = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        settings = {}
        for name, setting in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if isinstance(setting, skipped_types):
                continue
            if setting is None:
                continue
            settings[name] = setting
        return settings
55
+
56
+
57
def validate_environment(api_key):
    """
    Build the HTTP headers for a Cloudflare request.

    Args:
        api_key: API token placed in the bearer Authorization header.

    Returns:
        dict with "accept", "content-type" and "Authorization" headers.

    Raises:
        ValueError: if `api_key` is None.
    """
    if api_key is None:
        raise ValueError(
            "Missing Cloudflare API Key - A call is being made to cloudflare but no key is set either in the environment variables or via params"
        )
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": "Bearer " + api_key,
    }
    return headers
68
+
69
+
70
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    custom_prompt_dict=None,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """
    Run a chat completion against Cloudflare.

    Args:
        model: Model name; appended to `api_base` to form the request URL.
        messages: Chat messages, sent as the "messages" field of the body.
        api_base: Base URL that the model name is appended to.
        model_response: Response object that is filled in and returned.
        print_verbose: Callable used for debug output.
        encoding: Tokenizer exposing `encode`, used to count completion tokens.
        api_key: API token; required.
        logging_obj: Object exposing `pre_call` / `post_call`.
        custom_prompt_dict: Per-model custom prompt templates. `None` means none.
        optional_params: Extra body fields; `CloudflareConfig` defaults are
            added to this dict for keys it lacks. `None` is treated as empty.
        litellm_params: Unused here.
        logger_fn: Unused here.

    Returns:
        `response.iter_lines()` when streaming was requested, otherwise
        `model_response`.

    Raises:
        ValueError: if `api_key` is None.
        CloudflareError: if a non-streaming call returns a non-200 status.
    """
    headers = validate_environment(api_key)

    # None instead of mutable `{}` defaults; also makes the `in` checks below safe.
    if custom_prompt_dict is None:
        custom_prompt_dict = {}
    if optional_params is None:
        optional_params = {}

    ## Load Config
    config = litellm.CloudflareConfig.get_config()
    for k, v in config.items():
        if k not in optional_params:
            optional_params[k] = v

    print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
    if model in custom_prompt_dict:
        # check if the model has a registered custom prompt
        model_prompt_details = custom_prompt_dict[model]
        # NOTE(review): the rendered prompt is not used below; the request
        # body always sends `messages`. Confirm whether that is intended.
        prompt = custom_prompt(
            role_dict=model_prompt_details.get("roles", {}),
            initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
            final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
            bos_token=model_prompt_details.get("bos_token", ""),
            eos_token=model_prompt_details.get("eos_token", ""),
            messages=messages,
        )

    # cloudflare adds the model to the api base
    api_base = api_base + model

    data = {
        "messages": messages,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=messages,
        api_key=api_key,
        additional_args={
            "headers": headers,
            "api_base": api_base,
            "complete_input_dict": data,
        },
    )

    ## COMPLETION CALL
    if "stream" in optional_params and optional_params["stream"] == True:
        response = requests.post(
            api_base,
            headers=headers,
            data=json.dumps(data),
            stream=optional_params["stream"],
        )
        return response.iter_lines()
    else:
        response = requests.post(api_base, headers=headers, data=json.dumps(data))
        ## LOGGING
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        if response.status_code != 200:
            raise CloudflareError(
                status_code=response.status_code, message=response.text
            )
        completion_response = response.json()

        model_response["choices"][0]["message"]["content"] = completion_response[
            "result"
        ]["response"]

        ## CALCULATING USAGE
        print_verbose(
            f"CALCULATING CLOUDFLARE TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}"
        )
        prompt_tokens = litellm.utils.get_token_count(messages=messages, model=model)
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = int(time.time())
        model_response["model"] = "cloudflare/" + model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response
172
+
173
+
174
def embedding():
    """Placeholder: no embedding logic is implemented here; returns None."""
    # logic for parsing in - calling - parsing out model embedding calls
    return None
litellm/llms/cohere.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+ import httpx
10
+
11
+
12
class CohereError(Exception):
    """
    Error raised for failed Cohere calls.

    Carries the HTTP status code and message, plus placeholder httpx
    request/response objects built from that status code.
    """

    def __init__(self, status_code, message):
        placeholder_request = httpx.Request(
            method="POST", url="https://api.cohere.ai/v1/generate"
        )
        self.status_code = status_code
        self.message = message
        self.request = placeholder_request
        self.response = httpx.Response(
            status_code=status_code, request=placeholder_request
        )
        # Call the base class constructor with the parameters it needs
        super().__init__(self.message)
23
+
24
+
25
class CohereConfig:
    """
    Reference: https://docs.cohere.com/reference/generate

    The class `CohereConfig` provides configuration for the Cohere's API interface. Below are the parameters:

    - `num_generations` (integer): Maximum number of generations returned. Default is 1, with a minimum value of 1 and a maximum value of 5.

    - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default value is 20.

    - `truncate` (string): Specifies how the API handles inputs longer than maximum token length. Options include NONE, START, END. Default is END.

    - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.75.

    - `preset` (string): Identifier of a custom preset, a combination of parameters such as prompt, temperature etc.

    - `end_sequences` (array of strings): The generated text gets cut at the beginning of the earliest occurrence of an end sequence, which will be excluded from the text.

    - `stop_sequences` (array of strings): The generated text gets cut at the end of the earliest occurrence of a stop sequence, which will be included in the text.

    - `k` (integer): Limits generation at each step to top `k` most likely tokens. Default is 0.

    - `p` (number): Limits generation at each step to most likely tokens with total probability mass of `p`. Default is 0.

    - `frequency_penalty` (number): Reduces repetitiveness of generated tokens. Higher values apply stronger penalties to previously occurred tokens.

    - `presence_penalty` (number): Reduces repetitiveness of generated tokens. Similar to frequency_penalty, but this penalty applies equally to all tokens that have already appeared.

    - `return_likelihoods` (string): Specifies how and if token likelihoods are returned with the response. Options include GENERATION, ALL and NONE.

    - `logit_bias` (object): Used to prevent the model from generating unwanted tokens or to incentivize it to include desired tokens. e.g. {"hello_world": 1233}

    Constructor arguments that are not None are written onto the class itself,
    so they become shared defaults that `get_config()` reports afterwards.
    """

    num_generations: Optional[int] = None
    max_tokens: Optional[int] = None
    truncate: Optional[str] = None
    temperature: Optional[float] = None
    preset: Optional[str] = None
    end_sequences: Optional[list] = None
    stop_sequences: Optional[list] = None
    k: Optional[int] = None
    p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    return_likelihoods: Optional[str] = None
    logit_bias: Optional[dict] = None

    def __init__(
        self,
        num_generations: Optional[int] = None,
        max_tokens: Optional[int] = None,
        truncate: Optional[str] = None,
        temperature: Optional[float] = None,
        preset: Optional[str] = None,
        end_sequences: Optional[list] = None,
        stop_sequences: Optional[list] = None,
        k: Optional[int] = None,
        p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        return_likelihoods: Optional[str] = None,
        logit_bias: Optional[dict] = None,
    ) -> None:
        # locals() is captured first, so it holds exactly `self` plus the arguments.
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return every non-None, non-callable, non-dunder class attribute as a dict."""
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
110
+
111
+
112
def validate_environment(api_key):
    """Build JSON request headers, adding a bearer Authorization header only when `api_key` is truthy."""
    auth_header = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    return {
        "accept": "application/json",
        "content-type": "application/json",
        **auth_header,
    }
120
+
121
+
122
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """
    Run a text completion against Cohere.

    Args:
        model: Model name, sent as the "model" field of the body.
        messages: Chat messages; their "content" values are joined with
            single spaces to form the prompt.
        api_base: URL the request is posted to.
        model_response: Response object that is filled in and returned.
        print_verbose: Callable used for debug output.
        encoding: Tokenizer exposing `encode`, used to count tokens.
        api_key: API key; the Authorization header is omitted when falsy.
        logging_obj: Object exposing `pre_call` / `post_call`.
        optional_params: Extra body fields; `CohereConfig` defaults are added
            to this dict for keys it lacks. `None` is treated as empty.
        litellm_params: Unused here.
        logger_fn: Unused here.

    Returns:
        `response.iter_lines()` when streaming was requested, otherwise
        `model_response`.

    Raises:
        CohereError: on a non-200 status, an "error" field in the response
            body, or a response body that cannot be parsed into choices.
    """
    if optional_params is None:
        optional_params = {}

    headers = validate_environment(api_key)
    completion_url = api_base
    prompt = " ".join(message["content"] for message in messages)

    ## Load Config
    config = litellm.CohereConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "model": model,
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": completion_url,
        },
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params["stream"] if "stream" in optional_params else False,
    )
    ## error handling for cohere calls
    if response.status_code != 200:
        raise CohereError(message=response.text, status_code=response.status_code)

    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        completion_response = response.json()
        if "error" in completion_response:
            raise CohereError(
                message=completion_response["error"],
                status_code=response.status_code,
            )
        else:
            try:
                choices_list = []
                for idx, item in enumerate(completion_response["generations"]):
                    if len(item["text"]) > 0:
                        message_obj = Message(content=item["text"])
                    else:
                        message_obj = Message(content=None)
                    choice_obj = Choices(
                        finish_reason=item["finish_reason"],
                        index=idx + 1,
                        message=message_obj,
                    )
                    choices_list.append(choice_obj)
                model_response["choices"] = choices_list
            except Exception as e:
                raise CohereError(
                    message=response.text, status_code=response.status_code
                ) from e

        ## CALCULATING USAGE
        prompt_tokens = len(encoding.encode(prompt))
        # An empty generation is stored with content=None above; count it as
        # zero tokens instead of handing None to the tokenizer.
        completion_tokens = len(
            encoding.encode(
                model_response["choices"][0]["message"].get("content", "") or ""
            )
        )

        model_response["created"] = int(time.time())
        model_response["model"] = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response
228
+
229
+
230
def embedding(
    model: str,
    input: list,
    api_key: Optional[str] = None,
    logging_obj=None,
    model_response=None,
    encoding=None,
    optional_params=None,
):
    """
    Embed a list of texts with Cohere's embed endpoint.

    Args:
        model: Model name, sent as the "model" field of the body.
        input: List of texts to embed.
        api_key: API key; the Authorization header is omitted when falsy.
        logging_obj: Object exposing `pre_call` / `post_call`.
        model_response: Response object that is filled in and returned.
        encoding: Tokenizer exposing `encode`, used to count input tokens.
        optional_params: Extra body fields. `None` is treated as empty.

    Returns:
        `model_response` with "object", "data", "model" and "usage" set.

    Raises:
        CohereError: if the endpoint returns a non-200 status.
    """
    if optional_params is None:
        optional_params = {}

    headers = validate_environment(api_key)
    embed_url = "https://api.cohere.ai/v1/embed"
    data = {"model": model, "texts": input, **optional_params}

    if "3" in model and "input_type" not in data:
        # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
        data["input_type"] = "search_document"

    ## LOGGING
    logging_obj.pre_call(
        input=input,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(embed_url, headers=headers, data=json.dumps(data))
    ## LOGGING
    # NOTE(review): the response object itself (not `response.text`) is passed
    # as original_response here, unlike the completion path — confirm intent.
    logging_obj.post_call(
        input=input,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
        original_response=response,
    )
    """
        response 
        {
            'object': "list",
            'data': [
            
            ]
            'model', 
            'usage'
        }
    """
    if response.status_code != 200:
        raise CohereError(message=response.text, status_code=response.status_code)
    embeddings = response.json()["embeddings"]
    output_data = [
        {"object": "embedding", "index": idx, "embedding": vector}
        for idx, vector in enumerate(embeddings)
    ]
    model_response["object"] = "list"
    model_response["data"] = output_data
    model_response["model"] = model
    input_tokens = sum(len(encoding.encode(text)) for text in input)

    model_response["usage"] = {
        "prompt_tokens": input_tokens,
        "total_tokens": input_tokens,
    }
    return model_response
litellm/llms/custom_httpx/azure_dall_e_2.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time, json, httpx, asyncio
2
+
3
+
4
class AsyncCustomHTTPTransport(httpx.AsyncHTTPTransport):
    """
    Async implementation of custom http transport.

    Requests whose path contains ``images/generations`` and whose
    ``api-version`` query parameter is one of the listed preview versions are
    re-routed to ``/openai/images/generations:submit``. The URL found in the
    ``operation-location`` response header is then polled with GET until the
    reported ``status`` is ``succeeded`` or ``failed`` (or polling times out).
    Every other request is forwarded unchanged.
    """

    # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
    _LEGACY_API_VERSIONS = (
        "2023-06-01-preview",
        "2023-07-01-preview",
        "2023-08-01-preview",
        "2023-09-01-preview",
        "2023-10-01-preview",
    )
    _POLL_TIMEOUT_SECS = 120
    _DEFAULT_RETRY_SECS = 10

    @classmethod
    def _retry_delay(cls, response: httpx.Response) -> int:
        """Return the polling delay in seconds from ``retry-after``, or the default.

        A missing, non-integer, zero or negative header value falls back to
        the default instead of raising.
        """
        raw_value = response.headers.get("retry-after")
        try:
            delay = int(raw_value) if raw_value is not None else 0
        except ValueError:
            delay = 0
        return delay if delay > 0 else cls._DEFAULT_RETRY_SECS

    @staticmethod
    def _json_response(
        status_code: int,
        source: httpx.Response,
        payload,
        request: httpx.Request,
    ) -> httpx.Response:
        """Build a response carrying `payload` as JSON and `source`'s headers."""
        return httpx.Response(
            status_code=status_code,
            headers=source.headers,
            content=json.dumps(payload).encode("utf-8"),
            request=request,
        )

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        """
        Send `request`, translating legacy image-generation calls into the
        submit-then-poll flow.

        Returns:
            - the upstream response unchanged for non-matching requests, or
              when the submit response carries no ``operation-location``;
            - a 200 response whose body is the operation's ``result`` on success;
            - a 400 response with the operation body when status is ``failed``;
            - a 400 response with a ``Timeout`` error body when polling
              exceeds the timeout.
        """
        # `.get` avoids a KeyError when the request has no api-version parameter.
        is_legacy_image_call = (
            "images/generations" in request.url.path
            and request.url.params.get("api-version") in self._LEGACY_API_VERSIONS
        )
        if not is_legacy_image_call:
            return await super().handle_async_request(request)

        request.url = request.url.copy_with(path="/openai/images/generations:submit")
        response = await super().handle_async_request(request)

        operation_location_url = response.headers.get("operation-location")
        if operation_location_url is None:
            # Nothing to poll (e.g. the submit call was rejected): hand the
            # upstream response back so the caller sees the real error.
            return response

        # Re-use the same request object to poll the operation status.
        request.url = httpx.URL(operation_location_url)
        request.method = "GET"
        response = await super().handle_async_request(request)
        await response.aread()

        start_time = time.time()
        while response.json()["status"] not in ["succeeded", "failed"]:
            if time.time() - start_time > self._POLL_TIMEOUT_SECS:
                timeout = {
                    "error": {
                        "code": "Timeout",
                        "message": "Operation polling timed out.",
                    }
                }
                return self._json_response(400, response, timeout, request)

            # Must not block the event loop while waiting between polls.
            await asyncio.sleep(self._retry_delay(response))
            response = await super().handle_async_request(request)
            await response.aread()

        operation = response.json()
        if operation["status"] == "failed":
            return self._json_response(400, response, operation, request)

        return self._json_response(200, response, operation["result"], request)
67
+
68
+
69
class CustomHTTPTransport(httpx.HTTPTransport):
    """
    This class was written as a workaround to support dall-e-2 on openai > v1.x

    Refer to this issue for more: https://github.com/openai/openai-python/issues/692

    Requests whose path contains ``images/generations`` and whose
    ``api-version`` query parameter is one of the listed preview versions are
    re-routed to ``/openai/images/generations:submit``. The URL found in the
    ``operation-location`` response header is then polled with GET until the
    reported ``status`` is ``succeeded`` or ``failed`` (or polling times out).
    Every other request is forwarded unchanged.
    """

    # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
    _LEGACY_API_VERSIONS = (
        "2023-06-01-preview",
        "2023-07-01-preview",
        "2023-08-01-preview",
        "2023-09-01-preview",
        "2023-10-01-preview",
    )
    _POLL_TIMEOUT_SECS = 120
    _DEFAULT_RETRY_SECS = 10

    @classmethod
    def _retry_delay(cls, response: httpx.Response) -> int:
        """Return the polling delay in seconds from ``retry-after``, or the default.

        A missing, non-integer, zero or negative header value falls back to
        the default instead of raising.
        """
        raw_value = response.headers.get("retry-after")
        try:
            delay = int(raw_value) if raw_value is not None else 0
        except ValueError:
            delay = 0
        return delay if delay > 0 else cls._DEFAULT_RETRY_SECS

    @staticmethod
    def _json_response(
        status_code: int,
        source: httpx.Response,
        payload,
        request: httpx.Request,
    ) -> httpx.Response:
        """Build a response carrying `payload` as JSON and `source`'s headers."""
        return httpx.Response(
            status_code=status_code,
            headers=source.headers,
            content=json.dumps(payload).encode("utf-8"),
            request=request,
        )

    def handle_request(
        self,
        request: httpx.Request,
    ) -> httpx.Response:
        """
        Send `request`, translating legacy image-generation calls into the
        submit-then-poll flow.

        Returns:
            - the upstream response unchanged for non-matching requests, or
              when the submit response carries no ``operation-location``;
            - a 200 response whose body is the operation's ``result`` on success;
            - a 400 response with the operation body when status is ``failed``;
            - a 400 response with a ``Timeout`` error body when polling
              exceeds the timeout.
        """
        # `.get` avoids a KeyError when the request has no api-version parameter.
        is_legacy_image_call = (
            "images/generations" in request.url.path
            and request.url.params.get("api-version") in self._LEGACY_API_VERSIONS
        )
        if not is_legacy_image_call:
            return super().handle_request(request)

        request.url = request.url.copy_with(path="/openai/images/generations:submit")
        response = super().handle_request(request)

        operation_location_url = response.headers.get("operation-location")
        if operation_location_url is None:
            # Nothing to poll (e.g. the submit call was rejected): hand the
            # upstream response back so the caller sees the real error.
            return response

        # Re-use the same request object to poll the operation status.
        request.url = httpx.URL(operation_location_url)
        request.method = "GET"
        response = super().handle_request(request)
        response.read()

        start_time = time.time()
        while response.json()["status"] not in ["succeeded", "failed"]:
            if time.time() - start_time > self._POLL_TIMEOUT_SECS:
                timeout = {
                    "error": {
                        "code": "Timeout",
                        "message": "Operation polling timed out.",
                    }
                }
                return self._json_response(400, response, timeout, request)

            time.sleep(self._retry_delay(response))
            response = super().handle_request(request)
            response.read()

        operation = response.json()
        if operation["status"] == "failed":
            return self._json_response(400, response, operation, request)

        return self._json_response(200, response, operation["result"], request)
litellm/llms/custom_httpx/bedrock_async.py ADDED
File without changes
litellm/llms/gemini.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types, traceback, copy
2
+ import json
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional
6
+ from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
7
+ import litellm
8
+ import sys, httpx
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+
11
+
12
class GeminiError(Exception):
    """
    Exception raised by the Gemini integration.

    Carries `status_code` and `message`, plus a synthetic `httpx.Request`
    and a matching `httpx.Response` built from the status code.
    """

    def __init__(self, status_code, message):
        # Synthetic request: a fixed POST to the documentation URL below.
        synthetic_request = httpx.Request(
            method="POST",
            url="https://developers.generativeai.google/api/python/google/generativeai/chat",
        )
        self.status_code = status_code
        self.message = message
        self.request = synthetic_request
        self.response = httpx.Response(
            status_code=status_code, request=synthetic_request
        )
        # The base Exception receives the message as its only argument.
        super().__init__(message)
24
+
25
+
26
class GeminiConfig:
    """
    Generation settings for the Gemini API interface.

    Reference: https://ai.google.dev/api/python/google/generativeai/GenerationConfig

    Settings (all optional, None means "not set"):

    - `candidate_count` (int): Number of generated responses to return.
    - `stop_sequences` (List[str]): Up to 5 character sequences that stop output generation; the stop sequence itself is not included in the response.
    - `max_output_tokens` (int): Maximum number of tokens to include in a candidate.
    - `temperature` (float): Controls the randomness of the output, in the range [0.0, 1.0].
    - `top_p` (float): Maximum cumulative probability of tokens to consider when sampling.
    - `top_k` (int): Maximum number of tokens to consider when sampling.

    Values are stored on the class itself, so constructing an instance
    changes what `get_config()` returns for every user of the class.
    """

    candidate_count: Optional[int] = None
    stop_sequences: Optional[list] = None
    max_output_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None

    def __init__(
        self,
        candidate_count: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
    ) -> None:
        supplied = {
            "candidate_count": candidate_count,
            "stop_sequences": stop_sequences,
            "max_output_tokens": max_output_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
        }
        for setting_name, setting_value in supplied.items():
            if setting_value is None:
                continue
            # Written to the class, not the instance.
            setattr(self.__class__, setting_name, setting_value)

    @classmethod
    def get_config(cls):
        """Return a dict of the class-level settings that are not None."""
        callable_kinds = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        settings = {}
        for attr_name, attr_value in cls.__dict__.items():
            if attr_name.startswith("__"):
                continue
            if attr_value is None or isinstance(attr_value, callable_kinds):
                continue
            settings[attr_name] = attr_value
        return settings
83
+
84
+
85
def completion(
    model: str,
    messages: list,
    model_response: ModelResponse,
    print_verbose: Callable,
    api_key,
    encoding,
    logging_obj,
    custom_prompt_dict: dict,
    acompletion: bool = False,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """
    Run a Gemini text generation call and populate `model_response`.

    Args:
        model: Model name; requested as ``models/{model}`` and reported back
            as ``gemini/{model}``.
        messages: Chat messages; each message's ``content`` is either a string
            or a list of parts with ``type`` / ``text`` keys.
        model_response: Response object that receives ``choices``,
            ``created``, ``model`` and ``usage``.
        print_verbose: Callable used to emit the raw response for debugging.
        api_key: Key passed to ``genai.configure``.
        encoding: Tokenizer exposing ``encode(text)``, used for usage counts.
        logging_obj: Object whose ``pre_call`` / ``post_call`` hooks are invoked.
        custom_prompt_dict: Per-model prompt templates; when `model` is a key,
            its entry is used instead of the default prompt factory.
        acompletion: Not used by this function.
        optional_params: Generation parameters; None is treated as empty.
        litellm_params: Not used by this function.
        logger_fn: Not used by this function.

    Returns:
        The populated `model_response`.

    Raises:
        Exception: If ``google.generativeai`` cannot be imported.
        GeminiError: If the generation call fails (500), the response cannot
            be parsed, or the first candidate has no text content (400).
    """
    try:
        import google.generativeai as genai
    except Exception as e:
        raise Exception(
            "Importing google.generativeai failed, please run 'pip install -q google-generativeai'"
        ) from e
    genai.configure(api_key=api_key)

    if model in custom_prompt_dict:
        # check if the model has a registered custom prompt
        model_prompt_details = custom_prompt_dict[model]
        prompt = custom_prompt(
            role_dict=model_prompt_details["roles"],
            initial_prompt_value=model_prompt_details["initial_prompt_value"],
            final_prompt_value=model_prompt_details["final_prompt_value"],
            messages=messages,
        )
    else:
        prompt = prompt_factory(
            model=model, messages=messages, custom_llm_provider="gemini"
        )

    ## Load Config
    # `optional_params` defaults to None; work on a private copy either way.
    inference_params = copy.deepcopy(optional_params) if optional_params else {}
    # "stream" is dropped here and never forwarded to the generation config.
    inference_params.pop("stream", None)
    config = litellm.GeminiConfig.get_config()
    for k, v in config.items():
        if (
            k not in inference_params
        ):  # completion(top_k=3) > gemini_config(top_k=3) <- allows for dynamic variables to be passed in
            inference_params[k] = v

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key="",
        additional_args={"complete_input_dict": {"inference_params": inference_params}},
    )
    ## COMPLETION CALL
    try:
        _model = genai.GenerativeModel(f"models/{model}")
        response = _model.generate_content(
            contents=prompt,
            generation_config=genai.types.GenerationConfig(**inference_params),
        )
    except Exception as e:
        raise GeminiError(
            message=str(e),
            status_code=500,
        )

    ## LOGGING
    logging_obj.post_call(
        input=prompt,
        api_key="",
        original_response=response,
        additional_args={"complete_input_dict": {}},
    )
    print_verbose(f"raw model_response: {response}")
    ## RESPONSE OBJECT
    try:
        choices_list = []
        for idx, item in enumerate(response.candidates):
            if len(item.content.parts) > 0:
                message_obj = Message(content=item.content.parts[0].text)
            else:
                message_obj = Message(content=None)
            # NOTE(review): choice indices start at 1 here - confirm this is intended.
            choice_obj = Choices(index=idx + 1, message=message_obj)
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list
    except Exception:
        traceback.print_exc()
        # Nothing visible here guarantees `response` has a `status_code`;
        # fall back to 500 so the original parsing error is not masked
        # by an AttributeError.
        raise GeminiError(
            message=traceback.format_exc(),
            status_code=getattr(response, "status_code", 500),
        )

    # Any failure to reach the first choice's text is treated as "no content".
    try:
        content_text = model_response["choices"][0]["message"].get("content")
    except Exception:
        content_text = None
    if content_text is None:
        original_response = f"response: {response}"
        if hasattr(response, "candidates"):
            original_response = f"response: {response.candidates}"
            if "SAFETY" in original_response:
                original_response += "\nThe candidate content was flagged for safety reasons."
            elif "RECITATION" in original_response:
                original_response += "\nThe candidate content was flagged for recitation reasons."
        raise GeminiError(
            status_code=400,
            message=f"No response received. Original response - {original_response}",
        )

    ## CALCULATING USAGE
    # Only textual content contributes to the prompt token count.
    prompt_parts = []
    for m in messages:
        content = m["content"]
        if isinstance(content, str):
            prompt_parts.append(content)
        elif isinstance(content, list):
            prompt_parts.extend(
                part["text"] for part in content if part["type"] == "text"
            )
    prompt_tokens = len(encoding.encode("".join(prompt_parts)))
    completion_tokens = len(encoding.encode(content_text))

    model_response["created"] = int(time.time())
    model_response["model"] = "gemini/" + model
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    model_response.usage = usage
    return model_response
218
+
219
+
220
def embedding():
    """Placeholder for model embedding calls (parse in, call, parse out); does nothing and returns None."""
    return None
litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt ADDED
@@ -0,0 +1,2523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0xDEADBEA7/DialoGPT-small-rick
2
+ 1Basco/DialoGPT-small-jake
3
+ 2early4coffee/DialoGPT-medium-deadpool
4
+ 2early4coffee/DialoGPT-small-deadpool
5
+ 2gud/DialogGPT-small-Koopsbot
6
+ ABBHISHEK/DialoGPT-small-harrypotter
7
+ AIDynamics/DialoGPT-medium-MentorDealerGuy
8
+ AJ/DialoGPT-small-ricksanchez
9
+ AJ/rick-discord-bot
10
+ AJ/rick-sanchez-bot
11
+ AJ-Dude/DialoGPT-small-harrypotter
12
+ AK270802/DialoGPT-small-harrypotter
13
+ ATGdev/DialoGPT-small-harrypotter
14
+ AVeryRealHuman/DialoGPT-small-TonyStark
15
+ AbhinavSaiTheGreat/DialoGPT-small-harrypotter
16
+ AccurateIsaiah/DialoGPT-small-jefftastic
17
+ AccurateIsaiah/DialoGPT-small-mozark
18
+ AccurateIsaiah/DialoGPT-small-mozarkv2
19
+ AccurateIsaiah/DialoGPT-small-sinclair
20
+ AdharshJolly/HarryPotterBot-Model
21
+ AdrianGzz/DialoGPT-small-harrypotter
22
+ Aero/Tsubomi-Haruno
23
+ AetherIT/DialoGPT-small-Hal
24
+ AiPorter/DialoGPT-small-Back_to_the_future
25
+ Aibox/DialoGPT-small-rick
26
+ Akjder/DialoGPT-small-harrypotter
27
+ AllwynJ/HarryBoy
28
+ AnthonyNelson/DialoGPT-small-ricksanchez
29
+ Apisate/DialoGPT-small-jordan
30
+ ArJakusz/DialoGPT-small-stark
31
+ Aran/DialoGPT-medium-harrypotter
32
+ Aran/DialoGPT-small-harrypotter
33
+ Arcktosh/DialoGPT-small-rick
34
+ AriakimTaiyo/DialoGPT-cultured-Kumiko
35
+ AriakimTaiyo/DialoGPT-medium-Kumiko
36
+ AriakimTaiyo/DialoGPT-revised-Kumiko
37
+ AriakimTaiyo/DialoGPT-small-Kumiko
38
+ AriakimTaiyo/DialoGPT-small-Rikka
39
+ ArtemisZealot/DialoGTP-small-Qkarin
40
+ Aruden/DialoGPT-medium-harrypotterall
41
+ Aspect11/DialoGPT-Medium-LiSBot
42
+ Asuramaru/DialoGPT-small-rintohsaka
43
+ Atchuth/DialoGPT-small-MichaelBot
44
+ Augustvember/WOKKAWOKKA
45
+ Augustvember/WokkaBot3
46
+ Augustvember/test
47
+ Augustvember/wokka2
48
+ Augustvember/wokka4
49
+ Augustvember/wokka5
50
+ Augustvember/wokkabottest2
51
+ AvatarXD/DialoGPT-medium-Blitzo
52
+ Awsaf/DialoGPT-medium-eren
53
+ Awsaf/large-eren
54
+ Axcel/DialoGPT-small-rick
55
+ Ayjayo/DialoGPT-medium-AyjayoAI
56
+ Ayran/DialoGPT-medium-harry-potter-1-through-3
57
+ Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6-e18
58
+ Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6
59
+ Ayran/DialoGPT-small-gandalf
60
+ Ayran/DialoGPT-small-harry-potter-1-through-3
61
+ Azuris/DialoGPT-medium-envy
62
+ Azuris/DialoGPT-medium-senorita
63
+ Azuris/DialoGPT-small-envy
64
+ BW/TEST
65
+ Backedman/DialoGPT-small-Anika
66
+ BalajiSathesh/DialoGPT-small-harrypotter
67
+ Batsy24/DialoGPT-medium-Twilight_BellaBot
68
+ Batsy24/DialoGPT-small-Twilight_EdBot
69
+ Bee-Garbs/DialoGPT-real-cartman-small
70
+ Biasface/DDDC
71
+ Biasface/DDDC2
72
+ BigTooth/DialoGPT-Megumin
73
+ BigTooth/DialoGPT-small-tohru
74
+ BigTooth/Megumin-v0.2
75
+ BigeS/DialoGPT-small-Rick
76
+ Bimal/my_bot_model
77
+ BinksSachary/DialoGPT-small-shaxx
78
+ BinksSachary/ShaxxBot
79
+ BinksSachary/ShaxxBot2
80
+ BlightZz/DialoGPT-medium-Kurisu
81
+ BlightZz/MakiseKurisu
82
+ BlueGamerBeast/DialoGPT-small-Morgana
83
+ BotterHax/DialoGPT-small-harrypotter
84
+ Broadus20/DialoGPT-small-joshua
85
+ BrunoNogueira/DialoGPT-kungfupanda
86
+ Brykee/DialoGPT-medium-Morty
87
+ Bubb-les/DisloGPT-medium-HarryPotter
88
+ Camzure/MaamiBot-test
89
+ Canadiancaleb/DialoGPT-small-jesse
90
+ Canadiancaleb/DialoGPT-small-walter
91
+ CasualHomie/DialoGPT-small-harrypotter
92
+ Chae/botman
93
+ Chakita/Friends
94
+ Chalponkey/DialoGPT-small-Barry
95
+ ChaseBread/DialoGPT-small-harrypotter
96
+ Chiuchiyin/DialoGPT-small-Donald
97
+ ChrisVCB/DialoGPT-medium-cmjs
98
+ ChrisVCB/DialoGPT-medium-ej
99
+ Chuah/DialoGPT-small-harrypotter
100
+ ChukSamuels/DialoGPT-small-Dr.FauciBot
101
+ Ciruzzo/DialoGPT-small-harrypotter
102
+ ClaudeCOULOMBE/RickBot
103
+ Cloudy/DialoGPT-CJ-large
104
+ ClydeWasTaken/DialoGPT-small-joshua
105
+ CodeDanCode/CartmenBot
106
+ CodeDanCode/SP-KyleBot
107
+ CoderBoy432/DialoGPT-small-harrypotter
108
+ CoderEFE/DialoGPT-marxbot
109
+ Connor/DialoGPT-small-rick
110
+ Connorvr/BrightBot-small
111
+ CopymySkill/DialoGPT-medium-atakan
112
+ Corvus/DialoGPT-medium-CaptainPrice-Extended
113
+ Corvus/DialoGPT-medium-CaptainPrice
114
+ Coyotl/DialoGPT-test-last-arthurmorgan
115
+ Coyotl/DialoGPT-test2-arthurmorgan
116
+ Coyotl/DialoGPT-test3-arthurmorgan
117
+ CracklesCreeper/Piglin-Talks-Harry-Potter
118
+ Cryptikdw/DialoGPT-small-rick
119
+ Cthyllax/DialoGPT-medium-PaladinDanse
120
+ CurtisBowser/DialoGPT-medium-sora-two
121
+ CurtisBowser/DialoGPT-medium-sora
122
+ CurtisBowser/DialoGPT-small-sora
123
+ CyberMuffin/DialoGPT-small-ChandlerBot
124
+ DARKVIP3R/DialoGPT-medium-Anakin
125
+ Daivakai/DialoGPT-small-saitama
126
+ Dawit/DialogGPT-small-ironman
127
+ Daymarebait/Discord_BOT_RICK
128
+ DecafNosebleed/DialoGPT-small-ScaraBot
129
+ Denny29/DialoGPT-medium-asunayuuki
130
+ Devid/DialoGPT-small-Miku
131
+ Dilmk2/DialoGPT-small-harrypotter
132
+ Dimedrolza/DialoGPT-small-cyberpunk
133
+ DingleyMaillotUrgell/homer-bot
134
+ Doiman/DialoGPT-medium-harrypotter
135
+ DongHai/DialoGPT-small-rick
136
+ Doquey/DialoGPT-small-Luisbot1
137
+ Doquey/DialoGPT-small-Michaelbot
138
+ Doxophobia/DialoGPT-medium-celeste
139
+ Dragoniod1596/DialoGPT-small-Legacies
140
+ Dreyzin/DialoGPT-medium-avatar
141
+ DueLinx0402/DialoGPT-small-harrypotter
142
+ Duugu/jakebot3000
143
+ Dyzi/DialoGPT-small-landcheese
144
+ EEE/DialoGPT-medium-brooke
145
+ EEE/DialoGPT-small-aang
146
+ EEE/DialoGPT-small-yoda
147
+ ESPersonnel/DialoGPT-small-got
148
+ Eagle3ye/DialoGPT-small-PeppaPig
149
+ Elzen7/DialoGPT-medium-harrypotter
150
+ Emi2160/DialoGPT-small-Neku
151
+ EmileAjar/DialoGPT-small-harrypotter
152
+ EmileAjar/DialoGPT-small-peppapig
153
+ Erikaka/DialoGPT-small-loki
154
+ EstoyDePaso/DialoGPT-small-harrypotter
155
+ EuropeanTurtle/DialoGPT-small-mrcobb
156
+ ExEngineer/DialoGPT-medium-jdt
157
+ Exilon/DialoGPT-large-quirk
158
+ EzioDD/house
159
+ FFF000/dialogpt-FFF
160
+ FangLee/DialoGPT-small-Kirito
161
+ Filosofas/DialoGPT-medium-PALPATINE
162
+ Flampt/DialoGPT-medium-Sheldon
163
+ For/sheldonbot
164
+ FosterPatch/GoT-test
165
+ Fu10k/DialoGPT-medium-Rick
166
+ GabbyDaBUNBUN/DialoGPT-medium-PinkiePie
167
+ Galaxy/DialoGPT-small-hermoine
168
+ GamerMan02/DialoGPT-medium-gamerbot
169
+ Gappy/DialoGPT-small-Zhongli
170
+ Geezy/DialoGPT-small-guy
171
+ GenDelport/DialoGPT-small-harrypotter
172
+ Gowtham25/DialoGPT-small-jackie
173
+ Gregor-Davies/DialoGPT-small-rick
174
+ Greysan/DialoGPT-medium-TOH
175
+ Guard-SK/DialoGPT-medium-ricksanchez
176
+ Guard-SK/DialoGPT-small-ricksanchez
177
+ GunjanPantha/DialoGPT-small-gameofthrones
178
+ Guy0/DialoGPT-small-Batmanbotty
179
+ HAttORi/DialoGPT-Medium-zerotwo
180
+ HackyHackyMan/DialoGPT-small-harrypotter
181
+ Hadron/DialoGPT-medium-nino
182
+ Hallzy/Peterbot
183
+ Hamas/DialoGPT-large-jake
184
+ Hamas/DialoGPT-large-jake2
185
+ Hamas/DialoGPT-large-jake3
186
+ Hamas/DialoGPT-large-jake4
187
+ Hamhams/DialoGPT-small-rick
188
+ HansAnonymous/DialoGPT-medium-rick
189
+ HansAnonymous/DialoGPT-small-shrek
190
+ HarryPuttar/HarryPotterDC
191
+ Harshal6927/Jack_Sparrow_GPT
192
+ Harshal6927/Tony_Stark_GPT
193
+ Havokx/DialoGPT-small-Rick
194
+ Heldhy/DialoGPT-small-tony
195
+ Heldhy/testingAgain
196
+ MagnusChase7/DialoGPT-medium-harrypotter
197
+ Htenn/DialoGPT-small-spongebob
198
+ Htenn/DialoGPT-small-spongebobv2
199
+ HueJanus/DialoGPT-small-ricksanchez
200
+ HypNyx/DialoGPT-small-DwightBot
201
+ HypNyx/DialoGPT-small-Thanos
202
+ HypedKid/PeterBot
203
+ ILoveThatLady/DialoGPT-small-rickandmorty
204
+ ITNODove/DialoGPT-medium-cyberbones
205
+ Icemiser/chat-test
206
+ Ilyabarigou/Genesis-harrybotter
207
+ ImAPizza/DialoGPT-medium-albert
208
+ ImAPizza/DialoGPT-medium-alberttwo
209
+ Invincible/Chat_bot-Harrypotter-medium
210
+ Invincible/Chat_bot-Harrypotter-small
211
+ Invincible/DialoGPT-medium-harryPotter
212
+ Istiaque190515/Sherlock
213
+ Istiaque190515/harry_bot_discord
214
+ Istiaque190515/harry_potter
215
+ ItoYagura/DialoGPT-medium-tohru
216
+ ItzJorinoPlays/DialoGPT-small-PickleRick
217
+ J-Chiang/DialoGPT-small-thor
218
+ JDS22/DialoGPT-medium-HarryPotterBot
219
+ Jedi33/tonystarkAI
220
+ Jeffrey/DialoGPT-small-Jeffrey
221
+ JimmyHodl/DialoGPT-medium
222
+ Jllama/dialoGPT-small-Joshua-test
223
+ Jonesy/DialoGPT-medium_Barney
224
+ Jonesy/FG_OLD
225
+ Jonesy/DialoGPT-small_JT
226
+ Julianqll/DialoGPT-small-finalmorty
227
+ Julianqll/DialoGPT-small-ricksanchez
228
+ KAIHATSU/DialoGPT-small-rick
229
+ KENNETHFOO/DialoGPT-medium-harrypotter
230
+ KOSTAS/DialoGPT-small-Cleverbot
231
+ KP2500/KPBot
232
+ Kai0857/DialoGPT-small-harrypotter
233
+ Kail91/DialoGPT-small-PeraltaBot
234
+ Kairu/DialoGPT-small-Rick
235
+ Kairu/RICKBOT
236
+ KakoSi/Smolmm3
237
+ KakoSi/opaazzi
238
+ Kaledmgo/DialoGPT-small-donajulia
239
+ Kargan/DialoGPT-small-randombot
240
+ KaydenSou/Joshua
241
+ Keen/DialoGPT-small-potter
242
+ KekLord/DialoGPT-small-rick3
243
+ Keqing/Keqing-Siesta
244
+ Keqipig/DialoGPT-small-spamton
245
+ KhanAdeeb/model-tony-stark
246
+ KingCodeSquid/Octavian
247
+ KingCodeSquid/Octavian2
248
+ Kirili4ik/ruDialoGpt3-medium-finetuned-telegram
249
+ KnutZuidema/DialoGPT-small-morty
250
+ Konggate/DialoGPT-small-harrypotter
251
+ Koriyy/DialoGPT-medium-gf
252
+ Koro/DialoGPT-medium-rickandmorty
253
+ Koro/DialoGPT-small-rickandmorty
254
+ KringleClaus/Dialog-santa
255
+ KrispyIChris/DialoGPT-small-harrypotter
256
+ Kryptone/Burobot
257
+ Kryptone/RinAI
258
+ Kryptone/monikAI-Unstable
259
+ Kryptone/monikAI
260
+ Kshaunish/DialoGPT-small-rick
261
+ Kush/DialoGPT-small-harrypotter
262
+ LARACHNIDE/DialogGPT-small-sw
263
+ LactoseLegend/DialoGPT-small-Rick
264
+ Laezor/DialoGPT-small-witcher1
265
+ Laezor/DialoGPT-small-yakuza_0
266
+ LaiJY/DialoGPTChatbot
267
+ Laptop/DialoGPT-small-gandalf
268
+ Lenza/DialoGPT-medium-Kobayashi
269
+ Leonel/DialoGPT-small-chandler
270
+ Leostronkest/DialoGPT-small-michael
271
+ Leostronkest/DialoGPT
272
+ Leviii03/Dialogpt-small-Jake99
273
+ Lizardon/Peterbot
274
+ Lovery/Aqua
275
+ Lucdi90/DialoGPT-medium-XiaoBot
276
+ LuckyWill/DialoGPT-small-JakeBot
277
+ Lurka/DialoGPT-medium-isseibot
278
+ Lurka/DialoGPT-medium-kon
279
+ Luxiere/DialoGPT-medium-tyrion
280
+ MAUtastic/DialoGPT-medium-RickandMortyBot
281
+ MCUxDaredevil/DialoGPT-small-rick
282
+ MS366/DialoGPT-small-vision
283
+ MadhanKumar/DialoGPT-small-HarryPotter
284
+ MadhanKumar/HarryPotter-Bot
285
+ MagmaCubes1133/DialoGPT-large-rick
286
+ Mandy/DialoGPT-small-Mikasa
287
+ Manthan/DialoGPT-small-harrypotter
288
+ Mara/DialoGPT-medium-harrypotter
289
+ MathiasVS/DialoGPT-small-RickAndMorty
290
+ MaxW0748/DialoGPT-small-Rick
291
+ MayankGupta/DialoGPT-small-harrypotter
292
+ MichaelTheLearner/DialoGPT-medium-harry
293
+ Midhunkrishna/DialoGPT-small-bjk
294
+ Mierln/SmartHarry
295
+ MightyCoderX/DialoGPT-medium-EdwardElric
296
+ ModzabazeR/small-okaberintaro
297
+ Mohsin272/DialoGPT-medium-harrypotter
298
+ Mona/DialoGPT-small-harrypotter
299
+ MoonlitEtherna/DialoGPT-small-Nyivae
300
+ MrDuckerino/DialoGPT-medium-Rick
301
+ MrE/DialoGPT-medium-SARGE
302
+ MrE/DialoGPT-medium-SARGER1
303
+ MrE/DialoGPT-medium-SARGER3
304
+ MrGentle/DeltaModel-genius1
305
+ MrZ/DialoGPT-small-Rick
306
+ Mythiie/DialoGPT-small-Modeus
307
+ N8Daawg/chat_bot
308
+ NASABOI/MachineLearningAI
309
+ nabarun/DialoGPT-small-joshua
310
+ NamPE/DialoGPT-medium-Aqua-konosuba
311
+ NamPE/DialoGPT-medium-Takanashi-Rikka
312
+ NamPE/DialoGPT-small-satouhina
313
+ NanniKirby/DialoGPT-medium-bapi
314
+ NanniKirby/bapismall
315
+ Naturealbe/DialoGPT-small-harrypotter-2
316
+ Naturealbe/DialoGPT-small-harrypotter
317
+ Navigator/DialoGPT-medium-martymcfly
318
+ Navya2608/DialoGPT-medium-chandler
319
+ Navya2608/DialoGPT-medium-rachel
320
+ Navya2608/DialoGPT-small-tonystarkscript
321
+ Necrozma/harrypotterbot
322
+ Nekoism/Zhongli-Beta
323
+ NibrasShami/DialopGPT-small-HarryPotter
324
+ NickCavarretta/DialoGPT-small-laffy
325
+ Nihwy/DialoSqui
326
+ NikhilKrishna/DialoGPT-medium-harrypotter
327
+ Ninja5000/DialoGPT-medium-HarryPotter
328
+ Ninja5000/DialoGPT-medium-TWEWYJoshua
329
+ Niphredil/DialoGPT-small-lotr
330
+ Nisarg2701/DialoGPT-medium-Rick
331
+ NoLawz/DialoGPT-medium-hagrid
332
+ NoLawz/DialoGPT-medium-harrypotter
333
+ NoLawz/DialoGPT-medium-spongebob
334
+ Nova/DialoGPT-medium-Lelouch
335
+ NovaChrono/twervy
336
+ Obesitycart/ChatBot
337
+ Obscurity/DialoGPT-Medium-707
338
+ Oji/DialoGPT-small-Rick
339
+ Optimal/Harry
340
+ P4RZ1V4L/DialoGPT-Medium-Tony
341
+ PVAbhiram2003/DialoGPT-medium-RickandMorty
342
+ Paradocx/Dialogpt-mid-hpai
343
+ Pensador777critico/DialoGPT-small-RickandMorty
344
+ PhilipTheGreat/DiabloGPT-small-Traveller
345
+ PinoCorgi/DialoGPT-small-Shrek1
346
+ Piumi/DialogGPT-small-harrypotter
347
+ Plencers/DialoGPT-small-homer
348
+ Poly-Pixel/shrek-medium-full
349
+ Poly-Pixel/shrek-medium
350
+ Poly-Pixel/shrek-test-small
351
+ Pupihed/DialoGPT-small-shrek
352
+ PurpleJacketGuy/My_Jarvis
353
+ PurpleJacketGuy/My_Jarvis_2
354
+ RAhul03/DialoGPT-small-harrypotter
355
+ REAP3R/Chat-bot
356
+ REZERO/DialoGPT-medium-saitama
357
+ RTM/ChatBot
358
+ RTM/Lucky
359
+ RTurk/DialoGPT-small-TIMBOT
360
+ Radicalkiddo/DialoGPT-small-Radical
361
+ Rashid11/DialoGPT-small-rick
362
+ Rathod/DialoGPT-small-harrypotter
363
+ Redolid/DialoGPT-small-Rick
364
+ Rei/DialoGPT-medium-kurisu
365
+ RifsxD/DialoGPT-medium-raifu
366
+ RishabhRawatt/DialoGPT-small-Rickmorty
367
+ RishabhRawatt/DialoGPT-small-kela
368
+ Ritchie/DialoGPT-small-Rickandmorty
369
+ RizqFarIDN/DialoGPT-medium-harrypotter
370
+ RizqFarIDN/DialoGPT-small-harrypotter
371
+ RobinMari/DialoGPT-small-mikoto
372
+ Royce23/DialoGPT-small-almas
373
+ Rush11/DialoGPT-small-HarryPotter
374
+ Ryanar/DialoGPT-medium-Zelda
375
+ Ryukie/DialoGPT-small-Rick
376
+ S34NtheGuy/DialoGPT-medium-Glass_Of_Water
377
+ S34NtheGuy/DialoGPT-medium-Mona
378
+ S34NtheGuy/DialoGPT-small-Harry282
379
+ S34NtheGuy/DialoGPT-small-MJOLNIR_Soul
380
+ S34NtheGuy/DialoGPT-small-cursedryno
381
+ S34NtheGuy/DialoGPT-small-pikamew362
382
+ S34NtheGuy/DialoGPT-small-wetterlettuce
383
+ SJSui/RickBot
384
+ SPGT/LiveSafe-DialoGPT
385
+ SaffronIce/DialoGPT-medium-Jett
386
+ Salma-2/DialoGPT-small-harrypotter
387
+ Sammigooof/Peterbot
388
+ SarahhhUwU/DialoGPT-small-ally
389
+ Sarumomo/DialoGPT-small-test
390
+ Saviour/ChandlerBot
391
+ Saz/DialoGPT-small-paimon
392
+ Saz/DialoGPT-small-saz
393
+ Science-geek32/DialoGPT-small-doctor
394
+ Science-geek32/DialoGPT-small-doctor2.0
395
+ Scoops/SandalBot
396
+ ScottaStrong/DialogGPT-medium-Scott
397
+ ScottaStrong/DialogGPT-medium-joshua
398
+ ScottaStrong/DialogGPT-small-Scott
399
+ ScottaStrong/DialogGPT-small-joshua
400
+ Sebastianthecrab/DialoGPT-small-melchior
401
+ Sedge/DialoGPT-small-Sedge
402
+ Shakaw/DialoGPT-small-spongebot
403
+ ShayoGun/DialoGPT-small-shayo
404
+ Sheel/DialoGPT-small-harrypotter
405
+ Sheerwin02/DialoGPT-medium-mikasa
406
+ Sheerwin02/DialoGPT-small-isla
407
+ Sherman/DialoGPT-medium-joey
408
+ Shike/DialoGPT_medium_harrypotter
409
+ Shinx/DialoGPT-medium-myheroacademia
410
+ NaturesDisaster/DialoGPT-large-Neku
411
+ NaturesDisaster/DialoGPT-small-Neku
412
+ ShiroNeko/DialoGPT-small-rick
413
+ Shubham-Kumar-DTU/DialoGPT-small-goku
414
+ SilentMyuth/sarcastic-model
415
+ SilentMyuth/stableben
416
+ SirBastianXVII/DialoGPT-small-TVD
417
+ Sired/DialoGPT-small-trumpbot
418
+ Siyris/DialoGPT-medium-SIY
419
+ Siyris/SIY
420
+ Skywhy/DialoGPT-medium-Churchyy
421
+ Snaky/StupidEdwin
422
+ Soapsy/DialoGPT-mid-cartman
423
+ SonMooSans/DialoGPT-small-joshua
424
+ SonMooSans/test
425
+ Sora4762/DialoGPT-small-naruto
426
+ Sora4762/DialoGPT-small-naruto1.1
427
+ Soumyajit1008/DialoGPT-small-harryPotterssen
428
+ SpacyGalaxy/DialoGPT-medium-Gandalf
429
+ Spectrox/emmybot
430
+ Spirax/DialoGPT-medium-sheldon
431
+ Spoon/DialoGPT-small-engineer
432
+ Stabley/DialoGPT-small-evelynn
433
+ Stevo/DiagloGPT-medium-spamton
434
+ Stoned-Code/DioloGPT-large-Rick-SC-420
435
+ Sunnydx/BillCipherBot
436
+ TTYU/DialoGPT-small-trump
437
+ TVLG/DialoGPT-small-Iroh-Bot
438
+ Taramiko/DialoGPT-small-hoshiyo_kojima
439
+ Taramiko/Hoshiyo_Kojima
440
+ Tejasvb/DialoGPT-small-rick
441
+ Tejasvb/DialogGPT-small-rick
442
+ ThatSkyFox/DialoGPT-medium-joshua
443
+ ThatSkyFox/DialoGPT-small-joshua
444
+ The-Programmer-With-Cool-Pens/TifaBotAIPackage
445
+ TheCatsMoo/DialoGGPT-small-joshua
446
+ TheDiamondKing/DialoGPT-small-harrypotter
447
+ ThePeachOx/DialoGPT-small-harry
448
+ TheReverendWes/DialoGPT-small-rick
449
+ TheTUFGuy/HermioneChatBot
450
+ Thejas/DialoGPT-small-Stewei
451
+ Thejas/DialoGPT-small-elon
452
+ ThoracicCosine/DialoGPT-small-harrypotter
453
+ Tidum/DialoGPT-large-Michael
454
+ Toadally/DialoGPT-small-david_mast
455
+ Tofu05/DialoGPT-large-boon2
456
+ Tofu05/DialoGPT-med-boon3
457
+ TofuBoy/DialoGPT-medium-Yubin2
458
+ TofuBoy/DialoGPT-medium-boon
459
+ Tr1ex/DialoGPT-small-rick
460
+ TrebleJeff/DialoGPT-small-Michael
461
+ TrimPeachu/Deadpool
462
+ Trixzy/rickai-v1
463
+ Tropics/DialoGPT-small-peppa
464
+ UKJ5/DialoGPT-small-harrypotter
465
+ Username1/Mourinhio-medium
466
+ Username1/Mourinho
467
+ Username1/Wenger
468
+ VLRevolution/DialogGPT-small-GGODMODEL
469
+ VMET/DialoGPT-small-dumbassbot
470
+ VaguelyCynical/DialoGPT-small-RickSanchez
471
+ Vampiro/DialoGPT-small-dante_b
472
+ Vampiro/DialoGPT-small-dante_c
473
+ VariableZee/DialoGPT-small-ivylia03
474
+ Verge/Peterbot
475
+ VincentButterfield/DialoGPT-small-harrypotter
476
+ VishalArun/DialoGPT-medium-harrypotter
477
+ Vitafeu/DialoGPT-medium-ricksanchez
478
+ VulcanBin/DialoGPT-small-cortana
479
+ WarrenK-Design/DialoGPT-small-Rick
480
+ Wessel/DiabloGPT-medium-harrypotter
481
+ White/white-bot
482
+ Whitez/DialoGPT-small-twety
483
+ Wise/DialogGPT-small-JC
484
+ WoutN2001/james3
485
+ WurmWillem/DialoGPT-medium-RickandMorty3
486
+ Xeouz/Ultron-Small
487
+ XuguangAi/DialoGPT-small-Harry
488
+ XuguangAi/DialoGPT-small-Leslie
489
+ XuguangAi/DialoGPT-small-Rick
490
+ Yankee/test1234
491
+ Zane/Ricky
492
+ Zane/Ricky3
493
+ Zeer0/DialoGPT-small-ZerO
494
+ Zen1/Derekbot
495
+ Zen1/test1
496
+ Zeph/DialoGPT-small-rick
497
+ Zephaus/Chromrepo
498
+ Zixtrauce/BDBot
499
+ Zixtrauce/BDBot4Epoch
500
+ Zixtrauce/BaekBot
501
+ Zixtrauce/BrandonBot
502
+ Zixtrauce/BrandonBot2
503
+ Zixtrauce/JohnBot
504
+ Zixtrauce/SelfAwareness
505
+ Zuha/DialoGPT-small-gandalf
506
+ a01709042/DialoGPT-medium
507
+ aadilhassan/Chandlerbot
508
+ aashutosh2102/DialoGPT-smalll-harrypotter
509
+ abhiramtirumala/DialoGPT-sarcastic
510
+ abhisht/DialoGPT-medium-Emilybot
511
+ abjbpi/DS_small
512
+ abjbpi/Dwight_Schrute
513
+ aced/DialoGPT-medium-3PO
514
+ adviksinghania/DialoGPT-medium-rick
515
+ af1tang/personaGPT
516
+ aggb/DialogGPT-small-AGGB-B
517
+ aimiekhe/yummv1
518
+ aimiekhe/yummv2
519
+ aishanisingh/DiagloGPT-small-michaelscott
520
+ aishanisingh/DialoGPT-small-harrypotter
521
+ akaushik1/DialoGPT-small-kaiser
522
+ akhooli/personachat-arabic
523
+ alankar/DialoGPT-small-rick
524
+ alipsezzar/DialoGPT-medium-harrypotter
525
+ alistair7/bbt-diagpt2-model
526
+ aluserhuggingface/DialoGPT-small-harrypotter
527
+ alvinkobe/DialoGPT-medium-steve_biko
528
+ alvinkobe/DialoGPT-small-KST
529
+ andikarachman/DialoGPT-small-sheldon
530
+ anduush/DialoGPT-small-Rick
531
+ ange/DialoGPT-medium-Monke
532
+ ankimt01/DialoGPT-small-anch
533
+ ann101020/le2sbot-hp
534
+ anshengli2/DialogGPT-small-Bot
535
+ anweasha/DialoGPT-small-Chandler
536
+ anweasha/DialoGPT-small-Jake
537
+ aplnestrella/Aladdin-Bot
538
+ arampacha/DialoGPT-medium-simpsons
539
+ archmagos/HourAI
540
+ ardatasc/miniMe-version1
541
+ arifbhrn/DialogGPT-small-Rickk
542
+ arnav7633/DialoGPT-medium-tony_stark
543
+ aryanbhosale/DialoGPT-medium-harrypotter
544
+ asad/DialoGPT-small-harryporter_bot
545
+ ashwinchandran13/DialoGPT-small-harrypotter
546
+ astrobreazy/DialoGPT-small-harrypotter
547
+ atkh6673/DialoGPT-small-harrypotter
548
+ atkh6673/DialoGPT-small-trump
549
+ atomsspawn/DialoGPT-small-dumbledore
550
+ augustojaba/DialoGPT-small-harrypotter
551
+ avinashshrangee/DialoGPT-small-Ricky
552
+ awvik360/DialoGPT-medium-plemons
553
+ awvik360/DialoGPT-medium-plemons2
554
+ awvik360/DialoGPT-small-plemons
555
+ aydin/DialoGPT-medium-michael
556
+ ayush19/rick-sanchez
557
+ b0shakk/DialoGPT-small-Ragnar
558
+ balta/DialoGPT-small-TestBot
559
+ banden/DialoGPT-medium-RickBot
560
+ banden/DialoGPT-small-LokiBot
561
+ beatajackowska/DialoGPT-RickBot
562
+ benajtil/DialoGPT-small-Daddyben
563
+ benajtil/DialoGPT-small-RickAndMortyScripts
564
+ benjaminbeilharz/dialoGPT-small-empatheticdialogues-generation
565
+ benmrtnz27/DialoGPT-small-misato
566
+ bensuydam/CartmanBot
567
+ bestminerevah/DialoGPT-small-thetenthdoctor
568
+ bhaden94/LokiDiscordBot-medium
569
+ bhavya689/DialoGPT-large-chandler
570
+ bleachybrain/DialoGPT-med-ss
571
+ bmdonnell/DialoGPT-medium-harrypotter
572
+ bonebambi/DialoGPT-small-ThakirClone
573
+ bookemdan/DialoGPT-small-harrypotter
574
+ boran/berkbot
575
+ boydster/DialoGPT-small-gollum
576
+ brimeggi/testbot2
577
+ brokentx/newbrokiev2
578
+ bspans/DialoGPT-small-yoda
579
+ byeongal/Ko-DialoGPT
580
+ bypequeno/DialoGPT-small-michaelscott
581
+ caps1994/DialoGPT-small-chrisbot-caps1994
582
+ caps1994/DialoGPT-small-chrisbot
583
+ caps1994/DialoGPT-small-harrypotter-caps1994
584
+ cartyparty/DialoGPT-small-harrypotter
585
+ cartyparty/DialoGPT-small-iteration1
586
+ cartyparty/DialoGPT-small-nerdherd
587
+ cedpsam/chatbot_fr
588
+ centon21/DialoGPT-small-harrypotter
589
+ chaitrabhat/DialoGPT-small-rick
590
+ chamindu/DialoGPT-medium-hermione
591
+ chamodkarunasena/DialoGPT-medium-sokka
592
+ chan030609/DialoGPT-medium-JAB
593
+ chan030609/DialoGPT-small-JAB
594
+ chellver24/DialoGPT-medium-chizuru_ichinose
595
+ chip/DialoGPT-small-chizuru
596
+ thu-coai/blenderbot-400M-esconv
597
+ clairesb/kindness_bot
598
+ clairesb/kindness_bot_repo
599
+ clancystudios/DialoGPT-medium-Morty
600
+ clayfox/DialoGPT-medium-Hiccup
601
+ clayfox/DialoGPT-small-Hiccup
602
+ cocoaclef/DialoGPT-small-kohaku
603
+ codealtgeek/DiabloGPT-medium-rickmorty
604
+ colochoplay/DialoGTP-small-harrypotter
605
+ conniezyj/DialoGPT-small-snape
606
+ cookirei/DialoGPT-medium-Joreyar
607
+ cosmic/DialoGPT-Rick
608
+ cosmicray001/prod-harry
609
+ cosmicray001/small-harry
610
+ crystalgate/DialoGPT-small-rick
611
+ cumtowndiscord/DialoGPT-small-joshua
612
+ cutiebunny639/DialoGPT-small-harry
613
+ d4rk/harry
614
+ danildany/DialoGPT-small-MichaelScott
615
+ danny481/DialoGPT-small-datnguyenchatbot
616
+ danny481/DialoGPT-small-harrypotter
617
+ danny481/Final_ChatBot
618
+ darkzek/chickenbot-jon-snow
619
+ darthboii/DialoGPT-small-PickleRick
620
+ darthboii/DialoGPT-small-Rick
621
+ dats/DialoGPT-small-harrypotter
622
+ dattam/DialoGPT-medium-TonyStarkBot
623
+ dead69/GPT-small-yoda
624
+ deepparag/Aeona
625
+ deepparag/DumBot-Beta
626
+ deepparag/DumBot
627
+ delvan/DialoGPT-medium-DwightV1
628
+ df4rfrrf/DialoGPT-medium-Aerith
629
+ dhanushlnaik/amySan
630
+ disdamoe/DialoGPT-small-moe
631
+ disdamoe/TheGreatManipulator
632
+ disdamoe/TheManipulator
633
+ divi/Peterbot
634
+ dk16gaming/DialoGPT-small-HarryPotter
635
+ dkminer81/Tromm
636
+ dreamline2/DialoGPT-small-joshua-demo
637
+ dukeme/DialoGPT-small-RDBotv1
638
+ eclare/DialoGPT-small-SCHAEFER
639
+ educhav/Austin-DialoGPT-small
640
+ educhav/Elijah-DialoGPT-small
641
+ educhav/J-DialoGPT-small
642
+ educhav/Sam-DialoGPT-small
643
+ eklrivera/DialoGPT-small-harrypotter
644
+ eldritch-axolotl/Rick
645
+ ericklasco/DialoGPT-small-erickHarryPotter
646
+ ericzhou/DialoGPT-Medium-Rick
647
+ ericzhou/DialoGPT-Medium-Rick_v2
648
+ ericzhou/DialoGPT-medium-elon
649
+ ericzhou/tsundere_v1
650
+ estehpanas/pascalbot
651
+ ethzhou/jooby
652
+ ethzhou/joobyChat
653
+ ethzhou/newJooby
654
+ f00d4tehg0dz/Peppa
655
+ f00d4tehg0dz/Yoda
656
+ facebook/blenderbot-1B-distill
657
+ facebook/blenderbot-3B
658
+ facebook/blenderbot-400M-distill
659
+ facebook/blenderbot-90M
660
+ facebook/blenderbot_small-90M
661
+ faketermz/DialoGPT
662
+ fatemaMeem98/DialoGPT-medium-HermioneGrangerBot
663
+ felinecity/DioloGPT-small-KaeyaBot
664
+ felinecity/DioloGPT-small-KaeyaBot2
665
+ felinecity/DioloGPT-small-LisaBot
666
+ felinecity/ScaraBot
667
+ fibruh/DialoGPT-small-harrypotter
668
+ flakje/DialoGPT-small-Marty
669
+ flooptherocket/DialogGPT-small-rick
670
+ ftnvir/DialoGPT-medium-bullyMaguire
671
+ gabtan99/dialogpt-tagalog-medium-10
672
+ gabtan99/dialogpt-tagalog-medium-20
673
+ gabtan99/dialogpt-tagalog-medium-30
674
+ gabtan99/dialogpt-tagalog-medium
675
+ gfdream/dialogpt-small-familyguy
676
+ gfdream/dialogpt-small-harrypotter
677
+ ghhostboy/DialoGPT-medium-connorDBH3-1
678
+ ghhostboy/DialoGPT-medium-connorDBH3-21
679
+ gizmo-dev/DialoGPT-small-jake
680
+ gorkemgoknar/gpt2chatbotenglish
681
+ grayson124/chatbotwaifu
682
+ grounddominator/DialoGPT-lar-Rick
683
+ gusintheshell/DialoGPT-small-rickbot
684
+ gwima/ryan-sackmott
685
+ hama/Doctor_Bot
686
+ hama/Harry_Bot
687
+ hama/barney_bot
688
+ hama/me0.01
689
+ hama/rick_bot
690
+ heabeoun/DiabloGPT-small-nuon-conv
691
+ henryoce/DialoGPT-small-rick-and-morty
692
+ hervetusse/DialogGPT-small-harrypotter
693
+ hireddivas/DialoGPT-small-ray
694
+ hireddivas/DialoGPT-small-scully
695
+ hireddivas/dialoGPT-small-mulder
696
+ hireddivas/dialoGPT-small-phil
697
+ hireddivas/dialoGPT-small-sonic
698
+ honguyenminh/old-zhongli
699
+ houssaineamzil/DialoGPT-small-joey
700
+ hrv/DialoGPT-small-rick-morty
701
+ hyunwoongko/blenderbot-9B
702
+ hyunwoongko/reddit-3B
703
+ hyunwoongko/reddit-9B
704
+ iamalpharius/GPT-Small-BenderBot
705
+ ianc89/hagrid
706
+ ignkai/DialoGPT-medium-spider-man-updated
707
+ ilikeapple12/DialoGPT-small-Phos
708
+ imran2part/DialogGPT-small-Doctor
709
+ imrit1999/DialoGPT-small-MCU
710
+ myynirew/DialoGPT-medium-ettengiv
711
+ myynirew/DialoGPT-medium-leirbag
712
+ myynirew/DialoGPT-small-awazimuruk
713
+ ionite/DialoGPT-large-Sh0rtiAI-v2
714
+ ionite/DialoGPT-medium-IoniteAI
715
+ ionite/DialoGPT-medium-McKayAI-v2
716
+ ionite/DialoGPT-medium-McKayAI
717
+ ionite/DialoGPT-medium-Sh0rtiAI
718
+ ionite/DialoGPT-medium-mohnjilesAI
719
+ ionite/DialoGPT-medium-orangeAI
720
+ ironman123/DialoGPT-small-harrypotter
721
+ ishraaqparvez/DialoGPT-small-harrypotter
722
+ jackky46/DialoGPT-medium-got
723
+ jahz/DialoGPT-medium-FF8
724
+ jalensmh/DialoGPT-medium-jalenbot
725
+ jalensmh/DialoGPT-small-exophoria
726
+ jamestop00/DialoGPT-spike-medium
727
+ jasper/DialoGPT-large-homersimpson
728
+ jchen/DialoGPT-evan
729
+ jeanlks/DialogGPT-small-gayvid
730
+ jeanlks/DialogGPT-small-pato
731
+ jfhr1999/CharacterTest
732
+ jogp10/DialoGPT-medium-arya
733
+ jollmimmim/DialoGPT-small-monkeydluffy
734
+ jordanhagan/DialoGPT-medium-NegaNetizen
735
+ josephmagnayon/DialoGPT-medium-Alfred
736
+ josepjulia/RepoHumanChatBot
737
+ josh8/DialoGPT-medium-josh
738
+ josh8/DialoGPT-small-josh
739
+ jpsxlr8/DialoGPT-small-harrypotter
740
+ jth1903/DialoGPT-small-rick
741
+ julianolf/DialoGPT-small-harrypotter
742
+ kaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaot1k/DialoGPT-small-Wanda
743
+ kagennotsuki/DialoGPT-medium-radion
744
+ kche0138/DialoGPT-medium-DIO
745
+ kingabzpro/DialoGPT-small-Rick-Bot
746
+ kipiiler/Rickbot
747
+ knightbat/harry-potter
748
+ kripanshudixit/DialoGPT-small-phoenix
749
+ kris/DialoGPT-small-spock
750
+ kris/DialoGPT-small-spock3
751
+ kris/DialoGPT-small-spock4
752
+ kris/DialoGPT-small-spock5
753
+ kshitiz/testing-bot-repo
754
+ kunalbhargava/DialoGPT-small-housebot
755
+ kvothe28/DiabloGPT-small-Rick
756
+ l41n/c3rbs
757
+ lain2/Peterbot
758
+ lanejm/DialoGPT-small-hagrid
759
+ lapacc33/DialoGPT-medium-rick
760
+ life4free96/DialogGPT-med-TeiaMoranta
761
+ life4free96/DialogGPT-med-TeiaMoranta3
762
+ light/small-rickk
763
+ limivan/DialoGPT-small-c3po
764
+ cosmicroxks/DialoGPT-small-scott
765
+ logube/DialogGPT_small_harrypotter
766
+ lonewanderer27/DialoGPT-small-Joshua
767
+ lonewanderer27/KeitaroBot
768
+ lonewanderer27/YoshinoriBot
769
+ lonewanderer27/YuriBot
770
+ lovellyweather/DialoGPT-medium-johnny
771
+ luca-martial/DialoGPT-Elon
772
+ lucas-bo/DialogGPT-small-yoda
773
+ ludowoods/KujouSara
774
+ lulueve3/DialoGPT-medium-Kokkoro
775
+ lulueve3/DialoGPT-medium-Kokkoro2
776
+ madbuda/DialoGPT-got-skippy
777
+ madbuda/DialoGPT-medium-skippy
778
+ majonez57/JoeBot
779
+ manav/dialogpt-large-kanye-reddit
780
+ manav/dialogpt-medium-berkeley-reddit
781
+ maniacGhost24/MichaelScott-bot-push-small
782
+ manraf/DialoGPT-smmall-harrypotter
783
+ matprado/DialoGPT-small-rick-sanchez
784
+ maxxx2021/DialGPT-small-harrypotter
785
+ mdc1616/DialoGPT-large-sherlock
786
+ melon422/DialoGPT-medium-MelonBot
787
+ melon422/DialoGPT-medium-MelonBot2
788
+ mewmew/DialoGPT-small-rick
789
+ michelleshx/DialoGPT-small-michelle-discord-bot
790
+ microsoft/DialoGPT-large
791
+ microsoft/DialoGPT-medium
792
+ microsoft/DialoGPT-small
793
+ mikabeebee/Peterbot
794
+ milayue/neosh-bot1
795
+ minsiam/DialoGPT-medium-harrypotterbot
796
+ minsiam/DialoGPT-small-harrypotterbot
797
+ miogfd1234/ll
798
+ mittalnishit/DialoGPT-medium-rickman2
799
+ mittalnishit/DialoGPT-small-rickman
800
+ mjstamper/DialoGPT-small-samwise
801
+ mk3smo/dialogpt-med-ahiru
802
+ mk3smo/dialogpt-med-duck2
803
+ mk3smo/dialogpt-med-duck3
804
+ mk3smo/dialogpt-med-duck5
805
+ mk3smo/dialogpt-med-duckfinal
806
+ mk3smo/dialogpt-med-stt3
807
+ mklucifer/DialoGPT-medium-DEADPOOL
808
+ mklucifer/DialoGPT-small-DEADPOOL
809
+ mluengas/DialogGPT-small-michaelscott
810
+ model-mili/DailoGPT-Yukub-v3
811
+ model-mili/DialoGPT-small-Sapph-v1
812
+ model-mili/DialoGPT-small-Yukub-v2
813
+ model-mili/DialoGPT-small-Yukub
814
+ mohammedks713/DialoGPT-small-harrypotter
815
+ mohammedks713/DialoGPT-small-jonsnow
816
+ mra1ster/DialoGPT_scully_small
817
+ muhardianab/DialoGPT-small-theoffice
818
+ munezah/DialoGPT-small-aot
819
+ munezah/DialoGPT-small-sherlock
820
+ mutamuta/DialoGPT-small-rick
821
+ mutamuta/DialoGPT-spongebob-small
822
+ namanrana16/DialoGPT-small-TrumpBot
823
+ nanometeres/DialoGPT-medium-halbot
824
+ nanometeres/DialoGPT-small-halbot
825
+ ncoop57/DiGPTame-medium
826
+ niharikadeokar/DialoGPT-small-Jakebot
827
+ nikhilpatil2532000/DialoGPT-small-harrypotter
828
+ nimrazaheer/DialoGPT-small-harrypotter
829
+ nitishk/IronStarkBot
830
+ nlokam/DialoGPT-digibot3.0-new
831
+ nlokam/Digibot
832
+ nlokam/ada_V.3
833
+ nlokam/ada_V.6
834
+ nlokam/ada_V.7
835
+ nlokam/books_to_bots_v.00
836
+ noobed/DialoGPT-small-astley
837
+ norie4/DialoGPT-small-kyutebot
838
+ norie4/DialoGPT-small-memoji
839
+ not7even/DialoGPT-small-7evenpool
840
+ npc-engine/exported-bart-light-gail-chatbot
841
+ ntjrrvarma/DialoGPT-small-RickBot
842
+ nwl/DialoGPT-small-enhypen
843
+ nytestalkerq/DialoGPT-medium-joshua
844
+ oakkas/Dialge-small-harrypotter-oguz
845
+ odinmay/joebot
846
+ odinmay/zackbotmodel
847
+ ogpat123/DialoGPT-small-Michael
848
+ ogpat23/Jules-Chatbot
849
+ omkar1309/RickBot
850
+ omnimokha/DialoGPT-medium-jakeamal
851
+ omnimokha/DialoGPT-small-jakeamal
852
+ omnimokha/jakebot2
853
+ oododo/DialoGPT-small-elon
854
+ otto-camp/DialoGPT-small-RickBot
855
+ overgrowth/jokeboy
856
+ owencubes/DialoGPT-small-Josuke
857
+ paladinx00/rh-bender
858
+ parigaswetha/DialoGPT-small-jakeperalta
859
+ parthsinha/DialoGPT-small-rickandmorty
860
+ pashin/DialoGPT-small-ironman-2
861
+ pashin/DialoGPT-small-ironman-3
862
+ pashin/DialoGPT-small-ironman1
863
+ pastlecry/DialoGPT-small-harrypotter
864
+ peamjo/DialoGPT-small-morty
865
+ person123/DialoGPT-small-petergriffin
866
+ pewriebontal/DialoGPT-medium-Pewpewbon
867
+ phantom-deluxe/dialoGPT-RickBot
868
+ phantom-deluxe/dialoGPT-harry
869
+ phozon/harry-potter-medium
870
+ piyushdubey/DialoGPT-Mi
871
+ pompeiifreckles/DialoGPT-medium-Rick
872
+ ppn/DialoGPT-small-harrypotter
873
+ pranavtharoor/test
874
+ professional/DialoGPT-small-joshua
875
+ ps2102/DialoGPT-small-harrypotter
876
+ psblade/DialoGPT-medium-PotterBot
877
+ puugz/DialoGPT-small-spiderman
878
+ qwerty/DialoGPT-small-rick
879
+ r3cdhummingbird/DialoGPT-medium-joshua
880
+ r3dhummingbird/DialoGPT-medium-joshua
881
+ r3dhummingbird/DialoGPT-medium-neku
882
+ r3dhummingbird/DialoGPT-small-harrypotter
883
+ r3dhummingbird/DialoGPT-small-neku
884
+ rachelcorey/DialoGPT-medium-kramer
885
+ rachelcorey/DialoGPT-medium-niles
886
+ rafakat/Botsuana-rick
887
+ rahul26/DialoGPT-small-rickandmorty
888
+ rahulMishra05/discord-chat-bot
889
+ raj2002jain/DialoGPT-small-Light
890
+ ravephelps/DialoGPT-small-MichaelSbott
891
+ redbloodyknife/DialoGPT-medium-shayo
892
+ rhollings/DialoGPT_small_steverogers
893
+ richiellei/Childe
894
+ richiellei/Childe3
895
+ richiellei/DialoGPT-small-rick
896
+ richielleisart/Childe
897
+ ridwanpratama/DialoGPT-small-misaki
898
+ rinz/DialoGPT-small-Harry-Potterrr
899
+ rlagusrlagus123/XTC20000
900
+ rlagusrlagus123/XTC4096
901
+ rmicheal48/DialoGPT-small-steven_universe
902
+ rodrigodz/DialoGPT-medium-dxd
903
+ romuNoob/Mine
904
+ romuNoob/test
905
+ rovai/AI
906
+ rovai/CARRIE
907
+ rovai/Chat_pytorch1
908
+ rovai/chatbotmedium1
909
+ rovai/chatbotmedium2
910
+ rovai/chatbotmedium3
911
+ rovai/chatbotmedium4
912
+ rovai/chatbotone
913
+ rpeng35/DialoGPT-small-erenyeager
914
+ rrtong/DialoGPT-medium-shang-chi
915
+ rsd511/DialoGPT-small-house
916
+ rsedlr/RickBot
917
+ rsedlr/RickBotExample
918
+ ruriko/bacqua
919
+ ruriko/konoaqua
920
+ ruriko/konodio
921
+ sachdevkartik/DialoGPT-small-rick
922
+ saintseer121323/DialoGPT-small-kotonoha
923
+ sakai026/Chizuru
924
+ sakai026/Mizuhara
925
+ sam213/DialoGPT-small-harrypotter
926
+ sambotx4/scamantha
927
+ samuelssonm/DialoGPT-small-rick
928
+ sanjanareddy226/JakeBot
929
+ sankalpjha1/mr.bot_haary
930
+ satkinson/DialoGPT-medium-marvin
931
+ satkinson/DialoGPT-small-marvin
932
+ satvikag/chatbot
933
+ satvikag/chatbot2
934
+ sergunow/movie-chat
935
+ setiadia/DialogGPT-small-HPBot
936
+ shelb-doc/DialoGPT-medium-ash
937
+ shihab/HarryPotter
938
+ shonuff/DialoGPT-medium-konosuba
939
+ shreeshaaithal/DialoGPT-small-Michael-Scott
940
+ shreeshaaithal/Discord-AI-bot
941
+ shreeshaaithal/whatsapp-medium-bot-2
942
+ sidkhuntia/harrypotter
943
+ sifclairhelix/DialoGPT-small-harrypot
944
+ simrana5/RickBotExample
945
+ skynex/DialoGPT-small-batman
946
+ skynex/DialoGPT-small-finalbatman
947
+ sleekmike/DialoGPT-small-joshua
948
+ smilesandtea/DialoGPT-medium-Rick
949
+ smmzhu/DialoGPT-small-SZ
950
+ solfer/DialoGPT-small-ryuji
951
+ spockinese/DialoGPT-small-sherlock
952
+ sreyanghosh/DialoGPT-medium-joker
953
+ srirachasenpai/DialoGPT-medium-harrypotter
954
+ srv/DialoGPT-medium-Breaking_Bad
955
+ ssam/DialoGPT-small-RickmfSanchez
956
+ ssspider/DialoGPT-medium-harrypotter
957
+ stfuowned/nek
958
+ stfuowned/rick
959
+ sthom/DialoGPT-small-tin
960
+ sudip/bot1
961
+ sudoabrar/DialoGPT-small-dwight
962
+ suhasjain/DailoGPT-small-harrypotter
963
+ swapnil165/DialoGPT-small-Rick
964
+ terter/rick-bot-test-v2
965
+ thatoneguy267/DialoGPT-small-Oscar
966
+ thatoneguy267/bruhpleasehelpme
967
+ theChanChanMan/DialoGPT-small-chandler
968
+ thefryingpan/gpt-neo-125M-splishy
969
+ theiconik/hermione-granger
970
+ thesamuelpena/Dialog-medium-Sonic
971
+ thesamuelpena/Dialog-medium-masterchief
972
+ thetlwin/DialoGPT-small-ironman
973
+ thinhda/chatbot
974
+ thu-coai/CDial-GPT2_LCCC-base
975
+ thu-coai/CDial-GPT_LCCC-base
976
+ thu-coai/CDial-GPT_LCCC-large
977
+ ticet11/DialoGPT-small-BOBBY
978
+ timslams666/DialoGPT-small-rick
979
+ tinega/DialoGPT-small-harrypotter
980
+ tngo/DialoGPT-small-HankHill
981
+ toiletwater/DialoGPT-medium-ironman
982
+ tom1804/HP
983
+ tom1804/HP_last
984
+ tom1804/hp_new
985
+ tomascerejo12/DialoGPT-small-Rick
986
+ tosin/dialogpt_mwoz
987
+ tosin/dialogpt_sv
988
+ toyfreak/DialoGPT-small-addy
989
+ toyfreak/DialoGPT-small-shy
990
+ tpri/DialoGPT-small-pa
991
+ tprincessazula/Dialog-GPT-small-AANG
992
+ tprincessazula/Dialog-GPT-small-KATARA-AVATAR
993
+ tprincessazula/Dialog-GPT-small-SOKKA-AVATAR
994
+ tprincessazula/Dialog-GPT-small-harrypotter
995
+ transfaeries/DialoGPT-Discord
996
+ transfaeries/DialoGPT-medium-Discord-1.0
997
+ transfaeries/DialoGPT-small-Discord-1.0
998
+ transfaeries/Twilight-Sparkle-GPT
999
+ trig/DialoGPT-small-harrypotter
1000
+ trig/multiverse-second
1001
+ trig/multiverse
1002
+ trig/sokka-chatbot-test
1003
+ trig/tlok-test
1004
+ troythewar/DialogGPT-small-harrypotter
1005
+ truthisneverlinear/EleventhDoctor
1006
+ ttntran/DialoGPT-small-human
1007
+ tuantt/GroundNet
1008
+ ughvom/Ginger
1009
+ ughvom/britnayBOTMAIN
1010
+ umr55766/DialogGPT-small-peppa-pig
1011
+ usamazaheer/DialoGPT-small-harrypotter
1012
+ uutkras/Pandabot
1013
+ uyharold86/DialoGPT-small-RickAndMorty
1014
+ valarikv/DialoGPT-small-bateman
1015
+ vibranium19/DialoGPT-medium-jake
1016
+ victordata/DialoGPT-small-Rick
1017
+ victorswedspot/DialoGPT-small-gandalf
1018
+ vijayv500/DialoGPT-small-Big-Bang-Theory-Series-Transcripts
1019
+ vijote/DialoGPT-small-Morty
1020
+ vivek-g-2009/DialoGPT-medium-harrypotter
1021
+ vlco-o/NLboto_o-aki-dialogpt
1022
+ vlco-o/NLboto_o-small-dialogpt
1023
+ wadeed/DialogGPT-small-chandlerbingg
1024
+ wanderer/DialoGPT-small-Phoebe
1025
+ wjching/DialoGPT-small-ricksanchez
1026
+ won/DialoGPT-small-harrypotter
1027
+ worms3401/DialoGPT-small-Eleonora
1028
+ worsterman/DialoGPT-small-mulder
1029
+ wtrClover/DialoGPT-small-Flutterbot
1030
+ wtrClover/DialoGPT-small-TwilightBot
1031
+ xdmason/pretrainedCas
1032
+ xiaoheiqaq/DialoGPT-mediumJojo
1033
+ xiaoheiqaq/DialoGPT-smallharrypotter
1034
+ yahya1994/DialoGPT-small-AOT-Eren
1035
+ yahya1994/DialoGPT-small-DN-L
1036
+ yahya1994/DialoGPT-small-DN-Light
1037
+ yahya1994/DialoGPT-small-DN-Ryuk
1038
+ yahya1994/DialoGPT-small-Gintama-Gintoki
1039
+ yahya1994/DialoGPT-small-Parasyte-Migi
1040
+ yahya1994/DialoGPT-small-ReZero-Rem
1041
+ yahya1994/DialoGPT-small-ReZero-Subaru
1042
+ yahya1994/DialoGPT-small-Ryuk
1043
+ yusufmorsi/georgebot
1044
+ zaydzuhri/lelouch-medium
1045
+ zemi/jakebot
1046
+ zen-satvik/BotGPT-medium-HP
1047
+ zentos/DialoGPT-small-spongebob
1048
+ zinary/DialoGPT-small-rick-new
1049
+ zuto37/DialoGPT-small-sadao
1050
+ Maxwere/DiabloGPT-medium-maxbot
1051
+ Grungle/DialoGPT-medium-butters
1052
+ sadkat/technoai
1053
+ Grungle/DialoGPT-medium-butters2
1054
+ kookyklavicle/sean-diaz-bot
1055
+ kookyklavicle/sean-diaz
1056
+ Aquasp34/DialoGPT-small-aqua1
1057
+ zenham/khemx
1058
+ aryanbhosale/smartharrypotterbot
1059
+ Britain/DialoGPT-small-ZifBotTwoFixed
1060
+ Britain/DialoGPT-small-DanyBotThree
1061
+ infinitylyj/DialogGPT-small-rick
1062
+ infinitylyj/DialogGPT-small-general
1063
+ infinitylyj/DialogGPT-medium-general
1064
+ jackyv/DialoGPT-small-pinocchio
1065
+ Freak55/DialoGPT-small-Phoenix-Wright
1066
+ Britain/DialoGPT-small-DanyBotThreeFixed
1067
+ Britain/DialoGPT-small-DanyBotTwo
1068
+ P4RZ1V4L/DialoGPT-medium-tonystark
1069
+ Britain/DialoGPT-small-DanyBotTwoNew
1070
+ zenham/mskeen_m_e4_16h
1071
+ zenham/khemx_m_e4_16h
1072
+ zenham/wail_m_e4_16h_2k
1073
+ RTM/vilang
1074
+ BeanBoi50404/DialoGPT-small-PeppaPigButBetter
1075
+ nabin19677/small-cartman
1076
+ Prime2911/DialoGPT-small-handsomejack
1077
+ Starry/KARENTRIES
1078
+ dietconk/DialogGPT-small-Orange
1079
+ mafeu/DialoGPT-medium-willem
1080
+ Prime2911/DialoGPT-medium-handsomejack
1081
+ Meowren/DialoGPT-small-Rick-Bot
1082
+ DB13067/Peterbot
1083
+ Savitar/DialoGPT-medium-RickandMorty
1084
+ MolePatrol/Olbot
1085
+ erinchocolate/DialoGPT-small-harrypotter
1086
+ Valouzze/FairuvenIA
1087
+ MehSatho/Tai-medium-Hermione
1088
+ Valouzze/MegaIA
1089
+ Makinitas/DialoGPT-small-RickAndMortyScripts
1090
+ darthrussel/DialoGPT-small-rickandmorty
1091
+ vanilladucky/Friends_chatting_bot
1092
+ vanilladucky/Friends_chatting_bot_redefined
1093
+ chocoduck/Joey_bot
1094
+ duanxingjuan/DialoGPT-medium-DEMON_SLAYER
1095
+ pinkducky/Monica_Bot
1096
+ Starry/HELLORUKAS
1097
+ pinkducky/Rachel_Bot
1098
+ trig/multiverse-third
1099
+ pinkducky/Ross_Bot
1100
+ duanxingjuan/DialoGPT-large-DEMON_SLAYER_v1
1101
+ duanxingjuan/DialoGPT-large-DEMON
1102
+ duanxingjuan/DialoGPT-large-DEMON1
1103
+ issue89/DialoGPT-small-house
1104
+ LeonLi279/DialoGPT-small-harrypotter
1105
+ MolePatrol/DialoGPT-Medium-ConnerBot
1106
+ MolePatrol/DialoGPT-Medium-MoleBot
1107
+ TheDaydreamer/ricky
1108
+ BeamBee/DialoGPT-small-Lavenza
1109
+ Garsic/DialoGPT-medium-pecorine
1110
+ CallForEcho/DialoGPT-small-harrypotter
1111
+ BeamBee/DialoGPT-small-LavenzaNumTwo
1112
+ Meowren/MichaelScottBott
1113
+ shalpin87/dialoGPT-homer-simpson
1114
+ darthrussel/DialoGPT-small-homerbot-halfdata
1115
+ TheGoldenToaster/DialoGPT-medium-Woody
1116
+ bemich/DialoGPT-small-GeorgeCostanza
1117
+ AAAA-4/DialoGPT-small-player_03
1118
+ Teyronebigdick/DialoGPT-small-harrypotter
1119
+ Sammith/DialoGPT-small-miachael
1120
+ Nxtxn01/DialoGPT-small-harrypotter
1121
+ Teyronebigdick/DialoGPT-small-terrydavis
1122
+ mczolly/DialoGPT-small-the-doctor
1123
+ crazypegasus/GPT-JonSnow
1124
+ MrYiRen/DialoGPT-small-harrypotter
1125
+ TropicalJuice/Dialog-PeterGriffin
1126
+ TheGoldenToaster/DialoGPT-medium-Bot
1127
+ MrYiRen/DialoGPT-small-harrypotter2
1128
+ gulgulglut/DialoGPT-small-Rick
1129
+ trev/DialoGPT-small-MLP
1130
+ RAJESHNEMANI/Chatbot_AI
1131
+ lilapapazian/DialoGPT-small-harrypotter
1132
+ Alethea/GPT2-chitchat
1133
+ florentiino/DialoGPT-small-harrypotter
1134
+ NUTELEX/Eva
1135
+ jessicammow/DialoGPT-small-ronswanson
1136
+ MrYiRen/DialoGPT-small-ZC
1137
+ jessicammow/DialoGPT-medium-leslieknope
1138
+ AmbricJohnson5888/death
1139
+ AmbricJohnson5888/claura
1140
+ DarrellTimothy/DialoGPT-small-harrypotter
1141
+ RarePizzaDog/Apes_Bot
1142
+ iyedr8/DialoGPT-small-rick
1143
+ MEDT/ChatBot
1144
+ NonzeroCornet34/DialoGPT-small-hansolo
1145
+ NonzeroCornet34/DialoGPT-small-philbot
1146
+ atomsspawn/DialoGPT-medium-dumbledore
1147
+ florentiino/DialoGPT-small-rick
1148
+ ShibaDeveloper/DialoGPT-small-harrypotter
1149
+ sahilnare78/DialogGPT-medium-harrypotter
1150
+ Garsic/DialoGPT-medium-jill
1151
+ mdm/DialoGPT-small-Kanye
1152
+ ScyKindness/Hatsune_Miku
1153
+ aaaacash/DialoGPT-large-michaelscott
1154
+ AntoDono/DialoGPT-Harry
1155
+ BFMeriem/model
1156
+ BFMeriem/chatbot-model
1157
+ StringCheese/Dialog-small-bigbang
1158
+ jakewillms17/capcake-model
1159
+ Shivierra/DialoGPT-small-technoblade
1160
+ Scaprod/DialoGPT-small-arbiter
1161
+ Tlacaelel/DialoGPT-small-jarvis
1162
+ spuun/kekbot-beta-1
1163
+ Coma/Beter
1164
+ Wavepaw/DialoGPT-medium-WardenIngo
1165
+ Akarsh3053/potter-chat-bot
1166
+ MachineBabs/RickBot
1167
+ MachineBabs/DocBrown
1168
+ spuun/kekbot-beta-1-medium
1169
+ MEDT/Chatbot_Medium
1170
+ tosin/dialogpt_mwoz_idioms
1171
+ tosin/dialogpt_afriwoz_wolof
1172
+ aakhilv/tonystark
1173
+ spuun/kekbot-beta-2-medium
1174
+ xiaoGato/DialoGPT-small-villanelle
1175
+ Jonesy/DialoGPT-small_FG
1176
+ deathknight67/DialoGPT-medium-joshua
1177
+ kyriinx/DialoGPT-small-glyph
1178
+ Jonesy/DialoGPT-medium_FG
1179
+ spuun/kekbot-beta-3-medium
1180
+ Lisia/DialoGPT-small-connor
1181
+ awvik360/DialoGPT-medium-plemons-04262022
1182
+ Jonesy/LisaOnIce
1183
+ kvnaraya/DialoGPT-small-michael
1184
+ Hyperspace/DialoGPT-small-Hyperdrive
1185
+ Azuris/DialoGPT-medium-ekidona
1186
+ aditeyabaral/sonobois
1187
+ Jonesy/HomersNightOut
1188
+ Andrei0086/Chat-small-bot
1189
+ awvik360/UncleRuckus
1190
+ captainswiftfox/rickandmorty
1191
+ radicalrascal/DialoGPT-medium-jimmy
1192
+ dmoz47/DialoGPT-small-peterparker
1193
+ niprestige/GPT-small-DusabeBot
1194
+ Shakerlicious/DialoGPT-small-descentbot
1195
+ atomsspawn/DialoGPT-small-shelbot
1196
+ atomsspawn/DialoGPT-small-sheldon
1197
+ Willow/DialoGPT-medium-willow
1198
+ IsekaiMeta/dapprf
1199
+ farjvr/DialoGPT-small-Mortyfar
1200
+ InSaiyan/DialoGPT-small-harrypotter
1201
+ IsekaiMeta/dapprf3
1202
+ emolyscheisse/DialoGPT-small-mandybot
1203
+ IsekaiMeta/dapprf4
1204
+ qgdmonilla/DialoGPT-small-harrypotter
1205
+ NHStudios/DialoGPT-small-jake
1206
+ Shakerlicious/DialoGPT-small-raquelbot
1207
+ annasham/DialoGPT-small-myneighborTotoro
1208
+ CaptAdorable/RickBot
1209
+ Willow/DialoGPT-large-willow
1210
+ Kabutopusu/DialoGPT-medium-NITWMae
1211
+ HarmlessTarget/DialoGPT-medium-Bender
1212
+ soni69/DialoGPT-medium-holmes
1213
+ captainswiftfox/DialoGPT-small-rick
1214
+ kathywu/DialoGPT-small-kathy
1215
+ mybot/DialoGPT-medium-harrypotter
1216
+ Dedemg1988/DialoGPT-small-michaelscott
1217
+ pedrobaiainin/DialoGPT-small-harrypotter
1218
+ kathywu/DialoGPT-medium-kathy
1219
+ SNCannon/DialoGPT-medium-merc
1220
+ THE-DDLM/DialoGPT-sebastian
1221
+ fatirali/DialoGPT-medium-harrypotter
1222
+ TejasARathod/DialoGPT-medium-BatmanBot
1223
+ Varick/dialo-jarvis
1224
+ Robinsd/HarryBot
1225
+ dipstheman/DialoGPT-small-humanconversation
1226
+ dipstheman/DialoGPT-small-humanconversationpart
1227
+ LinkTheSinger/DialoGPT-small-Kanna
1228
+ LinkTheSinger/DialoGPT-small-Kannav4
1229
+ Robinsd/HarryBot4
1230
+ SomeRandomGuy/tony
1231
+ Meowren/HumanBot
1232
+ marcoperez/DialoGPT-small-rickandmorty
1233
+ LarsBell/DialoGPT-small-billyloomis
1234
+ okwach/mawaidhaChatbot
1235
+ LooksLikeIveLost/DialoGPT-medium-me
1236
+ okwach/mawaidhaChatbot2
1237
+ thebyy/DialoGPT-small-mortyisarick
1238
+ rongina/DialoGPT-small-cartman
1239
+ fransoa/arrombado-dms
1240
+ ionite/DialoGPT-medium-MarkAI
1241
+ ddrmaster1000/DialoGPT-medium-rick
1242
+ PeritusDux/DialoGPT-small-rick
1243
+ HomerChatbot/HomerSimpson
1244
+ t8oo/DialoGPT-small-zeni
1245
+ t8oo/DialoGPT-small-zenigata
1246
+ sexomq/DialoGPT-medium-TeoBot
1247
+ Char135/DialoGPT-medium-sebastian
1248
+ HomerChatbot/DialoGPT-small-HomerSimpson
1249
+ trev/Twilight-Sparkle
1250
+ gigikenneth/family-guy-bot
1251
+ ulises801/DialoGPT-medium-rick
1252
+ fujuta/DialoGPT-medium-HarryPotter
1253
+ fujuta/DialoGPT-medium-RonWeasley
1254
+ fujuta/DialoGPT-medium-HermioneGrander
1255
+ deepparag/Aeona-Beta
1256
+ HomerChatbot/DialoGPT-small-homersimpsonbot
1257
+ redcy/FrasierBotv1
1258
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn
1259
+ natdon/DialoGPT_Michael_Scott
1260
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v3
1261
+ deathmite/DiabloGPT-small-potaru
1262
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v4
1263
+ DaBaap/Chat-Bot-Batman
1264
+ Iwa/bot
1265
+ badlawyer/DialoGPT-medium-sherlock-bot
1266
+ thanhchauns2/DialoGPT-medium-Luna
1267
+ jayklaws0606/DialoGPT-small-jayBot
1268
+ RUCAIBox/mvp
1269
+ Flem/DialoGPT-medium-alastor
1270
+ keans/DialoGPT-small-highjacker
1271
+ jayklaws0606/dgpt-small-jaybot
1272
+ CodeMaestro/DialoGPT-small-TChalla
1273
+ ElMuchoDingDong/AudreyBotBlenderBot
1274
+ stfuowned/rickfinal
1275
+ DuskSigma/DialogGPTHomerSimpson
1276
+ hireddivas/dialoGPT-small-sonic2
1277
+ N0NAne/DialoGPT-small-harrypotter
1278
+ tinkoff-ai/response-quality-classifier-tiny
1279
+ tinkoff-ai/response-quality-classifier-base
1280
+ tinkoff-ai/response-quality-classifier-large
1281
+ tinkoff-ai/response-toxicity-classifier-base
1282
+ RUCAIBox/mvp-open-dialog
1283
+ RUCAIBox/mtl-open-dialog
1284
+ RUCAIBox/mvp-multi-task
1285
+ Cirilaron/DialoGPT-medium-raiden
1286
+ BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch
1287
+ lucataco/DialogGPT-med-Rick
1288
+ lucataco/DialoGPT-medium-rafa
1289
+ gloomyworm/DialoGPT-small-ortho
1290
+ kozlovtsev/DialoGPT-medium-harrypotter
1291
+ Cirilaron/DialoGPT-medium-jetstreamsam
1292
+ lucataco/DialoGPT-medium-omar
1293
+ lucataco/DialoGPT-medium-milo
1294
+ daedalus2003/HouseBot
1295
+ SallyXue/DialoGPT-small-harrypotter
1296
+ Averium/DialoGPT-medium-TailsBot
1297
+ nlokam99/ada_sample
1298
+ nlokam99/ada_sample_2
1299
+ nlokam99/ada_sample_3
1300
+ nlokam/adanimals_V1
1301
+ spuun/kekbot-beta-4-medium
1302
+ quirkys/DialoGPT-small-harrypotter
1303
+ markofhope/DialoGPT-medium-HarringtonBot
1304
+ AntoDono/DialoGPT-Bopy-Alpha-1.01
1305
+ Hermite/DialoGPT-large-hermite
1306
+ robinhad/gpt2-uk-conversational
1307
+ Browbon/DialoGPT-small-LucaChangretta
1308
+ gloomyworm/DialoGPT-medium-ortho
1309
+ Browbon/DialoGPT-medium-LucaChangretta
1310
+ Fluffypillow/DialoGPT-small-Rem
1311
+ Hermite/DialoGPT-large-hermite2
1312
+ Bman/DialoGPT-medium-peppapig
1313
+ ZipperXYZ/DialoGPT-medium-TheWorldMachine
1314
+ AlyxTheKitten/DialoGPT-medium-AgedBlaine-2
1315
+ Averium/DialoGPT-medium-TailsBot1.1
1316
+ Elijah629/DialoGPT-mrsanai
1317
+ ZipperXYZ/DialoGPT-medium-TheWorldMachine2
1318
+ damianruel/DialoGPT-medium-MySon
1319
+ ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive
1320
+ Elijah629/DialoGPT-shrek
1321
+ AlyxTheKitten/DialoGPT-medium-Jimmis-2
1322
+ dennis-fast/DialoGPT-ElonMusk
1323
+ Sealgair/DialoGPT-medium-Eyden
1324
+ crystallyzing/DialoGPT-small-nishikiyama
1325
+ crystallyzing/DialoGPT-small-kiryu
1326
+ NikkiTiredAf/DialoGPT-small-billy2
1327
+ Evokus/DialoGPT-small-harrypotter
1328
+ mcimmy/DialoGPT-small-bob
1329
+ Laggrif/DialoGPT-medium-Luke
1330
+ Laggrif/DialoGPT-medium-3PO
1331
+ ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2
1332
+ prprakash/DialoGPT-small-TonyStark
1333
+ sexomq/TeoBot-Romanian-medium
1334
+ Bman/DialoGPT-medium-dora
1335
+ Hermite/DialoGPT-large-hermite3
1336
+ Averium/FabioBot
1337
+ arem/DialoGPT-medium-rickandmorty
1338
+ soProf1998/DialoGPT-small-chattyrick
1339
+ soProf1998/DialoGPT-medium-chattyrick
1340
+ Dorin/DialoGPT-small-Rick
1341
+ OptimalHoiboy/DialoGPT-small-kasumai
1342
+ Hartmann/DialoGPT-small-koishikomeiji
1343
+ Konbai/DialoGPT-small-akagi
1344
+ Konbai/DialoGPT-small-akagi2
1345
+ JazzyLucas/DialoGPT-small-TonyStark
1346
+ mystery/DialoGPT-small-pinkiepie
1347
+ sexomq/TeoBot-Romanian-medium2
1348
+ erikycd/chatbot_hadita
1349
+ infinix/Sheldon-bot
1350
+ JamesonSpiff/chatBot_test_model
1351
+ Akito1961/DialoGPT-small-C3PO
1352
+ Naturealbe/DialoGPT-small-Technoblade
1353
+ zR0clu/DialoGPT-medium-Mr.Roboto
1354
+ reso/DialoGPT-medium-v3ga
1355
+ trimox/tryingnewstuff
1356
+ Nakul24/YC_Bot
1357
+ casperthegazer/DiabloGPT-medium-lukedot
1358
+ JamesStratford/PLord-bot-DialoGPT-medium
1359
+ CaptPyrite/DialoGPT-small-cat
1360
+ SafeTorpedo/DialoGPT-small-MichaelBot
1361
+ brianveebee/DialoGPT-medium-bender
1362
+ myynirew/DialoGPT-medium-shouko01
1363
+ myynirew/2-0OKUOHS
1364
+ smmzhu/DialoGPT-medium-sam
1365
+ myynirew/shouko0-3
1366
+ myynirew/dumbbot
1367
+ Lamia/DialoGPT-small-Sundrop
1368
+ ashtrindade/chatbot-stacey
1369
+ tinkoff-ai/ruDialoGPT-small
1370
+ tinkoff-ai/ruDialoGPT-medium
1371
+ 24adamaliv/DialoGPT-medium-Will
1372
+ cybertelx/DialoGPT-small-drunkic0n
1373
+ Rick-C137/DialoGPT-small-rick
1374
+ debyve/dumbbot
1375
+ Amir-UL/JimBot
1376
+ BoxCrab/DialoGPT-small-Strider
1377
+ AbdalK25/DialoGPT-small-TheWiseBot
1378
+ casperthegazer/DialoGT-gandalf-urdot
1379
+ pineappleSoup/DialoGPT-medium-707
1380
+ Nakul24/AD_ChatBot
1381
+ TeaTM/DialoGPT-small-bushcat
1382
+ ionite/DialoGPT-medium-NakaAI
1383
+ Creepton/DDLCYuri-DialoGPT-small
1384
+ TeaTM/DialoGPT-large-bushcat
1385
+ yazinga/DialoGPT-medium-scout
1386
+ throwaway112358112358/DialoGPT-medium-script
1387
+ Jingna/test_hpv_discord
1388
+ anonchickenlegs/sartoshi-bot
1389
+ xander-cross/DialoGPT-small-EvilMortyTheBot
1390
+ Bman/DialoGPT-medium-shrek
1391
+ Yank2901/DialoGPT-small-Rick
1392
+ akshatpandeyme/DialoGPT-small-manpreet
1393
+ Jenwvwmabskvwh/DialoGPT-small-josh444
1394
+ akshatpandeyme/DialoGPT-small-parthiv
1395
+ akshatpandeyme/DialoGPT-small-ParthivBot
1396
+ seeksery/DialoGPT-calig
1397
+ akshatpandeyme/DialoGPT-small-AnyaBot
1398
+ Jordine/shitter
1399
+ model-attribution-challenge/DialoGPT-large
1400
+ seeksery/DialoGPT-calig2
1401
+ obl1t/DialoGPT-medium-Jotaro
1402
+ trickstters/DialoGPT-small-evanbot
1403
+ trickstters/evanbot-gpt
1404
+ AriakimTaiyo/gpt2-chat
1405
+ Yank2901/DialoGPT-small-Harry
1406
+ lizz27/DialoGPT-small-baymax
1407
+ obl1t/DialoGPT-medium-Jolyne
1408
+ seeksery/DialoGPT-calig3
1409
+ Jenwvwmabskvwh/DialoGPT-small-josh445
1410
+ trickstters/evbot2
1411
+ Jenwvwmabskvwh/DialoGPT-small-josh450
1412
+ lizz27/DialoGPT-medium-BaymaxBot
1413
+ soop/DialoGPT-medium-BaymaxBot
1414
+ abelblue3/DialoGPT-medium-baymax
1415
+ priyankac/DialoGPT-medium-BaymaxBot
1416
+ Ironpanther1/Testing
1417
+ tosin/dialogpt_afriwoz_pidgin
1418
+ Anon25/DialoGPT-Medium-BaymaxBot
1419
+ GoldenRedstone/DialoGPT-medium-Phoenix-Wright
1420
+ Primobot/DialoGPT-small-harrypotter
1421
+ Lyem/LyemBotv1
1422
+ JamesSantosxx/DialoGPT-small-harrypotter
1423
+ Lyem/LyemBotv2
1424
+ Ironpanther1/ArtoriaBot
1425
+ Swervin7s/DialoGPT-medium-anakin
1426
+ DogH2O/DialoGPT-small-naruto
1427
+ NoPeanuts/DialoGPT-small-po
1428
+ Gravitygaming/homerai
1429
+ Lyem/LyemBotv3
1430
+ celine45688/LuTing
1431
+ antwortemir/shouko04
1432
+ SebastianS/MetalSebastian
1433
+ notaproblem00/DialoGPT-small-bakugou
1434
+ myodoctor/DIALOGPT-medium-HarryPotterBot
1435
+ aniketface/DialoGPT-medium-elon
1436
+ noiseBase/DialoGPT-small-HarryPotter
1437
+ karan21/DialoGPT-medium-rickandmorty
1438
+ karan21/DialoGPT-medium-guin
1439
+ Sophiejs/DialoGPT-small-BlaineBot
1440
+ skouras/DialoGPT-small-swda
1441
+ skouras/DialoGPT-small-maptask
1442
+ TheodoreAinsley/LindaGold
1443
+ AlbedoAI/DialoGPT-large-Albedo
1444
+ AlbedoAI/DialoGPT-large-Albedo2
1445
+ willmay/DialoGPT-medium-will
1446
+ AlbedoAI/DialoGPT-medium-Albedo
1447
+ chulainn/DialoGPT-medium-Zuko
1448
+ ctoner2653/DialoGPT-medium-RickBoty
1449
+ Number4/DialoGPT-medium-harrypotter
1450
+ yummyhat/DialoGPT-small-spike
1451
+ EllyPony/flutterbot
1452
+ Suryansh-23/DialoGPT-small-MichaelScottOffice
1453
+ Cirilaron/DialoGPT-medium-vergil
1454
+ Izuuk/izuuk
1455
+ shungyan/Diablo-small-harrypotter
1456
+ bhavyasharma/DialoGPT-small-harrypotter
1457
+ nintwentydo/rickbot
1458
+ tylersfoot/DialoGPT-medium-rick
1459
+ EJoftheVern/DialoGPT-medium-shaggy
1460
+ xtraXpert/DialoGPT-small-RickAndMorty2
1461
+ ANIKEThash/DialoGPT-medium-character
1462
+ Noonw/DialoGPT-small-hijackersexurmom
1463
+ fat32man/elon_answers
1464
+ MinhP/DialoGPT-small-themis
1465
+ Noonw/DialoGPT-small-osamaflyplane
1466
+ Noonw/DialoGPT-small-ladenflyplane
1467
+ Noonw/DialoGPT-small-ladenonjet
1468
+ MinhP/DialoGPT-small-franco
1469
+ Karan59/DialoGPT-small-evaModel
1470
+ marblyso/DialoGPT-medium-marblesbagel
1471
+ Jojo17/DialoGPT-small-RickAndMorty
1472
+ deseipel/medium-LucyClarke_
1473
+ DiscordBackup/model0000
1474
+ SirSpiffy/IvanModel
1475
+ woodmtaylor/DialoGPT-small-Heej
1476
+ woodmtaylor/DialoGPT-medium-Heej
1477
+ OctaviusI/marisaV0
1478
+ ChloeMJM/DialoGPT-small-rick
1479
+ JDesignEra/DialoGPT-small-Anya
1480
+ MrE/DialoGPT-medium-SARGER4
1481
+ aarya-c111/DialoGPT-small-Rogers
1482
+ bozlucas/DialoGPT-medium-HermioneBot
1483
+ LasseVKP/DialoGPT-Mogens
1484
+ metaloopa/DialoGPT-medium-Rintaro
1485
+ ingen51/DialoGPT-medium-GPT4
1486
+ Divyesh/DialoGPT-medium-harrypotter
1487
+ Natsuki-Chan/DialoGPT-medium-luz
1488
+ akira2001/DialoGPT-medium-harrypotter
1489
+ osueng02/DialoGPT-small-STAN_BOT
1490
+ osueng02/DialoGPT-medium-STAN_BOT
1491
+ wormed/DialoGPT-small-denai
1492
+ RehanP123/DialoGPT-medium-kermit.old
1493
+ Nakul24/SM_Bot
1494
+ chulainn/DialoGPT-medium-Ragnar
1495
+ aniketface/DialoGPT-product
1496
+ shohanursobuj/DialoGPT
1497
+ marblyso/DialoGPT-medium-hero
1498
+ marblyso/DialoGPT-medium-kel
1499
+ marblyso/DialoGPT-medium-aubrey
1500
+ akil191/small-test-harryakakakaka
1501
+ sanpellegrino/CoryBot
1502
+ Arqhero/DialoGPT-small-adventuretime
1503
+ chulainn/DialoGPT-medium-Tyrion
1504
+ VTG/MentalHealthChatbotv1
1505
+ luminolblue/HomunculusGPT-testbot
1506
+ Paulina354/DialoGPT-small-rickandmorty
1507
+ khuranagarvit019/MentalHealthChatbot
1508
+ VirtualizedTrash/Chatbot
1509
+ pedrocaribe/DialoGPT-medium-LL
1510
+ queenaccila/DialoGPT-small-kashiwagi
1511
+ GarfExit/DialogGPT-medium-707
1512
+ marblyso/DialoGPT-medium-shepherd
1513
+ Spectre29/DialoGPT-small-Kaisa
1514
+ Spectre29/Kaisa-converse-model
1515
+ ZedTheUndead/Rick_fragment
1516
+ marblyso/DialoGPT-medium-mari
1517
+ Delicious/DialoGPT-small-harrypotter
1518
+ BBHKR/DialoGPT-small-jacksparrow
1519
+ Guwon/DialoGPT-small-Quincy
1520
+ epeicher/DialoGPT-small-homer-2
1521
+ timmychanga/DialoGPT-small-ashley
1522
+ mywateriswet/ShuanBot
1523
+ epeicher/DialoGPT-small-flanders
1524
+ Super-McTea/DialoGPT-small-McTea
1525
+ Eronzin/meuBotzindoEron
1526
+ Techdra/DialoGPT-large-theboy
1527
+ Eronzin/DialoGPT-small-Frodo
1528
+ gtgillott/gib
1529
+ AwesomeDWNJ/EmiBot
1530
+ CJ3/DialoGPT-medium-amber3
1531
+ GamerMan02/DialoGPT-medium-gamerbot2
1532
+ GamerMan02/DialoGPT-medium-gamerbot1
1533
+ Insomnic/DialoGPT-small-harrypotter
1534
+ Super-McTea/DialoGPT-small-McTeaV2
1535
+ FelipeJoazeiro/chatbot-morty
1536
+ microsoft/GODEL-v1_1-base-seq2seq
1537
+ microsoft/GODEL-v1_1-large-seq2seq
1538
+ Rencist/DialoGPT-small-rick
1539
+ scorpiofrens/DialoGPT-medium-ergon
1540
+ somemusicnerdwoops/DialoGPT-small-shadow
1541
+ powchang/DialoGPT2-medium-CAiFE
1542
+ ratneshrt/DialoGPT-small-Artico
1543
+ somemusicnerdwoops/DialoGPT-distilgpt2-sonicfandub
1544
+ Tsec-Research/DialoGPT-chandler-penny
1545
+ neonon/DialoGPT-medium-cloy
1546
+ ddae208s/DialoGPT-small-dimitri
1547
+ mossfarmer/VRANAK
1548
+ Matax/Aristrathor3000
1549
+ brownanchovy/Harry
1550
+ Overlrd/DialoGPT-small-cartman
1551
+ epeicher/DialoGPT-large-homer
1552
+ comradesocrates/DialoGPT-medium-stranger
1553
+ Rakublu/DialoGPT-small-yasuo
1554
+ neonon/DialoGPT-medium-htccc
1555
+ Alt41r/gpt-simpson
1556
+ Nimit-Jjw/DialoGPT-chandler-penny
1557
+ Quoc123/DialoGPT-small-AQUA
1558
+ marblyso/DialoGPT-medium-pearl
1559
+ estus2/rick-superu-rick2
1560
+ marblyso/DialoGPT-medium-marina
1561
+ rovenmusic/DialoGPT-small-melodybot
1562
+ deseipel/small-LucyClarke_
1563
+ rovenmusic/DialoGPT-small-melodybotv2
1564
+ rovenmusic/DialoGPT-small-melodybotv3
1565
+ epeicher/DialoGPT-medium-homer
1566
+ andrewkroening/GalaxyFarAway-DialoGPT-HanSolo
1567
+ nams/nams-bot
1568
+ Nicktherat/DialoGPT-medium-endella
1569
+ alfirsaafauzulh/DialoGPT-small-KamuiBastion
1570
+ rovenmusic/DialoGPT-small-melodyv10
1571
+ somesh212/Harry_Potter-BOT
1572
+ somesh212/Harry_Potter_botDialoGPT_Som2
1573
+ jmagine/DialoGPT-small-metahead
1574
+ somesh212/Harry_Potter_botDialoGPT_Som3
1575
+ rovenmusic/DialoGPT-small-melodyvfinal
1576
+ jmagine/DialoGPT-small-jmagine
1577
+ jmagine/DialoGPT-small-funded
1578
+ jmagine/DialoGPT-small-jimj
1579
+ andrewkroening/GalaxyFarAway-DialoGPT-LukeSkywalker
1580
+ andrewkroening/GalaxyFarAway-DialoGPT-Threepio
1581
+ andrewkroening/GalaxyFarAway-DialoGPT-Vader
1582
+ andrewkroening/GalaxyFarAway-DialoGPT-LeiaOrgana
1583
+ andrewkroening/GalaxyFarAway-DialoGPT-Yoda
1584
+ Wizardd/DialoGPT-small-sheldon
1585
+ BenKJH/DialoGPT-small-lucybotasg
1586
+ Ananjas/AwooAI
1587
+ Ananjas/AwooV2
1588
+ kookyklavicle/gpt-sean-diaz
1589
+ kookyklavicle/SeanDiazBot2
1590
+ Ananjas/AwooV3
1591
+ Overlrd/DialoGPT-medium-cartman
1592
+ Ananjas/AwooV6
1593
+ mathecas/HarryPotterBotAI
1594
+ Karina256/DialoGPT-small-dory
1595
+ Tony8657/DialoGPT-small-TonyStarkBot
1596
+ SebastianS/my_mim
1597
+ TFS668/DialoGPT-small-Rick
1598
+ redhoff/DialoGPT-Medium-RedBot
1599
+ FeriVOQ/DialoGPT-small-joshua
1600
+ Triobloid/DialoGPT-small-lianaharrypotter
1601
+ quinnzie/DialoGPT-small-sinister
1602
+ FarziBuilder/DialoGPT-medium-harrypotter
1603
+ sohampatil/DialoGPT-small-mentalchatbot
1604
+ gtkarber/DialoGPT-medium-columbo
1605
+ PaddlePaddle/plato-mini
1606
+ Junkan/DialoGPT-medium-Bilbo
1607
+ ThatSkyFox/DialoGPT-medium-whatsapp
1608
+ Ar4ikov/DialogAgentGPT2
1609
+ reallygoodtechdeals/Bingocat-ai-Dialo-GPT-medium
1610
+ thmauler/crashed
1611
+ OptionaI/DialoGPT-small-beepboopy
1612
+ davebathhews/DialoGPT-OTIS
1613
+ GGOM/SipBotGGOM
1614
+ davebathhews/DialoGPT-OTISBOT
1615
+ GGOM/WillBotGGOM
1616
+ GGOM/ElyasBotGGOM
1617
+ reallygoodtechdeals/steve-ai-Dialo-GPT-medium
1618
+ Crushtoe/DialoGPT-small-vangluss
1619
+ apotempest/DialoGPT-medium-geralt
1620
+ DiogoSabec/DialoGPT-small-joshua
1621
+ WaleedArif/DialoGPT-small-Micheal
1622
+ Crushtoe/DialoGPT-medium-vangluss
1623
+ Crushtoe/GODEL-v1_1-base-seq2seq-vangluss
1624
+ DiogoSabec/BOT
1625
+ Le033/DialoGPT-small-rickmorty
1626
+ Filosofas/DialoGPT-medium-PALPATINE2
1627
+ JadansTower/jobot
1628
+ NTMNathan/DialoGPT-small-harrypotter
1629
+ Ashypaws/DialoGPT-medium-Ashybot
1630
+ wmdosborne/DialoGPT-medium-kyritebot
1631
+ worms3402/DialoGPT-small-automata2
1632
+ Pi3141/DialoGPT-small-elon
1633
+ Grendar/Dialo-GPT-medium-shiro
1634
+ Pi3141/DialoGPT-medium-elon
1635
+ Pi3141/DialoGPT-medium-elon-2
1636
+ JoshuaPawlik/DialoGPT-medium-joshua
1637
+ Pi3141/DialoGPT-medium-elon-3
1638
+ josephthen3320/DialoGPT-small-walter
1639
+ robbiegwald/Rick
1640
+ Gurtej/Drbot
1641
+ Hereward/DialoGPT_medium_ObiWan_Kenobi
1642
+ Giu888/DialoGPT-small-sao
1643
+ Grendar/blenderbot-400M-distill-Shiro
1644
+ keeg8/Book-0-1500
1645
+ keeg8/Book-1500-1700
1646
+ keeg8/Book-1850-1900
1647
+ keeg8/Book-1700-1850
1648
+ karlreimond/DialoGPT-small-harrypotter
1649
+ lenartlola/SpongeBob
1650
+ lenartlola/rick-bot
1651
+ Deedlit/DialoGPT-small-southpark
1652
+ babylasagne/DialoGPT-small-narryuto
1653
+ babylasagne/DialoGPT-small-harry
1654
+ babylasagne/DialoGPT-small-spider
1655
+ babylasagne/DialoGPT-small-batman
1656
+ BradHeffernan/rickNmortyModel
1657
+ UmUDev/DialoGPT-medium-AlexVN
1658
+ ukikunz/gas-kenji-medium
1659
+ ukikunz/gas-kenji
1660
+ Isokeel/DialoGPT-medium-KMbot
1661
+ KakoSi/AcciGPT-smol
1662
+ Spoofed/DiabloGPT-small-peter
1663
+ sophiadt/DialoGPT-medium-707
1664
+ UmUDev/DialoGPT-medium-Alex
1665
+ PygmalionAI/pygmalion-350m
1666
+ sophiadt/DialoGPT-medium-reigen
1667
+ rexfi/DialoGPT-small-peter
1668
+ rexfi/NafezBot-DialoGPT
1669
+ caps1994/chris-bot
1670
+ rexfi/RickyBot
1671
+ allenai/cosmo-xl
1672
+ woodmtaylor/DialoGPT-large-Dumpling
1673
+ rexfi/MikeScottBot
1674
+ apfallinus/RickBot
1675
+ apfallinus/HarryBot
1676
+ apfallinus/MedBot
1677
+ apfallinus/AeonaBot
1678
+ apfallinus/BatmanBot
1679
+ apfallinus/AiBot
1680
+ LostXOR/TotallyNotARobot
1681
+ gachaddict/DialoGPT-medium-ike
1682
+ OctaviusI/staging
1683
+ PygmalionAI/pygmalion-1.3b
1684
+ Terrymir/DialoGPT-medium-Soraka
1685
+ SantiPingui58/DialoGPT-small-hika
1686
+ ss1612/montana-chat
1687
+ MrEmpty/DialoGPT-small-rickandmorty
1688
+ shikiskhakis/DialoGPT-small-blackdoom
1689
+ alexandreteles/GPTChizuru
1690
+ Chae/scottbot_med
1691
+ AhmedMostafa/DialoGPT-small-Rick
1692
+ metkoon/30dollarceo
1693
+ Dinocroth/DialoGPT-medium-Trevor-PhilipsV2
1694
+ metkoon/MatBot
1695
+ SmallQ/DialoGPT-small-Anya
1696
+ bigbossa/aiko6
1697
+ GK123/DialoGPT-medium-hanbot
1698
+ TheHappyDrone/DialoGPT-medium-salesman
1699
+ Pcik/DialoGPT-medium-Jaiden
1700
+ TheHappyDrone/DialoGPT-medium-Nexus-Nova
1701
+ Pcik/DialoGPT-medium-Dante
1702
+ AlmightyDeathCheater/DialoGPT-medium-harrypotter
1703
+ Pcik/DialoGPT-medium-Kirby
1704
+ Starry/COUNTNARC
1705
+ TheHappyDrone/DialoGPT-medium-Nexus-Nova-turing-v2
1706
+ wetwoteraq/DialoGPT-medium-aqua
1707
+ wetwoteraq/DialoGPT-small-peter
1708
+ wetwoteraq/DialoGPT-medium-peter
1709
+ lilexo2/DialoGPT-medium-Monica
1710
+ momo10/DialoGPT-small-harryPotter
1711
+ Antale123/ConorBot
1712
+ shikiskhakis/DialoGPT-small-xemnas
1713
+ Ecook/DialoGPT-medium-Ecook
1714
+ PygmalionAI/pygmalion-2.7b
1715
+ FowlerF/DiscordChatBot
1716
+ JoeRoganfan-69420/DialoGPT-medium-HarryPotterbot
1717
+ dusty310/DialoGPT-medium-Misaki
1718
+ Gurtej/Drbot2
1719
+ Gurtej/Drbot3
1720
+ Gurtej/Drbot4
1721
+ Gurtej/Drbot5
1722
+ Gurtej/Drbot6
1723
+ Gurtej/Drbot7
1724
+ Gurtej/Drbot8
1725
+ Gurtej/Drbot9
1726
+ PygmalionAI/pygmalion-6b
1727
+ Gurtej/Drbot11
1728
+ navygup/Mood-Tracker
1729
+ Maraslumunnus/DialoGPT-small-ivern
1730
+ DAS9051/BatemanChatBot
1731
+ SmallQLALA/DialoGPT-small-Anya
1732
+ RinkaDev/GPT-Peppa-Pig
1733
+ thu-coai/blenderbot-1B-augesc
1734
+ siyaT/DialoGPT-harrypotter-small
1735
+ keircare/DialoGPT-small-RickSanchez
1736
+ shiiiroe/DialoGPT-medium-kirito
1737
+ jdakillah/Rick
1738
+ kielljoy/DialoGPT-small-stupidspecialkay
1739
+ Ashypaws/DialoGPT-medium-Kitaibot
1740
+ jdakillah/RICK-V2
1741
+ jdakillah/Bender
1742
+ jdakillah/Generalbot
1743
+ kielljoy/DialoGPT-medium-ryanbot
1744
+ emre/spanish-dialoGPT
1745
+ vuminhtue/DialoGPT-large-HarryPotter3
1746
+ ralphsorz/DialoGPT-small-samwise
1747
+ SumYin/DialoGPT-small-Homer
1748
+ JamesRoy/DGPT-DC
1749
+ Blizzchor/DialoGPT-medium-HarryBotter
1750
+ gjhghjk/rick
1751
+ gjhghjk/rick2
1752
+ SumYin/ZeroTwo-Medium-DialoGPT
1753
+ Blizzchor/DialoGPT-medium-gamora
1754
+ Mydia2/DialoGPT-small-Flonnealive
1755
+ AL-CT/DialoGPT-small-slayer
1756
+ DhruvShek/Webraft-Ai
1757
+ arno2077/DiabloGPT-small-harrypotter
1758
+ keyonecs/fourept-debique-gpt
1759
+ Blizzchor/DialoGPT-medium-QuillLord
1760
+ callmeclover/Stinger-CONVRS_MODL
1761
+ aminFelah/DialogueGPT-very-small-harryPotter
1762
+ Keijuro/aeris-dialogpt
1763
+ Abdelrahman853/DialoGPT-small-echo
1764
+ Bearfoot/DialoGPT-medium-shrek
1765
+ arthme2/jay
1766
+ arthme2/DialoGPT-medium-Jay
1767
+ 42meow/DialoGPT-medium-42meow
1768
+ Peeepy/Evie
1769
+ HorniFolks/Unicorn
1770
+ waifu-workshop/pygmalion-6b
1771
+ agenttylostudios/DialoGPT-small-Bocchi
1772
+ GregariousJamie/DialoGPT-small-jamie
1773
+ Fuwaguwa/DialoGPT-Medium-AzurLaneMusashi-v8
1774
+ s3nh/DialoGPT-large-Rick
1775
+ s3nh/DialoGPT-large-Morty
1776
+ s3nh/DialoGPT-small-morty
1777
+ Givinghawk/GPT-Morty
1778
+ DhruvShek/swearbot
1779
+ grart/DialoGPT-small-gillion
1780
+ interpixle/Sir_Caladan
1781
+ s3nh/DialoGPT-tony-montana
1782
+ s3nh/DialoGPT-small-harry-potter-goblet-of-fire
1783
+ s3nh/DialoGPT-small-hermione-granger-goblet-of-fire
1784
+ s3nh/DialoGPT-small-woody-toy-story
1785
+ s3nh/DialoGPT-small-buzz-toy-story
1786
+ puj0/DialoGPT-small-joshua
1787
+ julianvd49/DialoGPT-medium-EllieBot
1788
+ Sreyas/DialoGPT-small-elit
1789
+ DiscordRequestsAPI/DialoGPT-medium-NurDeeps
1790
+ MarinHinawa/DialoGPT-medium-Ene
1791
+ polandball/polanball
1792
+ whoami24142/DialoGPT-small-padilha
1793
+ DiscordRequestsAPI/NurDeeps-Bot
1794
+ Vaibhav-rm/GPT2-Shri-v1
1795
+ chrisrowles/DialoGPT-small-chrisrowles
1796
+ espeon98/DialoGPT-kenny-bot
1797
+ espeon98/DialoGPT-kenny-bot-2
1798
+ polandball/GPT-Polen
1799
+ chrisrowles/DialoGPT-medium-chrisrowles
1800
+ DiscordRequestsAPI/NurDeeps-Bot-2
1801
+ steerevo88/DialoGPT-small-baiken
1802
+ akiFQC/japanese-dialogpt-small-aozora
1803
+ Ngao/DialoGPT-small-ngao
1804
+ Mineroero/DialoGPT-medium-M4SOPMOD
1805
+ simple2312/DialoGPT-nayeon
1806
+ nemowet88/DialoGPT-small-ricktest
1807
+ Abraxas3d/house
1808
+ vampiregirl/DialoGPT-medium-lennoxram
1809
+ aisingapore/coherence-momentum
1810
+ simple2312/DialoGPT-Ellie
1811
+ simple2312/DialoGPT-Twice
1812
+ testaws/DialoGPT-small-joshua
1813
+ nemowet88/output-pythia-test
1814
+ Gurtej/Drbot12
1815
+ Gurtej/Drbot13
1816
+ Gurtej/Drbot14
1817
+ Gurtej/Drbot16
1818
+ EZSNoVa/DialogGPT-medium-NoVa
1819
+ mattallio/Archivist-medium-dialoGPT
1820
+ rlatt/DialoGPT-small-RickSanchez
1821
+ Lyforth/DialoGPT-Medium-Maribelle
1822
+ kittenwhiperer/Deadpool
1823
+ KumquatJoe/DialoGPT-medium-MaleToucherBot
1824
+ lmkhoa/GODEL_base_model
1825
+ JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023
1826
+ LrxLcs/DialogGPT2-SMAL
1827
+ Delcos/internal_chat_model_e2
1828
+ euvu/DialoGPT-small-harrypotter
1829
+ LrxLcs/GPT2-V2
1830
+ LrxLcs/GPT2-Test
1831
+ euvu/euvu-rickbot
1832
+ Weeeeeeeeeeeee00/DialoGPT-small-harrypotter
1833
+ slyslasher24/DialoGPT-Medium-Pondweed
1834
+ slyslasher24/DialoGPT-Small-Pondweed
1835
+ bradydawg/AI-Bot2
1836
+ aisingapore/rumour-detection-twitter
1837
+ RatInChat/Pilup7575
1838
+ rlatt/DialoGPT-large-RickSanchez
1839
+ Kira225784/Klarabot-test
1840
+ bigbossa/DialoGPT-small-aikogirl
1841
+ sckova/DialoGPT-small-joshua
1842
+ sckova/DialoGPT-medium-joshua
1843
+ sckova/DialoGPT-medium
1844
+ Beltenebros/DialoGPT-small-PerionOfGaul
1845
+ Byteno/DialoGPT-medium-glamrockfreddy
1846
+ audreycl/audreycl-testagain
1847
+ aisingapore/Lif3WayAp
1848
+ audreycl/DialoGPT-RoyalPurpleFish
1849
+ audreycl/DialoGPT-RPF
1850
+ Axelajs26/DialoGPT-small-alicetendou
1851
+ Noohance/DialoGPT-medium-noohbot
1852
+ Draptor/DialoGPT-small-coolco
1853
+ David042/DialoGPT-LucasBot
1854
+ Hobospider132/DialoGPT-Mahiru-Proto
1855
+ Draptor/DialoGPT-medium-moto
1856
+ aisingapore/SPANBert
1857
+ JYBX/DialoGPT-small-Penny
1858
+ JYBX/DialoGPT-small-Pennybot
1859
+ aisingapore/RoBERTa-base
1860
+ JYBX/DialoGPT-small-Amybot
1861
+ LuckyBor11/Figure
1862
+ FlyingGrayson0304/Gandalf-stupid-version
1863
+ BlinksFly/Harry_Potter-Ai
1864
+ PhilipN/DialoGPT-small-KeqingBot
1865
+ YTTD/DialoGPT-medium-sou
1866
+ PhilipN/DialoGPT-large-KeqingBot
1867
+ YTTD/DialoGPT-medium-souv2
1868
+ keonju/chat_bot
1869
+ MysteriousAmazon/DialoGPT-medium-alastor
1870
+ mICHPl/MINI_AI
1871
+ rlatt/DialoGPT-large-King-James-Bible-test
1872
+ v3nom1704/DialoGPT-small-potterbot
1873
+ Techcs002/DialoGPT-medium-AboTalkTest
1874
+ MysteriousAmazon/DialoGPT-medium-freddy
1875
+ ICAMPB204/DialoGPT-small-HarryPotter
1876
+ kelvinhang/DialoGPT-medium-badguy
1877
+ tatsumis6/MonikaAI
1878
+ kennethhendricks/DialoGPT-medium-PowPowGaming-Gen1
1879
+ rlatt/DialoGPT-large-King-James-Bible-test-accurate
1880
+ kennethhendricks/DialoGPT-medium-PowPowGaming
1881
+ kelvinhang/DialoGPT-medium-badguy2
1882
+ zami0011/qqpbksdj
1883
+ vladiyudi/Morty-data
1884
+ RazaK18/DialoGPT-small-harrypotter
1885
+ comradesocrates/DialoGPT-large-io
1886
+ kelvinhang/DialoGPT-medium-okakoro
1887
+ Monchic/chatwithkani
1888
+ zami0011/rickdick
1889
+ CallMeJeremy/DialoGPT-medium-THREEPIO
1890
+ Leomas/DialoGPT-medium-Leomas
1891
+ RehanP123/DialoGPT-large-kermit
1892
+ shahules786/Safetybot-T5-base
1893
+ huolongguo10/CDial-GPT2-LCCC-Base-copy
1894
+ yashR4J/TyrionBOT
1895
+ TakoIsATaco/DialoGPT-small-ShinAI
1896
+ MrLamBam/DialoGPT-medium-LUKEBot
1897
+ Zeda/DialoGPT-Medium-ZedaBot
1898
+ princedream/DialoGPT-small-harrypotter
1899
+ shahules786/Safetybot-mt5-base
1900
+ xiaomengdotcom/Chatgpt-harryP
1901
+ ProtonPLUS/Colab
1902
+ YTTD/DialoGPT-medium-saf
1903
+ jasondubon/HubermanGPT-small-v1
1904
+ YTTD/DialoGPT-medium-safv2
1905
+ YTTD/DialoGPT-medium-safv3
1906
+ kennethhendricks/DialoGPT-medium-jared-hendricks-gen1
1907
+ Cohee/pygmalion-6b-pyggyback-v6_40_v8p4_60
1908
+ DiogenesGois/DialoGPT-medium-Rick
1909
+ LordDanielDE/DialoGPT-medium-Hina
1910
+ ITG/DialoGPT-medium-spanish-chitchat
1911
+ kemsa51/DialoGPT-medium-cartman
1912
+ Mogwhy/DialoGPT-medium-Arrobot
1913
+ nRuaif/Pyg6B-V8P2
1914
+ Seer-luma/DialoGPT-small-SeerBot
1915
+ Dinoloverwii/DialoGPT-Sachibot
1916
+ flayeddie/Mike
1917
+ wooldover/krautbot
1918
+ kielljoy/DialoGPT-small-k
1919
+ WAHCLAN/DialoGPT-Medium-DAN
1920
+ ss1612/loki-chat
1921
+ IceBruhOne/mytestcharacter
1922
+ wooldover/pygbot
1923
+ IceBruhOne/DialoGPT-medium-subjectai
1924
+ YukioKoito/DialoGPT-small-ozua
1925
+ gaytrimoh/DialoGPT-small-harrypotter
1926
+ YukioKoito/DialoGPT-small-doog
1927
+ IceBruhOne/DialoGPT-medium-subjectai2
1928
+ custads23/DialoGPT-medium-aubrey
1929
+ HaHaMagpie/DialoGPT-small-phineas
1930
+ Carslo45/DialoGPT-medium-ddlc-monika
1931
+ zl111/ChatDoctor
1932
+ MarinHinawa/DialoGPT-medium-haruka
1933
+ custads23/DialoGPT-medium-basil
1934
+ IceBruhOne/DialoGPT-medium-complexai
1935
+ MarinHinawa/DialoGPT-medium-Shintaro
1936
+ jlsalty9999/DialoGPT-medium-Riddle
1937
+ custads23/DialoGPT-medium-mincy
1938
+ Wtfsquad/DialoGPT-small-pulpfictionVincent
1939
+ ss1612/erika-chatv4
1940
+ WAHCLAN/DialoGPT-Large-DAN
1941
+ Speedemon/jake-peralta-ai
1942
+ Speedemon/cobalt
1943
+ DeliveryBoy/DiabloGPT-medium-Kurisu
1944
+ AbbyRhea/DialoGPT-small-adrienbot
1945
+ monish162/kirthin-waifuu
1946
+ janna42/DialoGPT-small-phoenix
1947
+ AbbyRhea/DialoGPT-medium-AA
1948
+ FrozenSmoothie/DialoGPT-medium-star
1949
+ Fizi12341/astro_bot1234
1950
+ stiGGy/DialoGPT-medium-raymond
1951
+ patthebaker45/DialoGPT-small-Carlbot
1952
+ r4k4n1/DialoGPT-small-joshua
1953
+ Sukul/DialoGPT-small-Harsabot
1954
+ Sukul/DialoGPT-small-Harsabot1
1955
+ hihihotdog/DialoGPT-bot
1956
+ LarsJonasson/pythia-1.4b-deduped-sft-swedish
1957
+ mayaeary/pygmalion-6b-4bit-128g
1958
+ mayaeary/pygmalion-6b_dev-4bit-128g
1959
+ Inhaexpress/DialoGPT-medium-paimon
1960
+ sanyasna517/DialoGPT-medium-Zhongli
1961
+ StephenBrink/DialoGPT-small-will
1962
+ StanleyRoberts/Nix
1963
+ boudchicha/soluzione
1964
+ mayaeary/PPO_Pygway-V8p4_Dev-6b-4bit-128g
1965
+ ToborWinner/DialoGPT-medium-jolly
1966
+ mayaeary/PPO_Pygway-6b-Mix-4bit-128g
1967
+ ayushutkarsh/t3
1968
+ Inhaexpress/DialoGPT-medium-paimon2
1969
+ eepyblanky/DialoGPT-medium-malina
1970
+ eachadea/legacy-ggml-vicuna-13b-4bit
1971
+ eachadea/ggml-gpt4-x-alpaca-13b-native-4bit
1972
+ totallynotbrent/brotGPT
1973
+ Inhaexpress/DialoGPT-medium-harry_potter_ps
1974
+ robintan66/DialoGPT-small-harrypotter
1975
+ MajorCrayon7047/MadboneAssistantGPT-2
1976
+ VennuT/DialoGPT-medium-Alphinaud
1977
+ triple777/annicebot
1978
+ totallynotbrent/aaronGPTalpha
1979
+ Plaaasma/gerald-model
1980
+ yashugupta786/bart_large_xsum_samsum_conv_summarizer
1981
+ eachadea/legacy-ggml-vicuna-7b-4bit
1982
+ ColtonAi/Llmtrain
1983
+ ColtonAi/Chem4
1984
+ IchtacaKemeRaz/favabean
1985
+ Stromello/DialoGPT-medium-ZeroTwo
1986
+ totallynotbrent/brotGPTplus
1987
+ storminstakk/Stormin-Stakk
1988
+ ToddGoldfarb/Cadet-Tiny
1989
+ aghelan3/eggIncubationRepo
1990
+ hackathon-somos-nlp-2023/SalpiBloomZ_15949_input_1024-1b7
1991
+ JosephusCheung/Guanaco
1992
+ raymondho/DialoGPT-small-harry
1993
+ Capitalist/DialoGPT-small-rick
1994
+ gfgddfg/DialoGPT-small-qiu_chat
1995
+ eachadea/ggml-toolpaca-13b-4bit
1996
+ CNR223/DialoGPT-small-MasterO
1997
+ Abigaming75/Bot_wa
1998
+ pranitrai07/DialoGPT-medium-harrypotter
1999
+ IlyaGusev/saiga_7b_lora
2000
+ Ancestral/Dolly_Shygmalion-6b-4bit-128g
2001
+ Ancestral/PPO_Shygmalion-6b-4bit-128g
2002
+ wyskiski/winonabot
2003
+ hcpwr/DialoGPT-medium-samantha
2004
+ Roguwan/DialoGPT-medium-rogu
2005
+ totallynotbrent/aaronGPTplus
2006
+ Ancestral/Dolly_Malion-6b-4bit-128g
2007
+ vantozdad/DialoGPT-medium-Dumbledore
2008
+ Abyss-fyf/DialoGPT-small-discord
2009
+ CrystalzAura/DialoGPT-small-elysia
2010
+ eachadea/ggml-gpt4all-7b-4bit
2011
+ inu-ai/alpaca-guanaco-japanese-gpt-1b
2012
+ Husnul/pepper-bot-morty
2013
+ TheBloke/vicuna-13B-1.1-GPTQ
2014
+ CRD716/ggml-vicuna-1.1-quantized
2015
+ 4bit/pygmalion-6b-4bit-128g
2016
+ Reaver1092/DialoGPT-small-bones
2017
+ Ibnelaiq/Makise-Amadeus-Kurisu-small
2018
+ inu-ai/dolly-japanese-gpt-1b
2019
+ clawrex/DialoGPT-medium-walt
2020
+ IlyaGusev/saiga_13b_lora
2021
+ Zeda/DialoGPT-Large-ZedaBot
2022
+ Ibnelaiq/Makise-Amadeus-Kurisu
2023
+ Jaxon/DialoGPT-medium-kirito
2024
+ glitchie/bb
2025
+ Aqua002/DialoGPT-small-deadpool
2026
+ Aqua002/discord-chatbot
2027
+ lemoniada/Przembot
2028
+ Avitas8485/Dialogpt-small-v1
2029
+ Jprafol/DialoGPT-large-ARCHIBot
2030
+ Jprafol/DialoGPT-large-ARCHIBotV2
2031
+ spitfire4794/ben-ultra
2032
+ IlyaGusev/saiga_30b_lora
2033
+ NbAiLab/nb-gpt-j-6B-norpaca
2034
+ winglian/vicuna-self-reflect-13b
2035
+ 0x044/test-1
2036
+ 0x044/dgpt
2037
+ ss1612/erika-chatv6
2038
+ TestingCoder463632/DialoGPT-small-palpatine
2039
+ Blizzchor/DialoGPT-medium-BarryB
2040
+ sasha0552/pygmalion-6b-f16-ggml
2041
+ kavindu999/BetterEnglishGPT-v1
2042
+ kavindu999/BetterEnglishGPT-v2
2043
+ EnterNameBros/DialoGPT-small-FoxySan
2044
+ OrientalDude/DialoGPT-medium-GOKU
2045
+ Avitas8485/Dialogpt-medium-v1
2046
+ finex/pfe-mohamed-Harry
2047
+ Avitas8485/Dialogpt-medium-finetuned
2048
+ psyamk/DialoGPT-small-harrypotter
2049
+ Jamesonn/DialoGPT-small-jumin
2050
+ CNXT/CNXT
2051
+ Ilangraterol/Dataset_model
2052
+ IlyaGusev/saiga_30b_ggml
2053
+ Locutusque/gpt2-conversational-or-qa
2054
+ TrippingFollowing39/AMOGUS
2055
+ moomoomer/DialoGPT-medium-garfield
2056
+ PygmalionAI/pygmalion-7b
2057
+ Viperxyz/DialoGPT-small-Cartman
2058
+ Neko-Institute-of-Science/pygmalion-7b
2059
+ TehVenom/Pygmalion-7b-Merged-Safetensors
2060
+ BiaDd/DialoGPT-medium-Punko
2061
+ NewBreaker/chatglm-6b-int4
2062
+ TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors
2063
+ TehVenom/Pygmalion-7b-4bit-Q4_1-GGML
2064
+ userzyzz/piggySharded
2065
+ steinhaug/models-bck
2066
+ blueberrycheesecake/DialoGPT-small-misssophie
2067
+ Imablank/P1GM4L10N-7B-MERGED_WEIGHTS
2068
+ MrToast/idk
2069
+ SouroJ/DialoGPT-medium-Mordecai
2070
+ sasha0552/pygmalion-7b-bf16
2071
+ swajan/DialoGPT-small-Trail-1
2072
+ RobiKenobi/DialoGPT-medium-pete
2073
+ sasha0552/pygmalion-7b-f16-ggml
2074
+ sasha0552/pygmalion-7b-f16
2075
+ winglian/llama-adapter-13b
2076
+ MatLumber/Bisho
2077
+ iconical/MortyChatbotAI
2078
+ swajan/Trail-1
2079
+ swajan/Trail-2
2080
+ Misfit2/DialoGPT-large-Sonic
2081
+ ToddGoldfarb/Cadet-Medium
2082
+ ajpieroni/DiabloGPT-medium-medea
2083
+ AliiaR/DialoGPT-medium-empathetic-dialogues
2084
+ Chun121/ChocolaChat
2085
+ lemoniada/kicerobot
2086
+ Kazeyami-o7/DialoGPT-medium-beterbiffin
2087
+ Elucia/Diluc_Bot
2088
+ Elucia/Diluc_Bot_1.1
2089
+ Elucia/Diluc_Bot_1.2
2090
+ neurofumo/DialoGPT-small-joshua
2091
+ Elucia/Diluc_Bot_1.3
2092
+ GraphicStylz/Stylz
2093
+ naybiblu/ChizuruBot
2094
+ calvindoingstuff/DialoGPT-medium-luffy
2095
+ xZephy/DialoGPT-small-HelperBot
2096
+ crazywombat/DialoGPT-small-abandonware
2097
+ anshengli2/DialoGPT-small-counter-hate
2098
+ sephwalker3/piggy-7b
2099
+ apricxty/DialoGPT-small-chatbot
2100
+ leadmaister/langchain-prompt-master
2101
+ Covriar/DialoGPT-med-kiryu
2102
+ yesuns/DialoGPT-small-yesun
2103
+ davidviriato/DialoGPT-small-joshua
2104
+ VMware/open-llama-0.3T-7B-open-instruct-v1.1
2105
+ prabhguron/DialoGPT-small-harrypotter
2106
+ xHexyy/small-test
2107
+ malteos/bloom-6b4-clp-german-oasst-v0.1
2108
+ Pcik/DialoGPT-medium-Ruby
2109
+ sasha0552/pygmalion-7b-q4_0-ggml
2110
+ sasha0552/pygmalion-7b-q4_1-ggml
2111
+ sasha0552/pygmalion-7b-q5_0-ggml
2112
+ sasha0552/pygmalion-7b-q5_1-ggml
2113
+ sasha0552/pygmalion-7b-q8_0-ggml
2114
+ rjorg543/DialoGPT-small-ben
2115
+ eachadea/ggml-gpt4-x-vicuna-13b
2116
+ Tlethal/DialoGPT-small-harrypotter
2117
+ xHexyy/test2
2118
+ xHexyy/test3
2119
+ ldilov/stablelm-tuned-alpha-7b-4bit-128g-descact-sym-true-sequential
2120
+ AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token
2121
+ jun-ai/BeethovenBot
2122
+ channashi/DialoGPT-small-rocket
2123
+ biscuitbutb/biscuitbot-dialogpt-model
2124
+ ytrbqrkflbvbhy/DialoGPT-small-me-rus
2125
+ Pruz0/VescGPT
2126
+ IlyaGusev/saiga_7b_ggml
2127
+ IlyaGusev/saiga_13b_ggml
2128
+ TechTay/DialoGPT-small-Luciano
2129
+ BlackBull/yeet
2130
+ WAHCLAN/DialoGPT-Medium-SAM
2131
+ MistyIce/dialog-gpt-Heshan
2132
+ Pruz0/LennGPT
2133
+ Wanfq/MAKER-mwoz-full-kb-t5-base
2134
+ Wanfq/MAKER-mwoz-full-kb-t5-large
2135
+ Wanfq/MAKER-smd-condensed-kb-t5-base
2136
+ Wanfq/MAKER-smd-condensed-kb-t5-large
2137
+ Wanfq/MAKER-camrest-condensed-kb-t5-base
2138
+ Wanfq/MAKER-camrest-condensed-kb-t5-large
2139
+ Wanfq/MAKER-camrest-full-kb-t5-base
2140
+ Wanfq/MAKER-camrest-full-kb-t5-large
2141
+ Wanfq/MAKER-mwoz-condensed-kb-t5-base
2142
+ Wanfq/MAKER-mwoz-condensed-kb-t5-large
2143
+ raphaman/test
2144
+ Pruz0/HaLLGPT
2145
+ Binaryy/blender-bot-distill-finetuned
2146
+ alex297/DialoGPT-small-sparky
2147
+ Pruz0/GeoGPT
2148
+ Pruz0/PruzGPT
2149
+ dorkai/pygmalion-2.7b
2150
+ ikocx-to24/DialoGPT-medium-plankton
2151
+ th3d4nk/llamaModel1
2152
+ PygmalionAI/pygmalion-13b
2153
+ TehVenom/Pygmalion-13b-Merged
2154
+ ivaan01/TFG-Mauri
2155
+ alex297/DialoGPT-medium-fox
2156
+ Crataco/Pygmalion-1.3B-GGML
2157
+ SaintMcMuffins/DialoGPT-small-brain2.0
2158
+ dujade18/DialoGPT-medium-dwightoffice
2159
+ TehVenom/Pygmalion-13b-8bit-GPTQ
2160
+ helloerikaaa/chandlerGPT
2161
+ SaintMcMuffins/Brain2.1
2162
+ kb2c37g/DialoGPT-small-Rick
2163
+ alex297/DialoGPT-small-fox
2164
+ TeraSpace/dialofrednocontext
2165
+ EnterNameBros/DialoGPT-small-Senko
2166
+ EnterNameBros/DialoGPT-small-Senko-san
2167
+ 4bit/pyg-7b
2168
+ EnterNameBros/DialoGPT-small-Senko-san-ver
2169
+ Lumiras/rachbot
2170
+ kevintest1234/DialoGPT-small-harrypotter
2171
+ EnterNameBros/DialoGPT-small-Senko-san-ver-2
2172
+ EnterNameBros/DialoGPT-large-Senko-san-ver-2
2173
+ Delmarfish/Delmar
2174
+ diankymar/kitty
2175
+ TatonkaHF/ruDialoGpt3-medium-finetuned-russian-joke
2176
+ EggsInAJar/DialoGPT-small-MerrickBot
2177
+ DBoi/Mayreel2
2178
+ hosst/FridgeLLM
2179
+ loitran/DialoGPT-medium-peppapig
2180
+ Syamil/DialoGPT-small-pixal
2181
+ Avitas8485/Dialogpt-medium-v2
2182
+ Inhaexpress/DialoGPT-medium-harrypotter
2183
+ loitran/DialoGPT-medium-HarryPotter
2184
+ Syamil/DialoGPT-medium-pixal
2185
+ roykim/ko_chat
2186
+ Syamil/DialoGPT-medium-pixals
2187
+ minhcrafters/DialoGPT-small-Fukuya
2188
+ Warren00/DialoGPT-Med-peppa05a
2189
+ Syamil/DialoGPT-medium-pixalbot
2190
+ LelouchH/DiabloGPT-small-RaidenBot
2191
+ Inhaexpress/DialoGPT-medium-shrek124
2192
+ Inhaexpress/DialoGPT-medium-terra1
2193
+ nascar123/Discordtester000
2194
+ EnterNameBros/Offical-Senko-medium-update
2195
+ EnterNameBros/Offical-Senko-medium-update-2
2196
+ EnterNameBros/Offical-Senko-medium-update-3
2197
+ EnterNameBros/Senko-medium
2198
+ jiezhou1996/test
2199
+ ElMater06/SpaceCore
2200
+ EnterNameBros/Offical-Senko-medium
2201
+ EnterNameBros/Senko-san
2202
+ DBoi/Mayreel
2203
+ VMware/open-llama-0.7T-7B-open-instruct-v1.1
2204
+ Warren00/DialoGPT-Small-Peppa06_053123
2205
+ mpalacio/DialoGPT_ootwl
2206
+ protag07/DialoGPT-small-harrypotter
2207
+ h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2
2208
+ cosimoiaia/Loquace-70m
2209
+ cosimoiaia/Loquace-410m
2210
+ MareNoceda/DialoGPT-medium-Luz
2211
+ GarrisonBot/DialoGPT-medium-herbertgarrison
2212
+ cosimoiaia/Loquace-12B
2213
+ cosimoiaia/Loquace-7B
2214
+ Deojoandco/ahGPT-small-v1
2215
+ PeachHeles/bmo
2216
+ Rays236/DialoGPT-small-harrypotter
2217
+ Deojoandco/ahGPT-small-v2
2218
+ Syamil/DialoGPT-medium-newpixal
2219
+ Coderhuynin/DialoGPT-large-TonyStark
2220
+ SotirisLegkas/final_socratic_dialoGPT
2221
+ ademfatnassi/bonjourGPT-small
2222
+ ikocx-to24/DialoGPT-small-planktongpt2
2223
+ EricYou/RickBot
2224
+ Ayaakaa/DialoGPT-small-Yoisaki-Kanade
2225
+ DoesNoPro/DialoGPT-small-RaidenG
2226
+ rajeshbot/DialoGPT-medium-Harry-to-Hari
2227
+ DoesNoPro/DialoGPT-small-RaidenG2
2228
+ SamsonP/pygmalion-6b-sft
2229
+ Deojoandco/ahDialoGPT-small-v4
2230
+ Syamil/GPTNeo-PIXAL-Model
2231
+ Syamil/GPTNeo-PIXAL-new
2232
+ Lattori/DiabloGPT-small-ConanBot
2233
+ Badzee/DialoGPT-medium-jackbot
2234
+ meowsynth/DialoGPT-small-sophie
2235
+ EnterNameBros/Senko-san-medium-baby
2236
+ Deojoandco/ah-GPT2-v4
2237
+ cosimoiaia/Loquace-20B
2238
+ EnterNameBros/Senko-san-medium-fox
2239
+ MarkyMarx/DialoGPT-medium-jimmybot2
2240
+ DhruvShek/DialoGPT
2241
+ Doge22/DialoGPT-medium-max
2242
+ lyogavin/Anima33B
2243
+ steerevo88/testThotBot
2244
+ steerevo88/workingthotBot
2245
+ YTTD/DialoGPT-medium-keiji
2246
+ MisguidedKerbal/DialoGPT-medium-kerbal
2247
+ Blueify/DialoGPT-small-model-lotr
2248
+ steerevo88/newthotBot
2249
+ paripi/Malishka
2250
+ finex/pfe-mohamed2023-RON
2251
+ DhruvShek/CMDGPT
2252
+ finex/pfe-mohamed2023-Hermione
2253
+ SkylerBlu9/DialoGPT-medium-CitrAI
2254
+ SkylerBlu9/DialoGPT-medium-autismobot
2255
+ MisguidedKerbal/DialoGPT-kerbalV2
2256
+ EnterNameBros/Senko-san-medium-a
2257
+ dderr/testmodel
2258
+ priyanshdahiya/DialoGPT-small-rick
2259
+ Goodnoway/DialoGPT-nerbalV2
2260
+ WompWomp1/DialoGPT-medium-Kirin
2261
+ lyogavin/Anima33B-merged
2262
+ peytonai/DialoGPT-small-wali-joshua
2263
+ MisguidedKerbal/DialoGPT-kerbalV3
2264
+ WompWomp1/DialoGPT-medium-Kaori
2265
+ OmarDiab/DialoGPT-small-Amogus
2266
+ servetier/DialoGPT-large-miguel
2267
+ OmarDiab/DialoGPT-small-Amogus-2
2268
+ steveglover/falcon-7b-instruct-telco-chat
2269
+ Lazycuber/Janemalion-6B
2270
+ Goodnoway/DialoGPT-nerbalV4
2271
+ gvij/gpt-j-6B-alpaca-gpt4
2272
+ papahawk/keya-560m
2273
+ JavRedstone/DialoGPT-small-tesseractist
2274
+ imuncomfortable/DiabloGPT-small-CocoAtarashi
2275
+ Amod/falcon7b-fine-tuned-therapy-merged
2276
+ Oshirigami1980/DialoGPT-medium-Steven
2277
+ Drevanil/DialoGPT-small-try
2278
+ Yaewe/1
2279
+ DataHammer/mozi_emotional_7b
2280
+ udxyz/HarryPotterBot
2281
+ Kasyapa/DialoGPT-medium-hagridbot
2282
+ lyogavin/Anima33B-DPO-Belle-1k
2283
+ JeanL-0/TestingModel-01
2284
+ TejasC2/DialoGPT-TejasBot
2285
+ lyogavin/Anima33B-DPO-Belle-1k-merged
2286
+ InterruptAI/Interrupt-350M
2287
+ Lucideds/Lucideds
2288
+ EnterNameBros/Senko-san-medium-sc
2289
+ EnterNameBros/Senko-san-medium-scl
2290
+ DaddySen/tighnari
2291
+ ettevyemerald/DialoGPT-medium-beomgyu
2292
+ minhcrafters/DialoGPT-small-mindwandering
2293
+ JNDankwah/DialoGPT-small-ThorCB
2294
+ minhcrafters/DialoGPT-medium-Zephirel
2295
+ papahawk/falcon-40b
2296
+ sonntt/DialoGPT-small-mindwandering
2297
+ pundapog/DialoGPT-medium-ethanbot
2298
+ TheBloke/Pygmalion-7B-SuperHOT-8K-GGML
2299
+ TheBloke/Pygmalion-7B-SuperHOT-8K-fp16
2300
+ pobierz69/model-6b-read-desc
2301
+ sidca/Cam
2302
+ EnterNameBros/Senko-san-medium-abc
2303
+ abhi-8/DialoGPT-medium-Michael
2304
+ abhi-8/DialoGPT-medium-Rick
2305
+ abhi-8/DialoGPT-medium-Joshua-twevy
2306
+ spitfire4794/dialogpt-small-rick
2307
+ abhi-8/Joshua-bot
2308
+ Justus-Jonas/Imaginary-Embeddings-Classic
2309
+ Justus-Jonas/Imaginary-Embeddings-SpeakerTokens
2310
+ Justus-Jonas/Imaginary-Embeddings-SpeakerTokens-STP
2311
+ spitfire4794/dialogpt-small-morty
2312
+ Kauru/DialoGPT-medium-Ranni
2313
+ crazydamns/DialoGPT-Johnny2
2314
+ jpandeinge/DialoGPT-medium-Oshiwambo-Bot
2315
+ custads23/pygmalion-1.3b
2316
+ HatCha01/DialoGPT-small-Batman
2317
+ crazydamns/DialoGPT-Johnny3
2318
+ assembleteams/curiouspi
2319
+ Kauru/DialoGPT-medium-Ranniv2
2320
+ SatwikShrivastava/narutoAI-chatbot
2321
+ digitalmax1/max
2322
+ adr2432/small-Joshua-Bot
2323
+ ObsessedCitrus/DialoGPT-small-PeterBot_ChatBot
2324
+ suarkadipa/HubermanGPT-small-v1
2325
+ suarkadipa/HarryPotterGPT-small-v1
2326
+ wevie1978/DialoGPT-medium-Kebb
2327
+ kopeqwerty/DialoGPT-medium-idotbot
2328
+ zelalt/Chatbot_T5-Prmtrs
2329
+ jarvissss/DialoGPT-medium-idotbot
2330
+ Magmadue/DiabloGPT-small-ei
2331
+ nicbull/DialoGPT-small-cryptonic
2332
+ nicbull/DialoGPT-small-cryptonic2
2333
+ chloe0x0/DialoGPT-small-Muty
2334
+ chloe0x0/mutyGPT
2335
+ alexwang05/DialoGPT-small-soph
2336
+ BHAndersonJr/DialoGPT-small-fry
2337
+ timothykim04/DialoGPT-medium-timothykim
2338
+ timothykim04/DialoGPT-medium-harrypotter
2339
+ Luca999/Limitlessai99
2340
+ Madgimmy/DiabloGPT-small-Madgimmy
2341
+ chloe0x0/mutyGPT-v2
2342
+ nuggster/DialoGPT-small-ianbot
2343
+ we1kkk/llama2-hf-qlora-oasst1
2344
+ IlyaGusev/saiga2_7b_lora
2345
+ IlyaGusev/gigasaiga_lora
2346
+ jliu03/JustinBot
2347
+ heliosbrahma/falcon-7b-finetuned-mental-health-conversational
2348
+ drunknmonk/GPT-Chandler
2349
+ jun-ai/llama2-qlora-finetunined-french
2350
+ WompWomp1/DialoGPT-large-Kirin
2351
+ WompWomp1/DialoGPT-large-Kirin-2
2352
+ WompWomp1/DialoGPT-large-Rin
2353
+ or4cl3ai/Aiden_t5
2354
+ jstawski/Llama-2-13b-hf-finetuned-SNG
2355
+ Gelmo/Halouf
2356
+ IlyaGusev/saiga2_13b_lora
2357
+ sophji/DialoGPT-small-GodlyLJ
2358
+ ATrapenard/Discord-Impersonation-Bot
2359
+ hiamitabha/llama2forbittlerobot
2360
+ IlyaGusev/saiga2_7b_gguf
2361
+ IlyaGusev/saiga2_13b_gguf
2362
+ TejasC2/DialoGPT-TejasBot2
2363
+ CNR223/DialoGPT-medium-MalcolmReynold
2364
+ minh-hahaha/DialoGPT-small-harrypotter
2365
+ phucnq1591999/SolanaChatBot
2366
+ marclove/llama-2-7b-chat-functions
2367
+ Sheerapi/test
2368
+ YukioKoito/DialoGPT-small-chibi
2369
+ YukioKoito/DialoGPT-small-twilight
2370
+ amzrana/lora
2371
+ ierhon/basic-chatbot
2372
+ Pula23/Hggjg
2373
+ Focs/DialoGPT-medium-tony-stark
2374
+ Kenobiwan/DialoGPT-small-AizakkuBot2
2375
+ drado/DialoGPT-small-joshua
2376
+ rah-1/Rahulio
2377
+ tanishqvashisht/DialoGPT-small-Joshua
2378
+ Kenobiwan/DialoGPT-small-AizakkuBot3
2379
+ Ridloo/DialogGPT-small-harrypotter
2380
+ dyuhong80/DialoGPT-large-ModerateEffortBombGPT
2381
+ ai-forever/paper_persi_chat
2382
+ paralleldynamix/paralleldynamix-model101
2383
+ kelSidenna/SoftwareRequirements-T5-Base
2384
+ renahime/DialoGPT-medium-umineko
2385
+ Shaun1204/RedGPT-Gormlee
2386
+ diwas7777/HarryBot
2387
+ heliosbrahma/falcon-7b-sharded-bf16-finetuned-mental-health-conversational
2388
+ kelSidenna/SoftwareReq-DialoGPT-medium
2389
+ shanover/medbot-conv
2390
+ J-Wiggler/DialoGPT-medium-Stanley
2391
+ gearski/DialoGPT-small-itskleb
2392
+ wozniakclub/llama-2-7b-medtext-llama2
2393
+ gearski/DialoGPT-medium-itskleb
2394
+ rebornrulz/Rulz-AI
2395
+ Quantsr/DialogGPT-small-Aeris
2396
+ ostorc/rick-sanchez-chatbot
2397
+ nicbull/DialoGPT-medium-nic
2398
+ nicbull/DialoGPT-medium-nic2
2399
+ gorkemgoknar/llama2-7f-moviechatbot-ggml-q4
2400
+ aka-nikko/ainz-ooal-gown
2401
+ llSourcell/medllama2_7b
2402
+ xtuner/Llama-2-7b-qlora-moss-003-sft
2403
+ xtuner/Llama-2-7b-qlora-arxiv-gentitle
2404
+ xtuner/internlm-7b-qlora-arxiv-gentitle
2405
+ xtuner/internlm-7b-qlora-alpaca-enzh
2406
+ xtuner/Baichuan-7B-qlora-arxiv-gentitle
2407
+ xtuner/Baichuan-7B-qlora-alpaca-enzh
2408
+ nicbull/DialoGPT-medium-leric
2409
+ Ian-14/llm13
2410
+ theastro/starkbot
2411
+ yupimrandy/DialoGPT-medium-butcher
2412
+ hclaim/clamgptattempt4
2413
+ yupimrandy/DialoGPT-medium-hughie
2414
+ nekohacker591/google1
2415
+ zhmx31/Mychatbot
2416
+ sk8ingcat/DialoGPT-small-TonyStark
2417
+ SanchoJR/meX
2418
+ xtuner/Qwen-7B-qlora-moss-003-sft
2419
+ xtuner/Qwen-7B-qlora-arxiv-gentitle
2420
+ xtuner/Qwen-7B-qlora-alpaca-enzh
2421
+ xtuner/Qwen-7B-qlora-oasst1
2422
+ xtuner/Baichuan-7B-qlora-oasst1
2423
+ xtuner/internlm-7b-qlora-oasst1
2424
+ 4bit/medllama2_7b
2425
+ JGKD/JangoGPTv1.0
2426
+ kwankwan1000/DialoGPT-small-peppa
2427
+ JGKD/JangoGPTv1.5
2428
+ SoniR/config
2429
+ mjyh/falcon-7b-qlora-sclue-20230601-04-merged
2430
+ sadzip/SiberianPersona-ruGPT-3.5-qlora
2431
+ Wolffire88/DialoGPT-medium-Android16
2432
+ nolly3317/DialoGPT-small-alice
2433
+ feelinrealcute/pym-6b
2434
+ nixsy/AvasLove
2435
+ feelinrealcute/pym-13b7
2436
+ AleksiDu/HarryPotterBot
2437
+ Belcebuzzz/DialoGPT-small-TomoGF
2438
+ xtuner/internlm-7b-qlora-lawyer
2439
+ xtuner/internlm-7b-qlora-colorist
2440
+ xtuner/internlm-7b-qlora-coder
2441
+ xtuner/internlm-7b-qlora-open-platypus
2442
+ xtuner/internlm-7b-qlora-sql
2443
+ inception-mbzuai/jais-13b-chat
2444
+ Fredithefish/Guanaco-3B-Uncensored
2445
+ garrachonr/LlamaDos
2446
+ literallywood/DialoGPT-small-ekansh
2447
+ IALABS/Arturosfastfood
2448
+ javieitor/DialoGPT-medium-Rick
2449
+ Kuduxaaa/ava-small
2450
+ Al-Hathboor-Bikal-ai-2023/SRTIP-GPT-F7B-base
2451
+ L-R/LLmRa-355M
2452
+ Fredithefish/Guanaco-3B-Uncensored-v2
2453
+ xtuner/Llama-2-7b-qlora-colorist
2454
+ KE-AI/basicchatbot-kel
2455
+ josepholiver/TEST_MODEL_1
2456
+ PlaceReporter99/Utility_Bot_Chat
2457
+ J-Wiggler2/Caesar
2458
+ J-Wiggler2/Caesar2
2459
+ matvalan/vittae-cot
2460
+ Dawnstarhunter/DialoGPT-medium-Eveline
2461
+ sahilxyd/DialoGPT-small-joshua
2462
+ EnterNameBros/Senko-san-medium-abcd
2463
+ 6adityaverma/DialoGPT-large-Walter
2464
+ 6adityaverma/DialoGPT-large-Rick
2465
+ IlyaGusev/saiga2_70b_lora
2466
+ AyushK0808/StarWarsBot
2467
+ EnterNameBros/Senko-ai-medium
2468
+ Fredithefish/Guanaco-7B-Uncensored
2469
+ IlyaGusev/saiga2_70b_gguf
2470
+ glassofwine/DialoGPT-medium-johanwine
2471
+ zattio770/120-Days-of-LORA-v2-13B
2472
+ cannice/blenderbot-400M-distill-empathetic
2473
+ Likelihood94/Jackoftrades
2474
+ Hapski/DialoGPT-small-nene
2475
+ Fredithefish/Guanaco-13B-Uncensored
2476
+ kitbear444/DialoGPT-medium-kit
2477
+ SonnyAu/DialoGPT-dumbledore
2478
+ TheBloke/Guanaco-7B-Uncensored-GGUF
2479
+ TheBloke/Guanaco-13B-Uncensored-GGUF
2480
+ TheBloke/Guanaco-7B-Uncensored-GPTQ
2481
+ TheBloke/Guanaco-13B-Uncensored-GPTQ
2482
+ TheBloke/Guanaco-3B-Uncensored-v2-GPTQ
2483
+ TheBloke/Guanaco-3B-Uncensored-v2-GGML
2484
+ Codexister/DialoGPT-medium-KafkaBotV1
2485
+ mfodwo/STUGPT-small-v1
2486
+ asas-ai/jais-13b-chat-8bit
2487
+ SoupChickn/Valeen-DialoGPT
2488
+ Codexister/DialoGPT-medium-KafkaBotV2
2489
+ KoalaAI/OPT-1.3b-Chat
2490
+ Nafaille/nafaille6b
2491
+ DiTy/dialogpt
2492
+ Severus27/BeingWell_llama2_7b
2493
+ rayho/DialoGPT-small-polysoft
2494
+ TuningAI/Llama2_13B_startup_Assistant
2495
+ dipxsy/testmodel
2496
+ dipxsy/Jarvis-small
2497
+ Lazycuber/L2-7b-Chat-Guanaco-Uncensored
2498
+ dipxsy/jarvis-blend
2499
+ TheBloke/Guanaco-13B-Uncensored-AWQ
2500
+ TheBloke/Guanaco-7B-Uncensored-AWQ
2501
+ wstock04/shiddeatorBotV1
2502
+ Boqianshen/llama-2-7b-miniguanaco
2503
+ sebastiantrbl/distilgpt2-finetuned-wikitext2
2504
+ herzlixh/DialoGPTs_HarryFromHogwarts
2505
+ poiccard/jais-13b-chat-adn
2506
+ sebastiantrbl/test-DialoGPT-finetune
2507
+ uffergist/DialoGPT-small-cummy
2508
+ wstock04/shiddeatorBotV3.0
2509
+ wstock04/shiddeatorBotDUMB
2510
+ Applekinz/John
2511
+ Or4cl3/1nsfw
2512
+ sebastiantrbl/DialoGPT-finetuned-daily-dialog
2513
+ LTC-AI-Labs/L2-7b-Base-WVG-Uncensored
2514
+ hussain2030/jais13bchat2
2515
+ subabi/DialoGPT-medium-subabicord
2516
+ marblyso/DialoGPT-medium-collin
2517
+ Crataco/Pygmalion-6B-GGML
2518
+ dipxsy/jl
2519
+ testerhubhai/krnedo
2520
+ IAteSpaghettiForLunch/DialoGPT-medium-GLADoS
2521
+ IAteSpaghettiForLunch/GLADoSBOT
2522
+ Nikolai5592/DialoGPT-Medium-RickBot
2523
+ KuroganeNiello/medium-NebBot
litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt ADDED
The diff for this file is too large to render. See raw diff
 
litellm/llms/huggingface_restapi.py ADDED
@@ -0,0 +1,750 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Uses the huggingface text generation inference API
2
+ import os, copy, types
3
+ import json
4
+ from enum import Enum
5
+ import httpx, requests
6
+ from .base import BaseLLM
7
+ import time
8
+ import litellm
9
+ from typing import Callable, Dict, List, Any
10
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, Usage
11
+ from typing import Optional
12
+ from .prompt_templates.factory import prompt_factory, custom_prompt
13
+
14
+
15
class HuggingfaceError(Exception):
    """
    Error raised for failed or malformed Hugging Face responses.

    Carries ``status_code`` and ``message`` plus httpx ``request`` /
    ``response`` objects. When the caller does not supply them, placeholders
    are built: a POST request to the public inference endpoint and a response
    with the given status code bound to that request.
    """

    def __init__(
        self,
        status_code,
        message,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
    ):
        self.status_code = status_code
        self.message = message
        # The request is resolved first because the placeholder response
        # below is bound to it.
        self.request = (
            httpx.Request(
                method="POST", url="https://api-inference.huggingface.co/models"
            )
            if request is None
            else request
        )
        self.response = (
            httpx.Response(status_code=status_code, request=self.request)
            if response is None
            else response
        )
        # Hand the message to Exception so str(error) shows it.
        super().__init__(self.message)
40
+
41
+
42
class HuggingfaceConfig:
    """
    Default generation parameters sent to Hugging Face text generation.

    Reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/compat_generate

    The values live on the *class*, not on instances: calling
    ``HuggingfaceConfig(top_k=3)`` overwrites ``HuggingfaceConfig.top_k`` and
    therefore changes what every later ``get_config()`` call returns.
    """

    best_of: Optional[int] = None
    decoder_input_details: Optional[bool] = None
    details: Optional[bool] = True  # enables returning logprobs + best of
    max_new_tokens: Optional[int] = None
    repetition_penalty: Optional[float] = None
    return_full_text: Optional[
        bool
    ] = False  # by default don't return the input as part of the output
    seed: Optional[int] = None
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_n_tokens: Optional[int] = None
    top_p: Optional[float] = None  # nucleus-sampling probability, not an integer
    truncate: Optional[int] = None
    typical_p: Optional[float] = None
    watermark: Optional[bool] = None

    def __init__(
        self,
        best_of: Optional[int] = None,
        decoder_input_details: Optional[bool] = None,
        details: Optional[bool] = None,
        max_new_tokens: Optional[int] = None,
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        truncate: Optional[int] = None,
        typical_p: Optional[float] = None,
        watermark: Optional[bool] = None,
    ) -> None:
        """Store every argument that is not ``None`` as a class-level default."""
        # locals() must be captured before any other local is created so it
        # holds only ``self`` and the keyword arguments.
        overrides = locals()
        for key, value in overrides.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """
        Return the currently configured defaults as a plain dict.

        Dunder entries, functions, class/static methods and attributes that
        are still ``None`` are left out.
        """
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
103
+
104
+
105
def output_parser(generated_text: str):
    """
    Strip chat-template tokens from the start and end of generated text.

    Only ChatML-style tokens are handled. For each known token, one leading
    and one trailing occurrence is removed; whitespace around the token is
    ignored when detecting it and is left in place in the result. Tokens in
    the middle of the text are untouched.

    Initial issue that prompted this - https://github.com/BerriAI/litellm/issues/763
    """
    chat_template_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"]
    for token in chat_template_tokens:
        if generated_text.strip().startswith(token):
            generated_text = generated_text.replace(token, "", 1)
        # Detect the trailing token the same way as the leading one, so that
        # output such as "text</s>\n" is cleaned as well.
        if generated_text.strip().endswith(token):
            cut = generated_text.rfind(token)  # last occurrence == the trailing one
            generated_text = generated_text[:cut] + generated_text[cut + len(token) :]
    return generated_text
118
+
119
+
120
tgi_models_cache = None
conv_models_cache = None


def _read_model_list(file_name):
    """Return the non-empty, stripped lines of a bundled metadata file as a set.

    The file is looked up in the ``huggingface_llms_metadata`` directory next
    to this module. An unreadable or missing file yields an empty set.
    """
    file_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "huggingface_llms_metadata",
        file_name,
    )
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return {line.strip() for line in file if line.strip()}
    except (OSError, UnicodeDecodeError):
        # Best-effort lookup: callers fall back to a default task.
        return set()


def read_tgi_conv_models():
    """
    Return ``(tgi_models, conversational_models)`` as two sets of model ids.

    The lists are read once and kept in the module-level caches so repeated
    requests do not hit the filesystem again. The outcome is cached even
    when a file could not be read (as an empty set), so a missing file is
    not retried on every call.
    """
    global tgi_models_cache, conv_models_cache
    if tgi_models_cache is None or conv_models_cache is None:
        tgi_models_cache = _read_model_list("hf_text_generation_models.txt")
        conv_models_cache = _read_model_list("hf_conversational_models.txt")
    return tgi_models_cache, conv_models_cache
163
+
164
+
165
def get_hf_task_for_model(model):
    """
    Pick the Hugging Face task used to build the request for ``model``.

    Membership in the bundled model lists is checked first (text generation
    before conversational). Models whose name contains
    "roneneldan/TinyStories" get ``None``; everything else falls back to
    text-generation-inference.
    """
    tgi_names, conversational_names = read_tgi_conv_models()
    if model in tgi_names:
        return "text-generation-inference"
    if model in conversational_names:
        return "conversational"
    if "roneneldan/TinyStories" in model:
        return None
    return "text-generation-inference"  # default to tgi
177
+
178
+
179
+ class Huggingface(BaseLLM):
180
+ _client_session: Optional[httpx.Client] = None
181
+ _aclient_session: Optional[httpx.AsyncClient] = None
182
+
183
def __init__(self) -> None:
    """Initialise the handler; all setup is delegated to the base class."""
    super().__init__()
185
+
186
def validate_environment(self, api_key, headers):
    """
    Build the HTTP headers for a request.

    Caller-supplied non-empty ``headers`` are returned untouched. Otherwise a
    JSON content-type header is used, and a bearer ``Authorization`` header
    is added only when ``headers`` is ``None`` and ``api_key`` is truthy.
    """
    if headers:
        return headers
    request_headers = {
        "content-type": "application/json",
    }
    if headers is None and api_key:
        # Huggingface Inference Endpoint default is to accept bearer tokens
        request_headers["Authorization"] = f"Bearer {api_key}"
    return request_headers
200
+
201
def convert_to_model_response_object(
    self,
    completion_response,
    model_response,
    task,
    optional_params,
    encoding,
    input_text,
    model,
):
    """
    Copy a raw Hugging Face payload into ``model_response`` and attach usage.

    Args:
        completion_response: Decoded JSON payload. A dict (or one-element
            list) for the conversational task, a list of dicts otherwise.
        model_response: Response object that is filled in and returned.
        task: "conversational", "text-generation-inference", or anything
            else for the generic path.
        optional_params: Request parameters; only ``best_of`` is read here.
        encoding: Tokenizer exposing ``encode``; used for usage counts.
        input_text: Prompt text, used for the prompt token count.
        model: Model name written into the response.

    Returns:
        The same ``model_response`` object, populated.

    Raises:
        HuggingfaceError: status 422 when a text-generation-inference payload
            is not a non-empty list of dicts carrying ``generated_text``.
    """

    def _sum_logprobs(tokens):
        # Tokens may report ``logprob: None``; those are skipped.
        return sum(t["logprob"] for t in tokens if t["logprob"] is not None)

    def _count_tokens(text):
        # Usage is best-effort: a tokenizer failure must not block the response.
        try:
            return len(encoding.encode(text))  ##[TODO] use the llama2 tokenizer here
        except Exception:
            return 0

    if task == "conversational":
        conversation = completion_response
        # NOTE(review): the sync completion path wraps dict payloads in a
        # one-element list before parsing; accept both shapes here. Confirm
        # against the call site.
        if isinstance(conversation, list) and len(conversation) > 0:
            conversation = conversation[0]
        if len(conversation["generated_text"]) > 0:  # type: ignore
            model_response["choices"][0]["message"]["content"] = conversation[
                "generated_text"
            ]  # type: ignore
    elif task == "text-generation-inference":
        if (
            not isinstance(completion_response, list)
            or len(completion_response) == 0  # empty list used to raise IndexError
            or not isinstance(completion_response[0], dict)
            or "generated_text" not in completion_response[0]
        ):
            raise HuggingfaceError(
                status_code=422,
                message=f"response is not in expected format - {completion_response}",
            )

        first = completion_response[0]
        if len(first["generated_text"]) > 0:
            model_response["choices"][0]["message"]["content"] = output_parser(
                first["generated_text"]
            )
        ## GETTING LOGPROBS + FINISH REASON
        if "details" in first and "tokens" in first["details"]:
            model_response.choices[0].finish_reason = first["details"][
                "finish_reason"
            ]
            model_response["choices"][0]["message"]._logprob = _sum_logprobs(
                first["details"]["tokens"]
            )
        if "best_of" in optional_params and optional_params["best_of"] > 1:
            if "details" in first and "best_of_sequences" in first["details"]:
                choices_list = []
                for idx, item in enumerate(first["details"]["best_of_sequences"]):
                    sum_logprob = _sum_logprobs(item["tokens"])
                    if len(item["generated_text"]) > 0:
                        message_obj = Message(
                            content=output_parser(item["generated_text"]),
                            logprobs=sum_logprob,
                        )
                    else:
                        message_obj = Message(content=None)
                    # Index 0 is the primary choice filled in above.
                    choice_obj = Choices(
                        finish_reason=item["finish_reason"],
                        index=idx + 1,
                        message=message_obj,
                    )
                    choices_list.append(choice_obj)
                model_response["choices"].extend(choices_list)
    else:
        if len(completion_response[0]["generated_text"]) > 0:
            model_response["choices"][0]["message"]["content"] = output_parser(
                completion_response[0]["generated_text"]
            )
    ## CALCULATING USAGE
    prompt_tokens = _count_tokens(input_text)
    output_text = model_response["choices"][0]["message"].get("content", "")
    if output_text is not None and len(output_text) > 0:
        completion_tokens = _count_tokens(output_text)
    else:
        completion_tokens = 0

    model_response["created"] = int(time.time())
    model_response["model"] = model
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    model_response.usage = usage
    model_response._hidden_params["original_response"] = completion_response
    return model_response
312
+
313
+ def completion(
314
+ self,
315
+ model: str,
316
+ messages: list,
317
+ api_base: Optional[str],
318
+ headers: Optional[dict],
319
+ model_response: ModelResponse,
320
+ print_verbose: Callable,
321
+ timeout: float,
322
+ encoding,
323
+ api_key,
324
+ logging_obj,
325
+ custom_prompt_dict={},
326
+ acompletion: bool = False,
327
+ optional_params=None,
328
+ litellm_params=None,
329
+ logger_fn=None,
330
+ ):
331
+ super().completion()
332
+ exception_mapping_worked = False
333
+ try:
334
+ headers = self.validate_environment(api_key, headers)
335
+ task = get_hf_task_for_model(model)
336
+ print_verbose(f"{model}, {task}")
337
+ completion_url = ""
338
+ input_text = ""
339
+ if "https" in model:
340
+ completion_url = model
341
+ elif api_base:
342
+ completion_url = api_base
343
+ elif "HF_API_BASE" in os.environ:
344
+ completion_url = os.getenv("HF_API_BASE", "")
345
+ elif "HUGGINGFACE_API_BASE" in os.environ:
346
+ completion_url = os.getenv("HUGGINGFACE_API_BASE", "")
347
+ else:
348
+ completion_url = f"https://api-inference.huggingface.co/models/{model}"
349
+
350
+ ## Load Config
351
+ config = litellm.HuggingfaceConfig.get_config()
352
+ for k, v in config.items():
353
+ if (
354
+ k not in optional_params
355
+ ): # completion(top_k=3) > huggingfaceConfig(top_k=3) <- allows for dynamic variables to be passed in
356
+ optional_params[k] = v
357
+
358
+ ### MAP INPUT PARAMS
359
+ if task == "conversational":
360
+ inference_params = copy.deepcopy(optional_params)
361
+ inference_params.pop("details")
362
+ inference_params.pop("return_full_text")
363
+ past_user_inputs = []
364
+ generated_responses = []
365
+ text = ""
366
+ for message in messages:
367
+ if message["role"] == "user":
368
+ if text != "":
369
+ past_user_inputs.append(text)
370
+ text = message["content"]
371
+ elif message["role"] == "assistant" or message["role"] == "system":
372
+ generated_responses.append(message["content"])
373
+ data = {
374
+ "inputs": {
375
+ "text": text,
376
+ "past_user_inputs": past_user_inputs,
377
+ "generated_responses": generated_responses,
378
+ },
379
+ "parameters": inference_params,
380
+ }
381
+ input_text = "".join(message["content"] for message in messages)
382
+ elif task == "text-generation-inference":
383
+ # always send "details" and "return_full_text" as params
384
+ if model in custom_prompt_dict:
385
+ # check if the model has a registered custom prompt
386
+ model_prompt_details = custom_prompt_dict[model]
387
+ prompt = custom_prompt(
388
+ role_dict=model_prompt_details.get("roles", None),
389
+ initial_prompt_value=model_prompt_details.get(
390
+ "initial_prompt_value", ""
391
+ ),
392
+ final_prompt_value=model_prompt_details.get(
393
+ "final_prompt_value", ""
394
+ ),
395
+ messages=messages,
396
+ )
397
+ else:
398
+ prompt = prompt_factory(model=model, messages=messages)
399
+ data = {
400
+ "inputs": prompt,
401
+ "parameters": optional_params,
402
+ "stream": True
403
+ if "stream" in optional_params and optional_params["stream"] == True
404
+ else False,
405
+ }
406
+ input_text = prompt
407
+ else:
408
+ # Non TGI and Conversational llms
409
+ # We need this branch, it removes 'details' and 'return_full_text' from params
410
+ if model in custom_prompt_dict:
411
+ # check if the model has a registered custom prompt
412
+ model_prompt_details = custom_prompt_dict[model]
413
+ prompt = custom_prompt(
414
+ role_dict=model_prompt_details.get("roles", {}),
415
+ initial_prompt_value=model_prompt_details.get(
416
+ "initial_prompt_value", ""
417
+ ),
418
+ final_prompt_value=model_prompt_details.get(
419
+ "final_prompt_value", ""
420
+ ),
421
+ bos_token=model_prompt_details.get("bos_token", ""),
422
+ eos_token=model_prompt_details.get("eos_token", ""),
423
+ messages=messages,
424
+ )
425
+ else:
426
+ prompt = prompt_factory(model=model, messages=messages)
427
+ inference_params = copy.deepcopy(optional_params)
428
+ inference_params.pop("details")
429
+ inference_params.pop("return_full_text")
430
+ data = {
431
+ "inputs": prompt,
432
+ "parameters": inference_params,
433
+ "stream": True
434
+ if "stream" in optional_params and optional_params["stream"] == True
435
+ else False,
436
+ }
437
+ input_text = prompt
438
+ ## LOGGING
439
+ logging_obj.pre_call(
440
+ input=input_text,
441
+ api_key=api_key,
442
+ additional_args={
443
+ "complete_input_dict": data,
444
+ "task": task,
445
+ "headers": headers,
446
+ "api_base": completion_url,
447
+ "acompletion": acompletion,
448
+ },
449
+ )
450
+ ## COMPLETION CALL
451
+ if acompletion is True:
452
+ ### ASYNC STREAMING
453
+ if optional_params.get("stream", False):
454
+ return self.async_streaming(logging_obj=logging_obj, api_base=completion_url, data=data, headers=headers, model_response=model_response, model=model, timeout=timeout) # type: ignore
455
+ else:
456
+ ### ASYNC COMPLETION
457
+ return self.acompletion(api_base=completion_url, data=data, headers=headers, model_response=model_response, task=task, encoding=encoding, input_text=input_text, model=model, optional_params=optional_params, timeout=timeout) # type: ignore
458
+ ### SYNC STREAMING
459
+ if "stream" in optional_params and optional_params["stream"] == True:
460
+ response = requests.post(
461
+ completion_url,
462
+ headers=headers,
463
+ data=json.dumps(data),
464
+ stream=optional_params["stream"],
465
+ )
466
+ return response.iter_lines()
467
+ ### SYNC COMPLETION
468
+ else:
469
+ response = requests.post(
470
+ completion_url, headers=headers, data=json.dumps(data)
471
+ )
472
+
473
+ ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
474
+ is_streamed = False
475
+ if (
476
+ response.__dict__["headers"].get("Content-Type", "")
477
+ == "text/event-stream"
478
+ ):
479
+ is_streamed = True
480
+
481
+ # iterate over the complete streamed response, and return the final answer
482
+ if is_streamed:
483
+ streamed_response = CustomStreamWrapper(
484
+ completion_stream=response.iter_lines(),
485
+ model=model,
486
+ custom_llm_provider="huggingface",
487
+ logging_obj=logging_obj,
488
+ )
489
+ content = ""
490
+ for chunk in streamed_response:
491
+ content += chunk["choices"][0]["delta"]["content"]
492
+ completion_response: List[Dict[str, Any]] = [
493
+ {"generated_text": content}
494
+ ]
495
+ ## LOGGING
496
+ logging_obj.post_call(
497
+ input=input_text,
498
+ api_key=api_key,
499
+ original_response=completion_response,
500
+ additional_args={"complete_input_dict": data, "task": task},
501
+ )
502
+ else:
503
+ ## LOGGING
504
+ logging_obj.post_call(
505
+ input=input_text,
506
+ api_key=api_key,
507
+ original_response=response.text,
508
+ additional_args={"complete_input_dict": data, "task": task},
509
+ )
510
+ ## RESPONSE OBJECT
511
+ try:
512
+ completion_response = response.json()
513
+ if isinstance(completion_response, dict):
514
+ completion_response = [completion_response]
515
+ except:
516
+ import traceback
517
+
518
+ raise HuggingfaceError(
519
+ message=f"Original Response received: {response.text}; Stacktrace: {traceback.format_exc()}",
520
+ status_code=response.status_code,
521
+ )
522
+ print_verbose(f"response: {completion_response}")
523
+ if (
524
+ isinstance(completion_response, dict)
525
+ and "error" in completion_response
526
+ ):
527
+ print_verbose(f"completion error: {completion_response['error']}")
528
+ print_verbose(f"response.status_code: {response.status_code}")
529
+ raise HuggingfaceError(
530
+ message=completion_response["error"],
531
+ status_code=response.status_code,
532
+ )
533
+ return self.convert_to_model_response_object(
534
+ completion_response=completion_response,
535
+ model_response=model_response,
536
+ task=task,
537
+ optional_params=optional_params,
538
+ encoding=encoding,
539
+ input_text=input_text,
540
+ model=model,
541
+ )
542
+ except HuggingfaceError as e:
543
+ exception_mapping_worked = True
544
+ raise e
545
+ except Exception as e:
546
+ if exception_mapping_worked:
547
+ raise e
548
+ else:
549
+ import traceback
550
+
551
+ raise HuggingfaceError(status_code=500, message=traceback.format_exc())
552
+
553
async def acompletion(
    self,
    api_base: str,
    data: dict,
    headers: dict,
    model_response: ModelResponse,
    task: str,
    encoding: Any,
    input_text: str,
    model: str,
    optional_params: dict,
    timeout: float,
):
    """POST `data` to `api_base` asynchronously and build the model response.

    The JSON body is converted with `self.convert_to_model_response_object`.

    Raises:
        HuggingfaceError: carrying the upstream status code when the reply is
            not a 200; status 500 for timeouts and for any other failure
            (e.g. an unparsable body).
    """
    response = None
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(url=api_base, json=data, headers=headers)
            # Check the status before parsing: an error reply may not be JSON,
            # and parsing first would hide the real status code behind a
            # decode error.
            if response.status_code != 200:
                raise HuggingfaceError(
                    status_code=response.status_code,
                    message=response.text,
                    request=response.request,
                    response=response,
                )
            response_json = response.json()

            ## RESPONSE OBJECT
            return self.convert_to_model_response_object(
                completion_response=response_json,
                model_response=model_response,
                task=task,
                encoding=encoding,
                input_text=input_text,
                model=model,
                optional_params=optional_params,
            )
    except HuggingfaceError:
        # Already mapped; re-raise untouched so the status code survives
        # instead of being rewritten to a generic 500 below.
        raise
    except Exception as e:
        if isinstance(e, httpx.TimeoutException):
            raise HuggingfaceError(status_code=500, message="Request Timeout Error")
        elif response is not None and hasattr(response, "text"):
            raise HuggingfaceError(
                status_code=500,
                message=f"{str(e)}\n\nOriginal Response: {response.text}",
            )
        else:
            raise HuggingfaceError(status_code=500, message=f"{str(e)}")
601
+
602
async def async_streaming(
    self,
    logging_obj,
    api_base: str,
    data: dict,
    headers: dict,
    model_response: ModelResponse,
    model: str,
    timeout: float,
):
    """Async generator: stream a POST to `api_base` and yield wrapped chunks.

    The response lines are fed through `CustomStreamWrapper` (provider
    "huggingface"); every chunk it produces is yielded to the caller.

    Raises:
        HuggingfaceError: with the upstream status code when it is not 200.
    """
    async with httpx.AsyncClient(timeout=timeout) as http_client:
        async with http_client.stream(
            "POST", url=f"{api_base}", json=data, headers=headers
        ) as stream_response:
            status = stream_response.status_code
            if status != 200:
                raise HuggingfaceError(
                    status_code=status,
                    message="An error occurred while streaming",
                )
            wrapped_stream = CustomStreamWrapper(
                completion_stream=stream_response.aiter_lines(),
                model=model,
                custom_llm_provider="huggingface",
                logging_obj=logging_obj,
            )
            async for transformed_chunk in wrapped_stream:
                yield transformed_chunk
630
+
631
def embedding(
    self,
    model: str,
    input: list,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    logging_obj=None,
    model_response=None,
    encoding=None,
):
    """Call a Hugging Face embedding endpoint and fill `model_response`.

    The endpoint is resolved in this order: `model` itself when it contains
    "https", then `api_base`, then the `HF_API_BASE` / `HUGGINGFACE_API_BASE`
    environment variables, and finally the public inference API URL for
    `model`.

    For models whose name contains "sentence-transformers", `input[0]` is sent
    as the source sentence and `input[1:]` as the sentences to compare with.

    Returns:
        `model_response` with "object", "data", "model" and "usage" set.

    Raises:
        HuggingfaceError: 400 when a sentence-transformers model receives
            fewer than two inputs; 500 when the reply contains "error".
    """
    super().embedding()
    headers = self.validate_environment(api_key, headers=None)
    if "https" in model:
        embed_url = model
    elif api_base:
        embed_url = api_base
    elif "HF_API_BASE" in os.environ:
        embed_url = os.getenv("HF_API_BASE", "")
    elif "HUGGINGFACE_API_BASE" in os.environ:
        embed_url = os.getenv("HUGGINGFACE_API_BASE", "")
    else:
        embed_url = f"https://api-inference.huggingface.co/models/{model}"

    if "sentence-transformers" in model:
        # One source sentence plus at least one sentence to compare against.
        if len(input) < 2:
            raise HuggingfaceError(
                status_code=400,
                message="sentence transformers requires 2+ sentences",
            )
        data = {
            "inputs": {
                "source_sentence": input[0],
                # Use the caller's sentences, not placeholder text.
                "sentences": input[1:],
            }
        }
    else:
        data = {"inputs": input}  # type: ignore

    ## LOGGING
    logging_obj.pre_call(
        input=input,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": embed_url,
        },
    )
    ## COMPLETION CALL
    response = requests.post(embed_url, headers=headers, data=json.dumps(data))

    ## LOGGING
    logging_obj.post_call(
        input=input,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
        original_response=response,
    )

    embeddings = response.json()

    if "error" in embeddings:
        raise HuggingfaceError(status_code=500, message=embeddings["error"])

    output_data = []
    if "similarities" in embeddings:
        # enumerate() supplies the index; the list holds plain values.
        for idx, embedding in enumerate(embeddings["similarities"]):
            output_data.append(
                {"object": "embedding", "index": idx, "embedding": embedding}
            )
    else:
        for idx, embedding in enumerate(embeddings):
            is_flat = isinstance(embedding, float) or (
                isinstance(embedding, list) and isinstance(embedding[0], float)
            )
            output_data.append(
                {
                    "object": "embedding",
                    "index": idx,
                    # Nested replies are flattened by taking the first inner row.
                    "embedding": embedding if is_flat else embedding[0][0],
                }
            )
    model_response["object"] = "list"
    model_response["data"] = output_data
    model_response["model"] = model
    input_tokens = sum(len(encoding.encode(text)) for text in input)

    model_response["usage"] = {
        "prompt_tokens": input_tokens,
        "total_tokens": input_tokens,
    }
    return model_response
litellm/llms/maritalk.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional, List
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+
10
+
11
class MaritalkError(Exception):
    """Error raised for a failed Maritalk call; keeps the status code and message."""

    def __init__(self, status_code, message):
        # The base Exception carries the message as its only argument.
        super().__init__(message)
        self.status_code = status_code
        self.message = message
18
+
19
+
20
class MaritTalkConfig:
    """
    Process-wide default parameters for Maritalk requests.

    Options are stored as class attributes. Instantiating the class with
    non-None values overwrites those class attributes, so the values apply to
    every later `get_config()` call. Options left as `None` are omitted.

    - `max_tokens` (integer): Maximum number of tokens to generate.

    - `model` (string): The model used for conversation.

    - `do_sample` (boolean): Whether to generate the response using sampling.

    - `temperature` (number): Controls the randomness of generation.

    - `top_p` (number): Cumulative-probability threshold for token selection.

    - `repetition_penalty` (number): Penalty applied to repetition.

    - `stopping_tokens` (list of string): Tokens at which generation stops.
    """

    max_tokens: Optional[int] = None
    model: Optional[str] = None
    do_sample: Optional[bool] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    repetition_penalty: Optional[float] = None
    stopping_tokens: Optional[List[str]] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
        model: Optional[str] = None,
        do_sample: Optional[bool] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        repetition_penalty: Optional[float] = None,
        stopping_tokens: Optional[List[str]] = None,
    ) -> None:
        supplied = {
            "max_tokens": max_tokens,
            "model": model,
            "do_sample": do_sample,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "stopping_tokens": stopping_tokens,
        }
        # Values are written to the class, not the instance.
        for option, setting in supplied.items():
            if setting is not None:
                setattr(self.__class__, option, setting)

    @classmethod
    def get_config(cls):
        """Return the class-level options that are set (not None) as a dict."""
        non_option_kinds = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        settings = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, non_option_kinds):
                continue
            settings[name] = value
        return settings
79
+
80
+
81
def validate_environment(api_key):
    """Return JSON request headers, with a `Key` Authorization header when `api_key` is truthy."""
    auth_header = {"Authorization": f"Key {api_key}"} if api_key else {}
    return {
        "accept": "application/json",
        "content-type": "application/json",
        **auth_header,
    }
89
+
90
+
91
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    """Send a chat request to the Maritalk endpoint at `api_base`.

    Defaults from `litellm.MaritTalkConfig` are merged into `optional_params`
    without overriding keys the caller already set.

    Returns:
        The raw `response.iter_lines()` iterator when `optional_params["stream"]`
        is true; otherwise `model_response` filled with the answer, creation
        time, model name and token usage computed with `encoding`.

    Raises:
        MaritalkError: when the body is not JSON, contains "error", or has no
            usable "answer".
    """
    headers = validate_environment(api_key)
    completion_url = api_base
    if optional_params is None:
        # The signature allows None; treat it as "no extra parameters".
        optional_params = {}

    ## Load Config
    for k, v in litellm.MaritTalkConfig.get_config().items():
        # completion(top_k=3) > maritalk_config(top_k=3): caller values win
        optional_params.setdefault(k, v)

    data = {
        "messages": messages,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=messages,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params.get("stream", False),
    )
    if optional_params.get("stream") == True:
        return response.iter_lines()

    ## LOGGING
    logging_obj.post_call(
        input=messages,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    print_verbose(f"raw model_response: {response.text}")
    ## RESPONSE OBJECT
    try:
        completion_response = response.json()
    except ValueError:
        # Non-JSON bodies (e.g. a gateway error page) become a mapped error.
        raise MaritalkError(message=response.text, status_code=response.status_code)
    if "error" in completion_response:
        raise MaritalkError(
            message=completion_response["error"],
            status_code=response.status_code,
        )
    try:
        if len(completion_response["answer"]) > 0:
            model_response["choices"][0]["message"]["content"] = completion_response[
                "answer"
            ]
    except Exception:
        raise MaritalkError(message=response.text, status_code=response.status_code)

    ## CALCULATING USAGE
    prompt = "".join(m["content"] for m in messages)
    prompt_tokens = len(encoding.encode(prompt))
    # Content may still be None when the answer was empty.
    answer_text = model_response["choices"][0]["message"].get("content", "") or ""
    completion_tokens = len(encoding.encode(answer_text))

    model_response["created"] = int(time.time())
    model_response["model"] = model
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    model_response.usage = usage
    return model_response
179
+
180
+
181
def embedding(
    model: str,
    input: list,
    api_key: Optional[str] = None,
    logging_obj=None,
    model_response=None,
    encoding=None,
):
    """Placeholder: embeddings are not implemented here; always returns None."""
    return None
litellm/llms/nlp_cloud.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Usage
9
+
10
+
11
class NLPCloudError(Exception):
    """Error raised for a failed NLP Cloud call; keeps the status code and message."""

    def __init__(self, status_code, message):
        # The base Exception carries the message as its only argument.
        Exception.__init__(self, message)
        self.message = message
        self.status_code = status_code
18
+
19
+
20
class NLPCloudConfig:
    """
    Process-wide default generation parameters for NLP Cloud requests.

    Reference: https://docs.nlpcloud.com/#generation

    Options are stored as class attributes. Instantiating the class with
    non-None values overwrites those class attributes, so the values apply to
    every later `get_config()` call. Options left as `None` are omitted.

    - `max_length` (int): Maximum number of tokens in the generated text.

    - `length_no_input` (boolean): Whether `min_length` and `max_length` should exclude the length of the input text.

    - `end_sequence` (string): Token that should end the generated sequence.

    - `remove_end_sequence` (boolean): Whether to strip `end_sequence` from the result.

    - `remove_input` (boolean): Whether to strip the input text from the result.

    - `bad_words` (list of strings): Tokens that must not be generated.

    - `temperature` (float): Temperature sampling.

    - `top_p` (float): Top P sampling.

    - `top_k` (int): Top K sampling.

    - `repetition_penalty` (float): Discourages repeating the same word.

    - `num_beams` (int): Number of beams for beam search.

    - `num_return_sequences` (int): Number of independently computed returned sequences.
    """

    max_length: Optional[int] = None
    length_no_input: Optional[bool] = None
    end_sequence: Optional[str] = None
    remove_end_sequence: Optional[bool] = None
    remove_input: Optional[bool] = None
    bad_words: Optional[list] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    repetition_penalty: Optional[float] = None
    num_beams: Optional[int] = None
    num_return_sequences: Optional[int] = None

    def __init__(
        self,
        max_length: Optional[int] = None,
        length_no_input: Optional[bool] = None,
        end_sequence: Optional[str] = None,
        remove_end_sequence: Optional[bool] = None,
        remove_input: Optional[bool] = None,
        bad_words: Optional[list] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        repetition_penalty: Optional[float] = None,
        num_beams: Optional[int] = None,
        num_return_sequences: Optional[int] = None,
    ) -> None:
        supplied = {
            "max_length": max_length,
            "length_no_input": length_no_input,
            "end_sequence": end_sequence,
            "remove_end_sequence": remove_end_sequence,
            "remove_input": remove_input,
            "bad_words": bad_words,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "repetition_penalty": repetition_penalty,
            "num_beams": num_beams,
            "num_return_sequences": num_return_sequences,
        }
        # Values are written to the class, not the instance.
        for option, setting in supplied.items():
            if setting is not None:
                setattr(self.__class__, option, setting)

    @classmethod
    def get_config(cls):
        """Return the class-level options that are set (not None) as a dict."""
        non_option_kinds = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        settings = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, non_option_kinds):
                continue
            settings[name] = value
        return settings
99
+
100
+
101
def validate_environment(api_key):
    """Return JSON request headers, with a `Token` Authorization header when `api_key` is truthy."""
    request_headers = dict(
        [("accept", "application/json"), ("content-type", "application/json")]
    )
    if not api_key:
        return request_headers
    request_headers["Authorization"] = f"Token {api_key}"
    return request_headers
109
+
110
+
111
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
    default_max_tokens_to_sample=None,
):
    """Send a generation request to NLP Cloud at `api_base + model + "/generation"`.

    Message contents are joined with single spaces into one prompt. Defaults
    from `litellm.NLPCloudConfig` are merged into `optional_params` without
    overriding keys the caller already set.

    Returns:
        A generator from `clean_and_iterate_chunks` when
        `optional_params["stream"]` is true; otherwise `model_response` filled
        with the generated text, creation time, model name and token usage.

    Raises:
        NLPCloudError: when the body is not JSON, contains "error", or has no
            usable "generated_text".
    """
    headers = validate_environment(api_key)
    if optional_params is None:
        # The signature allows None; treat it as "no extra parameters".
        optional_params = {}

    ## Load Config
    for k, v in litellm.NLPCloudConfig.get_config().items():
        # completion(top_k=3) > nlp_cloud_config(top_k=3): caller values win
        optional_params.setdefault(k, v)

    text = " ".join(message["content"] for message in messages)

    data = {
        "text": text,
        **optional_params,
    }

    completion_url = api_base + model + "/generation"

    ## LOGGING
    logging_obj.pre_call(
        input=text,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": completion_url,
        },
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params.get("stream", False),
    )
    if optional_params.get("stream") == True:
        return clean_and_iterate_chunks(response)

    ## LOGGING
    logging_obj.post_call(
        input=text,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    print_verbose(f"raw model_response: {response.text}")
    ## RESPONSE OBJECT
    try:
        completion_response = response.json()
    except ValueError:
        # Non-JSON bodies become a mapped error; other exceptions propagate.
        raise NLPCloudError(message=response.text, status_code=response.status_code)
    if "error" in completion_response:
        raise NLPCloudError(
            message=completion_response["error"],
            status_code=response.status_code,
        )
    try:
        if len(completion_response["generated_text"]) > 0:
            model_response["choices"][0]["message"]["content"] = completion_response[
                "generated_text"
            ]
    except Exception:
        raise NLPCloudError(
            message=json.dumps(completion_response),
            status_code=response.status_code,
        )

    ## CALCULATING USAGE - prefer the counts reported in the response body;
    ## fall back to local tokenization only when a count is absent.
    prompt_tokens = completion_response.get("nb_input_tokens")
    if prompt_tokens is None:
        prompt_tokens = len(encoding.encode(text))
    completion_tokens = completion_response.get("nb_generated_tokens")
    if completion_tokens is None:
        completion_tokens = len(
            encoding.encode(completion_response.get("generated_text", "") or "")
        )

    model_response["created"] = int(time.time())
    model_response["model"] = model
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    model_response.usage = usage
    return model_response
210
+
211
+
212
+ # def clean_and_iterate_chunks(response):
213
+ # def process_chunk(chunk):
214
+ # print(f"received chunk: {chunk}")
215
+ # cleaned_chunk = chunk.decode("utf-8")
216
+ # # Perform further processing based on your needs
217
+ # return cleaned_chunk
218
+
219
+
220
+ # for line in response.iter_lines():
221
+ # if line:
222
+ # yield process_chunk(line)
223
def clean_and_iterate_chunks(response):
    """Yield decoded text from a streamed response, dropping NUL bytes.

    Bytes from `response.iter_content(chunk_size=1024)` are accumulated until
    the buffer contains a NUL byte. The NULs are then removed and the buffer
    is decoded and yielded. Whatever is left when the stream ends is yielded
    last. An empty chunk ends the iteration early.

    Decoding is incremental, so a multi-byte UTF-8 character split across two
    flushes is carried over and decoded once complete, instead of raising
    `UnicodeDecodeError`.
    """
    import codecs

    decoder = codecs.getincrementaldecoder("utf-8")()
    buffer = b""

    for chunk in response.iter_content(chunk_size=1024):
        if not chunk:
            break

        buffer += chunk
        if b"\x00" in buffer:
            # Incomplete trailing bytes stay inside the decoder until the
            # rest of the character arrives.
            yield decoder.decode(buffer.replace(b"\x00", b""))
            buffer = b""

    # No more data expected: flush the remaining buffer and decoder state.
    remaining = decoder.decode(buffer, final=True)
    if remaining:
        yield remaining
239
+
240
+
241
def embedding():
    """Placeholder for embedding calls (parse in, call, parse out); not implemented, returns None."""
    return None
litellm/llms/ollama.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests, types, time
2
+ import json, uuid
3
+ import traceback
4
+ from typing import Optional
5
+ import litellm
6
+ import httpx, aiohttp, asyncio
7
+ from .prompt_templates.factory import prompt_factory, custom_prompt
8
+
9
+
10
class OllamaError(Exception):
    """Error raised for a failed Ollama call.

    Besides the status code and message, it carries a placeholder
    `httpx.Request` (POST to http://localhost:11434) and an `httpx.Response`
    built from the status code.
    """

    def __init__(self, status_code, message):
        placeholder_request = httpx.Request(
            method="POST", url="http://localhost:11434"
        )
        self.status_code = status_code
        self.message = message
        self.request = placeholder_request
        self.response = httpx.Response(
            status_code=status_code, request=placeholder_request
        )
        # The base Exception carries the message as its only argument.
        super().__init__(message)
19
+
20
+
21
class OllamaConfig:
    """
    Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters

    Process-wide default parameters for Ollama requests. Options are stored as
    class attributes; instantiating the class with non-None values overwrites
    them for every later `get_config()` call. Options left as `None` are
    omitted. The defaults quoted below are those described for Ollama itself,
    not values set by this class.

    - `mirostat` (int): Enable Mirostat sampling for controlling perplexity. Default is 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0. Example usage: mirostat 0

    - `mirostat_eta` (float): Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. Default: 0.1. Example usage: mirostat_eta 0.1

    - `mirostat_tau` (float): Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. Default: 5.0. Example usage: mirostat_tau 5.0

    - `num_ctx` (int): Sets the size of the context window used to generate the next token. Default: 2048. Example usage: num_ctx 4096

    - `num_gqa` (int): The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b. Example usage: num_gqa 1

    - `num_gpu` (int): The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. Example usage: num_gpu 0

    - `num_thread` (int): Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Example usage: num_thread 8

    - `repeat_last_n` (int): Sets how far back for the model to look back to prevent repetition. Default: 64, 0 = disabled, -1 = num_ctx. Example usage: repeat_last_n 64

    - `repeat_penalty` (float): Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. Default: 1.1. Example usage: repeat_penalty 1.1

    - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7

    - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"

    - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1

    - `num_predict` (int): Maximum number of tokens to predict when generating text. Default: 128, -1 = infinite generation, -2 = fill context. Example usage: num_predict 42

    - `top_k` (int): Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. Default: 40. Example usage: top_k 40

    - `top_p` (float): Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. Default: 0.9. Example usage: top_p 0.9

    - `system` (string): system prompt for model (overrides what is defined in the Modelfile)

    - `template` (string): the full prompt or prompt template (overrides what is defined in the Modelfile)
    """

    mirostat: Optional[int] = None
    mirostat_eta: Optional[float] = None
    mirostat_tau: Optional[float] = None
    num_ctx: Optional[int] = None
    num_gqa: Optional[int] = None
    num_gpu: Optional[int] = None
    num_thread: Optional[int] = None
    repeat_last_n: Optional[int] = None
    repeat_penalty: Optional[float] = None
    temperature: Optional[float] = None
    stop: Optional[
        list
    ] = None  # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
    tfs_z: Optional[float] = None
    num_predict: Optional[int] = None
    top_k: Optional[int] = None
    top_p: Optional[float] = None
    system: Optional[str] = None
    template: Optional[str] = None

    def __init__(
        self,
        mirostat: Optional[int] = None,
        mirostat_eta: Optional[float] = None,
        mirostat_tau: Optional[float] = None,
        num_ctx: Optional[int] = None,
        num_gqa: Optional[int] = None,
        num_thread: Optional[int] = None,
        repeat_last_n: Optional[int] = None,
        repeat_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
        stop: Optional[list] = None,
        tfs_z: Optional[float] = None,
        num_predict: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        system: Optional[str] = None,
        template: Optional[str] = None,
        # Added last so existing positional callers keep their argument order.
        num_gpu: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            # Values are written to the class, not the instance.
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the class-level options that are set (not None) as a dict."""
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
122
+
123
+
124
+ # ollama implementation
125
def get_ollama_response(
    api_base="http://localhost:11434",
    model="llama2",
    prompt="Why is the sky blue?",
    optional_params=None,
    logging_obj=None,
    acompletion: bool = False,
    model_response=None,
    encoding=None,
):
    """Send a generate request to Ollama and return the result.

    `/api/generate` is appended to `api_base` unless it already ends with it.
    Defaults from `litellm.OllamaConfig` are merged into `optional_params`
    without overriding keys the caller already set.

    Returns:
        - when `acompletion` is True: the object returned by
          `ollama_async_streaming` (stream requested) or `ollama_acompletion`;
        - when streaming synchronously: the `ollama_completion_stream` generator;
        - otherwise `model_response`, filled with the reply, creation time,
          "ollama/"-prefixed model name and token usage.

    Raises:
        OllamaError: when the synchronous, non-streaming call does not return 200.
    """
    if api_base.endswith("/api/generate"):
        url = api_base
    else:
        url = f"{api_base}/api/generate"

    if optional_params is None:
        # The signature allows None; treat it as "no extra parameters".
        optional_params = {}

    ## Load Config
    for k, v in litellm.OllamaConfig.get_config().items():
        # completion(top_k=3) > ollama_config(top_k=3): caller values win
        optional_params.setdefault(k, v)

    optional_params["stream"] = optional_params.get("stream", False)
    data = {"model": model, "prompt": prompt, **optional_params}
    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=None,
        additional_args={
            "api_base": url,
            "complete_input_dict": data,
            "headers": {},
            "acompletion": acompletion,
        },
    )
    if acompletion is True:
        if optional_params.get("stream", False) == True:
            response = ollama_async_streaming(
                url=url,
                data=data,
                model_response=model_response,
                encoding=encoding,
                logging_obj=logging_obj,
            )
        else:
            response = ollama_acompletion(
                url=url,
                data=data,
                model_response=model_response,
                encoding=encoding,
                logging_obj=logging_obj,
            )
        return response
    elif optional_params.get("stream", False) == True:
        return ollama_completion_stream(url=url, data=data, logging_obj=logging_obj)

    response = requests.post(url=f"{url}", json=data, timeout=litellm.request_timeout)
    if response.status_code != 200:
        raise OllamaError(status_code=response.status_code, message=response.text)

    ## LOGGING
    logging_obj.post_call(
        input=prompt,
        api_key="",
        original_response=response.text,
        additional_args={
            "headers": None,
            "api_base": api_base,
        },
    )

    response_json = response.json()

    ## RESPONSE OBJECT
    model_response["choices"][0]["finish_reason"] = "stop"
    if optional_params.get("format", "") == "json":
        # JSON-format replies are surfaced as a single unnamed tool call.
        message = litellm.Message(
            content=None,
            tool_calls=[
                {
                    "id": f"call_{str(uuid.uuid4())}",
                    "function": {"arguments": response_json["response"], "name": ""},
                    "type": "function",
                }
            ],
        )
        model_response["choices"][0]["message"] = message
    else:
        model_response["choices"][0]["message"]["content"] = response_json["response"]
    model_response["created"] = int(time.time())
    model_response["model"] = "ollama/" + model

    # Prefer the token counts in the response body; tokenize locally only when
    # a count is absent, so the tokenizer is not invoked needlessly.
    prompt_tokens = response_json.get("prompt_eval_count")
    if prompt_tokens is None:
        prompt_tokens = len(encoding.encode(prompt))  # type: ignore
    completion_tokens = response_json.get("eval_count")
    if completion_tokens is None:
        completion_tokens = len(encoding.encode(response_json["response"]))  # type: ignore
    model_response["usage"] = litellm.Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response
225
+
226
+
227
def ollama_completion_stream(url, data, logging_obj):
    """Stream a completion from Ollama synchronously.

    POSTs ``data`` as JSON to ``url`` with ``httpx.stream`` and yields the
    chunks produced by ``litellm.CustomStreamWrapper`` over the response lines.

    Args:
        url: Endpoint URL the request is sent to.
        data: JSON payload; ``data["model"]`` is forwarded to the stream wrapper.
        logging_obj: Logging object forwarded to the stream wrapper.

    Yields:
        Chunks as transformed by ``litellm.CustomStreamWrapper``.

    Raises:
        OllamaError: If the server answers with a non-200 status code.
    """
    with httpx.stream(
        url=url, json=data, method="POST", timeout=litellm.request_timeout
    ) as response:
        if response.status_code != 200:
            # The body of a streamed response is not loaded automatically; it
            # must be read before ``response.text`` can be accessed.
            response.read()
            raise OllamaError(
                status_code=response.status_code, message=response.text
            )

        streamwrapper = litellm.CustomStreamWrapper(
            completion_stream=response.iter_lines(),
            model=data["model"],
            custom_llm_provider="ollama",
            logging_obj=logging_obj,
        )
        for transformed_chunk in streamwrapper:
            yield transformed_chunk
247
+
248
+
249
async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
    """Stream a completion from Ollama asynchronously.

    POSTs ``data`` as JSON to ``url`` and yields the chunks produced by
    ``litellm.CustomStreamWrapper`` over the response lines.

    Args:
        url: Endpoint URL the request is sent to.
        data: JSON payload; ``data["model"]`` is forwarded to the stream wrapper.
        model_response: Accepted for signature parity; not used here.
        encoding: Accepted for signature parity; not used here.
        logging_obj: Logging object forwarded to the stream wrapper.

    Yields:
        Chunks as transformed by ``litellm.CustomStreamWrapper``.

    Raises:
        OllamaError: If the server answers with a non-200 status code.
        Exception: Any other failure is printed and re-raised to the caller.
    """
    try:
        # Use the client as a context manager so it is closed once streaming
        # finishes or fails.
        async with httpx.AsyncClient() as client:
            async with client.stream(
                url=f"{url}",
                json=data,
                method="POST",
                timeout=litellm.request_timeout,
            ) as response:
                if response.status_code != 200:
                    # A streamed body must be read before ``.text`` is usable.
                    await response.aread()
                    raise OllamaError(
                        status_code=response.status_code, message=response.text
                    )

                streamwrapper = litellm.CustomStreamWrapper(
                    completion_stream=response.aiter_lines(),
                    model=data["model"],
                    custom_llm_provider="ollama",
                    logging_obj=logging_obj,
                )
                async for transformed_chunk in streamwrapper:
                    yield transformed_chunk
    except Exception:
        # Keep the traceback output, but propagate the error (as
        # ollama_acompletion does) instead of ending the stream silently.
        traceback.print_exc()
        raise
270
+
271
+
272
async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
    """Run a non-streaming Ollama completion asynchronously.

    Forces ``data["stream"]`` to False, POSTs ``data`` to ``url`` via aiohttp
    and fills ``model_response`` from the JSON reply.

    Args:
        url: Endpoint URL the request is sent to.
        data: JSON payload; must contain ``"model"`` and ``"prompt"``. It is
            mutated in place (``"stream"`` is set to False).
        model_response: Response object populated and returned.
        encoding: Tokenizer exposing ``encode``; used to estimate prompt
            tokens when the reply has no ``prompt_eval_count``.
        logging_obj: Logging object whose ``post_call`` is invoked.

    Returns:
        The populated ``model_response``.

    Raises:
        OllamaError: If the server answers with a non-200 status code.
        Exception: Any other failure is printed and re-raised.
    """
    data["stream"] = False
    try:
        timeout = aiohttp.ClientTimeout(total=litellm.request_timeout)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            resp = await session.post(url, json=data)

            if resp.status != 200:
                text = await resp.text()
                raise OllamaError(status_code=resp.status, message=text)

            # ``resp.text`` is a coroutine method; await it so the actual body
            # is logged rather than the bound method object.
            raw_response = await resp.text()

            ## LOGGING
            logging_obj.post_call(
                input=data["prompt"],
                api_key="",
                original_response=raw_response,
                additional_args={
                    "headers": None,
                    "api_base": url,
                },
            )

            response_json = await resp.json()
            ## RESPONSE OBJECT
            model_response["choices"][0]["finish_reason"] = "stop"
            if data.get("format", "") == "json":
                # JSON-format replies are surfaced as a single tool call whose
                # arguments hold the generated text.
                message = litellm.Message(
                    content=None,
                    tool_calls=[
                        {
                            "id": f"call_{str(uuid.uuid4())}",
                            "function": {
                                "arguments": response_json["response"],
                                "name": "",
                            },
                            "type": "function",
                        }
                    ],
                )
                model_response["choices"][0]["message"] = message
            else:
                model_response["choices"][0]["message"]["content"] = response_json[
                    "response"
                ]
            model_response["created"] = int(time.time())
            model_response["model"] = "ollama/" + data["model"]
            prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"])))  # type: ignore
            completion_tokens = response_json["eval_count"]
            model_response["usage"] = litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            )
            return model_response
    except Exception:
        traceback.print_exc()
        raise
329
+
330
+
331
async def ollama_aembeddings(
    api_base="http://localhost:11434",
    model="llama2",
    prompt="Why is the sky blue?",
    optional_params=None,
    logging_obj=None,
    model_response=None,
    encoding=None,
):
    """Request an embedding from Ollama's ``/api/embeddings`` endpoint.

    Args:
        api_base: Base URL of the server, or the full embeddings endpoint URL.
        model: Model name sent in the payload.
        prompt: Text to embed.
        optional_params: Extra parameters. Config defaults are merged into
            this dict, but only ``model`` and ``prompt`` are sent.
        logging_obj: Logging object whose ``pre_call``/``post_call`` are invoked.
        model_response: Response object populated and returned.
        encoding: Tokenizer exposing ``encode``; used to count prompt tokens.

    Returns:
        ``model_response`` with ``object``, ``data``, ``model`` and ``usage`` set.

    Raises:
        OllamaError: If the server answers with a non-200 status code.
    """
    if api_base.endswith("/api/embeddings"):
        url = api_base
    else:
        url = f"{api_base}/api/embeddings"

    # The signature defaults optional_params to None; guard so the membership
    # test below does not raise TypeError.
    if optional_params is None:
        optional_params = {}

    ## Load Config
    config = litellm.OllamaConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # explicitly passed params take precedence over config defaults
            optional_params[k] = v

    data = {
        "model": model,
        "prompt": prompt,
    }
    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=None,
        additional_args={"api_base": url, "complete_input_dict": data, "headers": {}},
    )
    timeout = aiohttp.ClientTimeout(total=litellm.request_timeout)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        response = await session.post(url, json=data)

        if response.status != 200:
            text = await response.text()
            raise OllamaError(status_code=response.status, message=text)

        # ``response.text`` is a coroutine method; await it so the body text
        # is logged rather than the bound method object.
        raw_response = await response.text()

        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key="",
            original_response=raw_response,
            additional_args={
                "headers": None,
                "api_base": api_base,
            },
        )

        response_json = await response.json()
        embeddings = response_json["embedding"]
        # The endpoint returns one flat vector for the prompt. Wrap it so the
        # loop below emits one item per embedding, not one per float.
        if embeddings and not isinstance(embeddings[0], (list, tuple)):
            embeddings = [embeddings]

        ## RESPONSE OBJECT
        output_data = [
            {"object": "embedding", "index": idx, "embedding": embedding}
            for idx, embedding in enumerate(embeddings)
        ]
        model_response["object"] = "list"
        model_response["data"] = output_data
        model_response["model"] = model

        input_tokens = len(encoding.encode(prompt))

        model_response["usage"] = {
            "prompt_tokens": input_tokens,
            "total_tokens": input_tokens,
        }
        return model_response
litellm/llms/ollama_chat.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests, types, time
2
+ import json, uuid
3
+ import traceback
4
+ from typing import Optional
5
+ import litellm
6
+ import httpx, aiohttp, asyncio
7
+ from .prompt_templates.factory import prompt_factory, custom_prompt
8
+
9
+
10
class OllamaError(Exception):
    """Error raised when an Ollama request fails.

    Carries the HTTP ``status_code`` and ``message`` together with
    placeholder ``httpx`` request/response objects built from them.
    """

    def __init__(self, status_code, message):
        # Initialise the base exception with the message first.
        super().__init__(message)
        # The request is a fixed placeholder pointing at the default local
        # Ollama address; the response only mirrors the status code.
        placeholder_request = httpx.Request(
            method="POST", url="http://localhost:11434"
        )
        self.request = placeholder_request
        self.response = httpx.Response(
            status_code=status_code, request=placeholder_request
        )
        self.status_code = status_code
        self.message = message
19
+
20
+
21
class OllamaConfig:
    """
    Class-level defaults for Ollama request parameters.

    Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters

    Instantiating the class stores every non-None argument on the class
    itself, so values persist across instances; `get_config()` returns the
    values currently set.

    Parameters:

    - `mirostat` (int): Mirostat sampling mode for controlling perplexity. 0 = disabled (default), 1 = Mirostat, 2 = Mirostat 2.0.

    - `mirostat_eta` (float): How quickly the algorithm reacts to feedback from the generated text; lower is slower, higher is more responsive. Default: 0.1.

    - `mirostat_tau` (float): Balance between coherence and diversity; lower values give more focused, coherent text. Default: 5.0.

    - `num_ctx` (int): Size of the context window used to generate the next token. Default: 2048.

    - `num_gqa` (int): Number of GQA groups in the transformer layer. Required for some models, e.g. 8 for llama2:70b.

    - `num_gpu` (int): Number of layers to send to the GPU(s); listed in the Ollama reference but not exposed as an attribute of this class.

    - `num_thread` (int): Number of threads to use during computation. Ollama detects this by default; the recommended value is the number of physical CPU cores.

    - `repeat_last_n` (int): How far back the model looks to prevent repetition. Default: 64, 0 = disabled, -1 = num_ctx.

    - `repeat_penalty` (float): How strongly repetitions are penalized; higher (e.g. 1.5) is stricter, lower (e.g. 0.9) more lenient. Default: 1.1.

    - `temperature` (float): Model temperature; higher values make answers more creative. Default: 0.8.

    - `stop` (string[]): Stop sequences to use, e.g. "AI assistant:".

    - `tfs_z` (float): Tail free sampling, reducing the impact of less probable tokens; higher (e.g. 2.0) reduces more, 1.0 disables. Default: 1.

    - `num_predict` (int): Maximum number of tokens to predict. Default: 128, -1 = infinite generation, -2 = fill context.

    - `top_k` (int): Reduces the probability of generating nonsense; higher (e.g. 100) is more diverse, lower (e.g. 10) more conservative. Default: 40.

    - `top_p` (float): Works together with top-k; higher (e.g. 0.95) is more diverse, lower (e.g. 0.5) more focused. Default: 0.9.

    - `system` (string): System prompt for the model (overrides the Modelfile).

    - `template` (string): Full prompt or prompt template (overrides the Modelfile).
    """

    mirostat: Optional[int] = None
    mirostat_eta: Optional[float] = None
    mirostat_tau: Optional[float] = None
    num_ctx: Optional[int] = None
    num_gqa: Optional[int] = None
    num_thread: Optional[int] = None
    repeat_last_n: Optional[int] = None
    repeat_penalty: Optional[float] = None
    temperature: Optional[float] = None
    # stop is a list, see https://github.com/jmorganca/ollama/pull/442
    stop: Optional[list] = None
    tfs_z: Optional[float] = None
    num_predict: Optional[int] = None
    top_k: Optional[int] = None
    top_p: Optional[float] = None
    system: Optional[str] = None
    template: Optional[str] = None

    def __init__(
        self,
        mirostat: Optional[int] = None,
        mirostat_eta: Optional[float] = None,
        mirostat_tau: Optional[float] = None,
        num_ctx: Optional[int] = None,
        num_gqa: Optional[int] = None,
        num_thread: Optional[int] = None,
        repeat_last_n: Optional[int] = None,
        repeat_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
        stop: Optional[list] = None,
        tfs_z: Optional[float] = None,
        num_predict: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        system: Optional[str] = None,
        template: Optional[str] = None,
    ) -> None:
        # Snapshot the arguments before any other local name exists.
        supplied = dict(locals())
        supplied.pop("self")
        for name, value in supplied.items():
            if value is None:
                continue
            # Stored on the class, not the instance, so the setting is shared.
            setattr(self.__class__, name, value)

    @classmethod
    def get_config(cls):
        """Return the class attributes that are set (not None) as a dict.

        Dunder names and function/method objects are excluded.
        """
        callable_kinds = (
            types.FunctionType,
            types.BuiltinFunctionType,
            classmethod,
            staticmethod,
        )
        settings = {}
        for name, value in cls.__dict__.items():
            if name.startswith("__"):
                continue
            if value is None or isinstance(value, callable_kinds):
                continue
            settings[name] = value
        return settings
122
+
123
+
124
+ # ollama implementation
125
def get_ollama_response(
    api_base="http://localhost:11434",
    model="llama2",
    messages=None,
    optional_params=None,
    logging_obj=None,
    acompletion: bool = False,
    model_response=None,
    encoding=None,
):
    """Send a chat request to Ollama's ``/api/chat`` endpoint.

    Args:
        api_base: Base URL of the server, or the full chat endpoint URL.
        model: Model name sent in the payload.
        messages: Chat messages sent in the payload.
        optional_params: Extra request parameters. Config defaults are merged
            into this dict in place; explicitly passed keys win.
        logging_obj: Logging object whose ``pre_call``/``post_call`` are invoked.
        acompletion: When True, the async implementations are used.
        model_response: Response object populated on the synchronous,
            non-streaming path.
        encoding: Tokenizer exposing ``encode``; used only to estimate prompt
            tokens when the reply has no ``prompt_eval_count``.

    Returns:
        - ``acompletion`` and streaming: the ``ollama_async_streaming`` async generator.
        - ``acompletion`` without streaming: the un-awaited ``ollama_acompletion`` coroutine.
        - streaming only: the ``ollama_completion_stream`` generator.
        - otherwise: the populated ``model_response``.

    Raises:
        OllamaError: If the synchronous request returns a non-200 status.
    """
    if api_base.endswith("/api/chat"):
        url = api_base
    else:
        url = f"{api_base}/api/chat"

    # The signature defaults optional_params to None; guard so the membership
    # test below does not raise TypeError.
    if optional_params is None:
        optional_params = {}

    ## Load Config
    config = litellm.OllamaConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # explicitly passed params take precedence over config defaults
            optional_params[k] = v

    optional_params["stream"] = optional_params.get("stream", False)
    data = {"model": model, "messages": messages, **optional_params}
    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=None,
        additional_args={
            "api_base": url,
            "complete_input_dict": data,
            "headers": {},
            "acompletion": acompletion,
        },
    )
    if acompletion is True:
        if optional_params.get("stream", False) == True:
            response = ollama_async_streaming(
                url=url,
                data=data,
                model_response=model_response,
                encoding=encoding,
                logging_obj=logging_obj,
            )
        else:
            response = ollama_acompletion(
                url=url,
                data=data,
                model_response=model_response,
                encoding=encoding,
                logging_obj=logging_obj,
            )
        return response
    elif optional_params.get("stream", False) == True:
        return ollama_completion_stream(url=url, data=data, logging_obj=logging_obj)

    # Bound the request with the configured timeout, as the other request
    # paths in this module do, so an unresponsive server cannot hang the call.
    response = requests.post(
        url=f"{url}",
        json=data,
        timeout=litellm.request_timeout,
    )
    if response.status_code != 200:
        raise OllamaError(status_code=response.status_code, message=response.text)

    ## LOGGING
    logging_obj.post_call(
        input=messages,
        api_key="",
        original_response=response.text,
        additional_args={
            "headers": None,
            "api_base": api_base,
        },
    )

    response_json = response.json()

    ## RESPONSE OBJECT
    model_response["choices"][0]["finish_reason"] = "stop"
    if data.get("format", "") == "json":
        # JSON-format replies are surfaced as a single tool call whose
        # arguments hold the generated message content.
        message = litellm.Message(
            content=None,
            tool_calls=[
                {
                    "id": f"call_{str(uuid.uuid4())}",
                    "function": {
                        "arguments": response_json["message"]["content"],
                        "name": "",
                    },
                    "type": "function",
                }
            ],
        )
        model_response["choices"][0]["message"] = message
    else:
        model_response["choices"][0]["message"] = response_json["message"]
    model_response["created"] = int(time.time())
    model_response["model"] = "ollama/" + model
    prompt_tokens = response_json.get("prompt_eval_count")
    if prompt_tokens is None:
        # Reply carried no prompt token count: approximate it by encoding the
        # concatenated message contents instead of raising KeyError.
        # NOTE(review): assumes messages are dict-like with a "content" key.
        prompt_text = "".join(str(m.get("content") or "") for m in messages or [])
        prompt_tokens = len(encoding.encode(prompt_text))
    completion_tokens = response_json["eval_count"]
    model_response["usage"] = litellm.Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response
231
+
232
+
233
def ollama_completion_stream(url, data, logging_obj):
    """Stream a chat completion from Ollama synchronously.

    POSTs ``data`` as JSON to ``url`` with ``httpx.stream`` and yields the
    chunks produced by ``litellm.CustomStreamWrapper`` over the response lines.

    Args:
        url: Endpoint URL the request is sent to.
        data: JSON payload; ``data["model"]`` is forwarded to the stream wrapper.
        logging_obj: Logging object forwarded to the stream wrapper.

    Yields:
        Chunks as transformed by ``litellm.CustomStreamWrapper``.

    Raises:
        OllamaError: If the server answers with a non-200 status code; the
            message is the response body text.
    """
    with httpx.stream(
        url=url, json=data, method="POST", timeout=litellm.request_timeout
    ) as response:
        if response.status_code != 200:
            # Read the streamed body so the error carries its text rather
            # than an unconsumed line iterator.
            response.read()
            raise OllamaError(
                status_code=response.status_code, message=response.text
            )

        streamwrapper = litellm.CustomStreamWrapper(
            completion_stream=response.iter_lines(),
            model=data["model"],
            custom_llm_provider="ollama_chat",
            logging_obj=logging_obj,
        )
        for transformed_chunk in streamwrapper:
            yield transformed_chunk
253
+
254
+
255
async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
    """Stream a chat completion from Ollama asynchronously.

    POSTs ``data`` as JSON to ``url`` and yields the chunks produced by
    ``litellm.CustomStreamWrapper`` over the response lines.

    Args:
        url: Endpoint URL the request is sent to.
        data: JSON payload; ``data["model"]`` is forwarded to the stream wrapper.
        model_response: Accepted for signature parity; not used here.
        encoding: Accepted for signature parity; not used here.
        logging_obj: Logging object forwarded to the stream wrapper.

    Yields:
        Chunks as transformed by ``litellm.CustomStreamWrapper``.

    Raises:
        OllamaError: If the server answers with a non-200 status code.
        Exception: Any other failure is printed and re-raised to the caller.
    """
    try:
        # Use the client as a context manager so it is closed once streaming
        # finishes or fails.
        async with httpx.AsyncClient() as client:
            async with client.stream(
                url=f"{url}",
                json=data,
                method="POST",
                timeout=litellm.request_timeout,
            ) as response:
                if response.status_code != 200:
                    # A streamed body must be read before ``.text`` is usable.
                    await response.aread()
                    raise OllamaError(
                        status_code=response.status_code, message=response.text
                    )

                streamwrapper = litellm.CustomStreamWrapper(
                    completion_stream=response.aiter_lines(),
                    model=data["model"],
                    custom_llm_provider="ollama_chat",
                    logging_obj=logging_obj,
                )
                async for transformed_chunk in streamwrapper:
                    yield transformed_chunk
    except Exception:
        # Keep the traceback output, but propagate the error (as
        # ollama_acompletion does) instead of ending the stream silently.
        traceback.print_exc()
        raise
276
+
277
+
278
async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
    """Run a non-streaming Ollama chat completion asynchronously.

    Forces ``data["stream"]`` to False, POSTs ``data`` to ``url`` via aiohttp
    and fills ``model_response`` from the JSON reply.

    Args:
        url: Endpoint URL the request is sent to.
        data: JSON payload; must contain ``"model"``. It is mutated in place
            (``"stream"`` is set to False).
        model_response: Response object populated and returned.
        encoding: Tokenizer exposing ``encode``; used only to estimate prompt
            tokens when the reply has no ``prompt_eval_count``.
        logging_obj: Logging object whose ``post_call`` is invoked.

    Returns:
        The populated ``model_response``.

    Raises:
        OllamaError: If the server answers with a non-200 status code.
        Exception: Any other failure is printed and re-raised.
    """
    data["stream"] = False
    try:
        timeout = aiohttp.ClientTimeout(total=litellm.request_timeout)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            resp = await session.post(url, json=data)

            if resp.status != 200:
                text = await resp.text()
                raise OllamaError(status_code=resp.status, message=text)

            response_json = await resp.json()

            ## LOGGING
            logging_obj.post_call(
                input=data,
                api_key="",
                original_response=response_json,
                additional_args={
                    "headers": None,
                    "api_base": url,
                },
            )

            ## RESPONSE OBJECT
            model_response["choices"][0]["finish_reason"] = "stop"
            if data.get("format", "") == "json":
                # JSON-format replies are surfaced as a single tool call whose
                # arguments hold the generated message content.
                message = litellm.Message(
                    content=None,
                    tool_calls=[
                        {
                            "id": f"call_{str(uuid.uuid4())}",
                            "function": {
                                "arguments": response_json["message"]["content"],
                                "name": "",
                            },
                            "type": "function",
                        }
                    ],
                )
                model_response["choices"][0]["message"] = message
            else:
                model_response["choices"][0]["message"] = response_json["message"]
            model_response["created"] = int(time.time())
            model_response["model"] = "ollama/" + data["model"]
            prompt_tokens = response_json.get("prompt_eval_count")
            if prompt_tokens is None:
                # Reply carried no prompt token count: approximate it by
                # encoding the concatenated message contents instead of
                # raising KeyError.
                # NOTE(review): assumes messages are dict-like with "content".
                prompt_text = "".join(
                    str(m.get("content") or "") for m in data.get("messages") or []
                )
                prompt_tokens = len(encoding.encode(prompt_text))
            completion_tokens = response_json["eval_count"]
            model_response["usage"] = litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            )
            return model_response
    except Exception:
        traceback.print_exc()
        raise