# Filtir / llm_api_utils.py
# vladbogo's picture
# Upload folder using huggingface_hub
# 7a8b33f verified
import random
import cohere
import os
import openai
import time
import backoff
# Price table keyed by model name. Each entry maps a usage kind
# ("prompt" / "completion" for chat models, "embed" for embedding models)
# to a price per 1,000 tokens; the cost summary printed elsewhere in this
# file labels the resulting amounts as USD.
PRICE_PER_1K_TOKENS = {
    # Chat / text-generation models.
    "gpt-4": {
        "prompt": 0.03,
        "completion": 0.06,
    },
    "gpt-4-1106-preview": {
        "prompt": 0.01,
        "completion": 0.03,
    },
    "gpt-3.5-turbo": {
        "prompt": 0.001,
        "completion": 0.002,
    },
    # Embedding models.
    "ada": {
        "embed": 0.0004,
    },
    "text-embedding-ada-002": {
        "embed": 0.0001,
    },
}
# Embedding vector size for each supported embedding model name.
EMBEDDING_DIMENSIONS = {
    "ada": 1536,
    "text-embedding-ada-002": 1536,
}
def estimate_cost_of_text_generation_api_call(
    model: str, response: "openai.types.chat.ChatCompletion", verbose: bool
) -> float:
    """Estimate the cost of one text-generation call from its token usage.

    Args:
        model: Key into ``PRICE_PER_1K_TOKENS``; the entry must define both a
            ``"prompt"`` and a ``"completion"`` price.
        response: Object exposing ``usage.prompt_tokens``,
            ``usage.completion_tokens`` and ``usage.total_tokens`` as
            attributes (not dict keys). Presumably the value returned by
            ``openai.chat.completions.create`` -- confirm against callers.
        verbose: When true, print a short token/cost summary to stdout.

    Returns:
        The estimated cost: prompt tokens and completion tokens, each priced
        per 1,000 tokens using the table entry for ``model``.

    Raises:
        KeyError: If ``model`` has no entry in ``PRICE_PER_1K_TOKENS`` or the
            entry lacks text-generation prices (e.g. an embedding-only model).
    """
    usage = response.usage
    completion_tokens = usage.completion_tokens
    prompt_tokens = usage.prompt_tokens
    total_tokens = usage.total_tokens

    try:
        prices = PRICE_PER_1K_TOKENS[model]
        prompt_price = prices["prompt"]
        completion_price = prices["completion"]
    except KeyError as err:
        # Same exception type as a raw dict lookup, but say which model was
        # requested instead of surfacing only the missing key.
        raise KeyError(
            f"No text-generation pricing available for model {model!r}"
        ) from err

    prompt_cost = prompt_tokens / 1000 * prompt_price
    completion_cost = completion_tokens / 1000 * completion_price
    cost = prompt_cost + completion_cost

    if verbose:
        summary = (
            f"Used {prompt_tokens} prompt tokens, "
            f"{completion_tokens} completion tokens, "
            f"{total_tokens} total tokens\n"
            f"Estimated cost: {cost:.4f} USD\n"
        )
        print(summary)
    return cost
@backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APIConnectionError))
def call_openai_with_exponetial_backoff(**kwargs):
    """Create a chat completion, retrying on rate-limit and connection errors.

    All keyword arguments are forwarded unchanged to
    ``openai.chat.completions.create`` and its result is returned as-is.
    The ``backoff`` decorator re-invokes this function with exponentially
    growing waits when ``openai.RateLimitError`` or
    ``openai.APIConnectionError`` is raised; no retry limit is configured
    here.
    """
    # A random pause of up to 5 seconds precedes every attempt (including
    # retries, since the sleep lives inside the decorated function).
    jitter_seconds = random.random() * 5
    time.sleep(jitter_seconds)

    completion = openai.chat.completions.create(**kwargs)
    return completion
def init_openai_with_api_key():
    """Set ``openai.api_key`` from the ``OPENAI_API_KEY`` environment variable.

    If the variable is not set, ``openai.api_key`` is assigned ``None``.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    openai.api_key = api_key
def init_cohere_with_api_key():
    """Return a ``cohere.Client`` built from the ``COHERE_API_KEY`` variable.

    The key is read from the environment at call time; if the variable is not
    set, ``None`` is passed to ``cohere.Client``.
    """
    return cohere.Client(os.environ.get("COHERE_API_KEY"))