|
import random |
|
import cohere |
|
import os |
|
import openai |
|
import time |
|
import backoff |
|
|
|
|
|
# Price in USD per 1,000 tokens, keyed first by model name and then by usage
# kind: "prompt"/"completion" for the generation models, "embed" for the
# embedding models.
PRICE_PER_1K_TOKENS = {
    "gpt-4": {
        "prompt": 0.03,
        "completion": 0.06,
    },
    "gpt-4-1106-preview": {
        "prompt": 0.01,
        "completion": 0.03,
    },
    "gpt-3.5-turbo": {
        "prompt": 0.001,
        "completion": 0.002,
    },
    "ada": {
        "embed": 0.0004,
    },
    "text-embedding-ada-002": {
        "embed": 0.0001,
    },
}
|
|
|
|
|
# Embedding vector length, keyed by embedding model name.
EMBEDDING_DIMENSIONS = {"ada": 1536, "text-embedding-ada-002": 1536}
|
|
|
|
|
def estimate_cost_of_text_generation_api_call(
    model: str, response: dict, verbose: bool
) -> float:
    """Estimate the USD cost of a single text-generation API call.

    The cost is the prompt and completion token counts reported by the
    response, each priced per 1,000 tokens from ``PRICE_PER_1K_TOKENS``.

    Args:
        model: Key into ``PRICE_PER_1K_TOKENS``; its entry must provide both
            a "prompt" and a "completion" price.
        response: Response object exposing ``usage.prompt_tokens``,
            ``usage.completion_tokens`` and ``usage.total_tokens`` as
            attributes. Attribute access is used, so a plain ``dict`` does
            not work despite the annotation.
        verbose: When true, print a token/cost summary to stdout.

    Returns:
        The estimated cost in USD.

    Raises:
        KeyError: If ``model`` is not in ``PRICE_PER_1K_TOKENS`` or its entry
            lacks "prompt"/"completion" prices (e.g. an embedding-only
            model). The message names the offending model.
    """
    usage = response.usage
    completion_tokens = usage.completion_tokens
    prompt_tokens = usage.prompt_tokens
    total_tokens = usage.total_tokens

    try:
        prices = PRICE_PER_1K_TOKENS[model]
        prompt_price = prices["prompt"]
        completion_price = prices["completion"]
    except KeyError as err:
        # Same exception type as a raw lookup failure, but with a message
        # that says which model was requested and what is available.
        raise KeyError(
            f"No prompt/completion pricing for model {model!r}; "
            f"known models: {sorted(PRICE_PER_1K_TOKENS)}"
        ) from err

    prompt_cost = prompt_tokens / 1000 * prompt_price
    completion_cost = completion_tokens / 1000 * completion_price
    cost = prompt_cost + completion_cost

    if verbose:
        print(
            f"Used {prompt_tokens} prompt tokens, "
            f"{completion_tokens} completion tokens, "
            f"{total_tokens} total tokens\n"
            f"Estimated cost: {cost:.4f} USD\n"
        )
    return cost
|
|
|
|
|
@backoff.on_exception(
    backoff.expo,
    (openai.RateLimitError, openai.APIConnectionError),
)
def call_openai_with_exponetial_backoff(**kwargs):
    """Call ``openai.chat.completions.create`` with retry on transient errors.

    The ``backoff`` decorator re-invokes this function, using its ``expo``
    wait generator, whenever ``openai.RateLimitError`` or
    ``openai.APIConnectionError`` is raised. No ``max_tries``/``max_time`` is
    passed, so the decorator's defaults govern how long retries continue.

    Args:
        **kwargs: Forwarded unchanged to ``openai.chat.completions.create``.

    Returns:
        Whatever ``openai.chat.completions.create`` returns.
    """
    # Pause for a random 0-5 seconds before every attempt (including the
    # first), in addition to any wait the decorator applies between retries.
    time.sleep(random.random() * 5)
    return openai.chat.completions.create(**kwargs)
|
|
|
|
|
def init_openai_with_api_key():
    """Set ``openai.api_key`` from the ``OPENAI_API_KEY`` environment variable.

    The variable is read with a ``None`` fallback, so when it is unset
    ``openai.api_key`` is assigned ``None`` rather than an error being raised.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
|
|
|
|
|
def init_cohere_with_api_key():
    """Create a ``cohere.Client`` from the ``COHERE_API_KEY`` environment variable.

    The variable is read with a ``None`` fallback, so ``None`` is passed to
    the client constructor when it is unset.

    Returns:
        The newly constructed ``cohere.Client``.
    """
    return cohere.Client(os.getenv("COHERE_API_KEY"))
|
|