✨ Feature: Support Cloudflare API models
Files changed:

- README.md (+38 −31)
- README_CN.md (+10 −3)
- main.py (+6 −2)
- request.py (+53 −0)
- response.py (+30 −2)
- utils.py (+2 −0)

README.md
CHANGED
@@ -4,7 +4,7 @@
   <a href="https://t.me/uni_api">
     <img src="https://img.shields.io/badge/Join Telegram Group-blue?&logo=telegram">
   </a>
+  <a href="https://hub.docker.com/repository/docker/yym68686/uni-api">
     <img src="https://img.shields.io/docker/pulls/yym68686/uni-api?color=blue" alt="docker pull">
   </a>
 </p>
@@ -13,23 +13,23 @@
 
 ## Introduction
 
+If used personally, one/new-api is overly complex, with many commercial features that individuals do not need. If you do not want a complex front-end interface and want support for more models, try uni-api. This is a project for the unified management of large-model APIs: it calls multiple backend services through a single API interface, converts them all to the OpenAI format, and supports load balancing. Currently supported backend services: OpenAI, Anthropic, Gemini, Vertex, Cloudflare, DeepBricks, OpenRouter, and more.
 
 ## Features
 
+- No front-end: API channels are configured purely through a configuration file. You can run your own API site by writing a single file, and the documentation includes a detailed, beginner-friendly configuration guide.
+- Unified management of multiple backend services, supporting OpenAI, Deepseek, DeepBricks, OpenRouter, and other providers whose APIs follow the OpenAI format. Supports OpenAI DALL-E 3 image generation.
+- Supports the Anthropic, Gemini, Vertex, and Cloudflare APIs simultaneously. Vertex supports both the Claude and Gemini APIs.
+- Supports native tool use (function calling) for OpenAI, Anthropic, Gemini, and Vertex.
 - Supports OpenAI, Anthropic, Gemini, Vertex native image recognition API.
 - Supports four types of load balancing.
+  1. Channel-level weighted load balancing, which allocates requests according to channel weights. Disabled by default; channel weights must be configured.
+  2. Vertex regional load balancing, which supports high concurrency on Vertex and can raise Gemini and Claude concurrency by up to (number of APIs × number of regions) times. Automatically enabled without additional configuration.
   3. Except for Vertex region-level load balancing, all APIs support channel-level sequential load balancing, enhancing the immersive translation experience. Automatically enabled without additional configuration.
+  4. Automatic API-key-level round-robin load balancing across multiple API keys within a single channel.
+- Supports automatic retry: when an API channel fails to respond, the next API channel is retried automatically.
+- Supports fine-grained permission control: wildcards can be used to restrict which of a channel's models each API key may use.
+- Supports rate limiting: set the maximum number of requests per time unit as an integer rate such as 2/min (2 per minute), 5/hour, 10/day, 10/month, or 10/year. The default is 60/min. The format reads as a count and a period, as sketched below.
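The README does not show uni-api's internal limiter, so as a minimal sketch only: a rate string of the "count/period" form described above can be split into a request budget and a window in seconds. Function and constant names here are illustrative, not uni-api's actual internals.

```python
# Illustrative parser for rate strings such as "2/min"; not uni-api's real code.
PERIOD_SECONDS = {"min": 60, "hour": 3600, "day": 86400, "month": 2592000, "year": 31536000}

def parse_rate_limit(rate: str = "60/min") -> tuple[int, int]:
    """Return (max_requests, window_seconds) for a "count/period" rate string."""
    count, period = rate.split("/")
    return int(count), PERIOD_SECONDS[period]

assert parse_rate_limit("2/min") == (2, 60)
assert parse_rate_limit("5/hour") == (5, 3600)
```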
 ## Configuration
 
@@ -37,21 +37,21 @@
 
 ```yaml
 providers:
+  - provider: provider_name # Service provider name, such as openai, anthropic, gemini, openrouter, deepbricks; any name works, required
     base_url: https://api.your.com/v1/chat/completions # Backend service API address, required
     api: sk-YgS6GTi0b4bEabc4C # Provider's API Key, required
+    model: # At least one model is required
       - gpt-4o # Usable model name, required
+      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Rename model: claude-3-5-sonnet-20240620 is the provider's model name and claude-3-5-sonnet is the renamed name; you can use a simpler name in place of the original complex one, optional
       - dall-e-3
 
   - provider: anthropic
     base_url: https://api.anthropic.com/v1/messages
+    api: # Supports multiple API Keys; multiple keys automatically enable round-robin load balancing. At least one key is required.
       - sk-ant-api03-bNnAOJyA-xQw_twAA
       - sk-ant-api02-bNnxxxx
     model:
+      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Rename model: claude-3-5-sonnet-20240620 is the provider's model name and claude-3-5-sonnet is the renamed name; you can use a simpler name in place of the original complex one, optional
     tools: true # Whether to support tools such as code generation or document generation; default is true, optional
 
   - provider: gemini
@@ -59,14 +59,14 @@ providers:
     api: AIzaSyAN2k6IRdgw
     model:
       - gemini-1.5-pro
+      - gemini-1.5-flash-exp-0827: gemini-1.5-flash # After renaming, the original model name gemini-1.5-flash-exp-0827 can no longer be used; to keep the original name available, add it as its own entry, as on the next line
       - gemini-1.5-flash-exp-0827 # With this line, both gemini-1.5-flash-exp-0827 and gemini-1.5-flash can be requested
     tools: true
 
   - provider: vertex
+    project_id: gen-lang-client-xxxxxxxxxxxxxx # Description: your Google Cloud project ID. Format: a string, usually made up of lowercase letters, digits, and hyphens. How to get it: look up your project ID in the project selector of the Google Cloud Console.
+    private_key: "-----BEGIN PRIVATE KEY-----\nxxxxx\n-----END PRIVATE" # Description: the private key of a Google Cloud Vertex AI service account. Format: a JSON-formatted string containing the service account's private key. How to get it: create a service account in the Google Cloud Console, generate a JSON key file, and set its content as the value of this field.
+    client_email: [email protected] # Description: the email address of the Google Cloud Vertex AI service account. Format: usually a string like "[email protected]". How to get it: generated when the service account is created, or viewable under "IAM & Admin" in the Google Cloud Console.
     model:
       - gemini-1.5-pro
       - gemini-1.5-flash
@@ -75,7 +75,14 @@ providers:
       - claude-3-sonnet@20240229: claude-3-sonnet
       - claude-3-haiku@20240307: claude-3-haiku
     tools: true
+    notes: https://xxxxx.com/ # Here you can put the provider's website, notes, or official documentation, optional
+
+  - provider: cloudflare
+    api: f42b3xxxxxxxxxxq4aoGAh # Cloudflare API Key, required
+    cf_account_id: 8ec0xxxxxxxxxxxxe721 # Cloudflare Account ID, required
+    model:
+      - '@cf/meta/llama-3.1-8b-instruct': llama-3.1-8b # Rename model: @cf/meta/llama-3.1-8b-instruct is the provider's original model name and must be wrapped in quotes, otherwise it is a YAML syntax error; llama-3.1-8b is the renamed name, and you can use a simpler name in place of the original complex one, optional
+      - '@cf/meta/llama-3.1-8b-instruct' # Must be wrapped in quotes, otherwise it is a YAML syntax error
 
   - provider: other-provider
     base_url: https://api.xxx.com/v1/messages
@@ -95,28 +102,28 @@ api_keys:
 
   - api: sk-pkhf60Yf0JGyJygRmXqFQyTgWUd9GZnmi3KlvowmRWpWqrhy
     model:
+      - anthropic/claude-3-5-sonnet # Usable model name: only the claude-3-5-sonnet model from the provider named anthropic can be used; same-named models from other providers cannot.
     preferences:
+      USE_ROUND_ROBIN: true # Whether to use round-robin load balancing; true to use, false not to, default is true. When enabled, requests follow the order in which the models are listed under model, independent of the original channel order in providers, so each API key can have its own request order.
       AUTO_RETRY: true # Whether to retry automatically; on failure, the next provider is tried automatically. true to retry, false not to, default is true
       RATE_LIMIT: 2/min # Rate limit: the maximum number of requests per time unit, as an integer rate such as 2/min (2 per minute), 5/hour, 10/day, 10/month, or 10/year. Default is 60/min, optional
 
   # Channel-level weighted load balancing configuration example
   - api: sk-KjjI60Yf0JFWtxxxxxxxxxxxxxxwmRWpWpQRo
     model:
+      - gcp1/*: 5 # The number after the colon is the weight; only positive integers are supported.
+      - gcp2/*: 3 # The larger the number, the higher the probability a request goes to that channel.
+      - gcp3/*: 2 # In this example the channels carry 10 weight in total, so out of 10 requests, 5 go to the gcp1/* model, 3 to the gcp2/* model, and 2 to the gcp3/* model.
 
     preferences:
+      USE_ROUND_ROBIN: true # USE_ROUND_ROBIN must be true. When the channels above carry no weights, requests follow the original channel order; when weights are set, requests follow the weighted order.
       AUTO_RETRY: true
 ```
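With the cloudflare provider above, the renamed model is reachable through uni-api's OpenAI-format interface. A minimal sketch of such a call, assuming uni-api is serving locally on port 8000 and the API key used is one whose model list permits this model (both are assumptions, not shown in this commit):

```python
# Sketch of calling the renamed Cloudflare model through uni-api's
# OpenAI-compatible endpoint. The localhost address/port are assumptions;
# the key must be an api_keys entry allowed to use this model.
import httpx

response = httpx.post(
    "http://localhost:8000/v1/chat/completions",  # assumed local uni-api address
    headers={"Authorization": "Bearer sk-KjjI60Yf0JFWtxxxxxxxxxxxxxxwmRWpWpQRo"},
    json={
        "model": "llama-3.1-8b",  # renamed name from the cloudflare provider above
        "messages": [{"role": "user", "content": "Hello"}],
    },
    timeout=60,
)
print(response.json())
```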
 
+## Environment Variables
 
 - CONFIG_URL: The download address of the configuration file; it can be a local file or a remote file, optional
+- TIMEOUT: Request timeout, default 20 seconds. The timeout controls how long to wait before switching to the next channel when a channel does not respond. Optional
 
 ## Docker Local Deployment
 
@@ -143,7 +150,7 @@ services:
       - ./api.yaml:/home/api.yaml
 ```
 
+CONFIG_URL is a direct link from which a remote configuration file can be downloaded automatically. For instance, if it is inconvenient to modify the configuration file on some platform, you can upload it to a hosting service that provides a direct link for uni-api to download; CONFIG_URL is that direct link.
 
 Run Docker Compose container in the background
 
README_CN.md
CHANGED
@@ -13,13 +13,13 @@
 
 ## Introduction
 
+If used personally, one/new-api is overly complex, with many commercial features that individuals do not need. If you do not want a complex front-end interface and want support for more models, try uni-api. This is a project for the unified management of large-model APIs: it calls multiple backend services through a single API interface, converts them all to the OpenAI format, and supports load balancing. Currently supported backend services: OpenAI, Anthropic, Gemini, Vertex, Cloudflare, DeepBricks, OpenRouter, and more.
 
 ## Features
 
 - No front-end: API channels are configured purely through a configuration file. You can run your own API site by writing a single file, and the documentation includes a detailed, beginner-friendly configuration guide.
+- Unified management of multiple backend services, supporting OpenAI, Deepseek, DeepBricks, OpenRouter, and other providers whose APIs follow the OpenAI format. Supports OpenAI DALL-E 3 image generation.
+- Supports the Anthropic, Gemini, Vertex, and Cloudflare APIs simultaneously. Vertex supports both the Claude and Gemini APIs.
 - Supports native tool use (function calling) for OpenAI, Anthropic, Gemini, and Vertex.
 - Supports OpenAI, Anthropic, Gemini, Vertex native image recognition API.
 - Supports four types of load balancing.
@@ -77,6 +77,13 @@ providers:
     tools: true
     notes: https://xxxxx.com/ # Here you can put the provider's website, notes, or official documentation, optional
 
+  - provider: cloudflare
+    api: f42b3xxxxxxxxxxq4aoGAh # Cloudflare API Key, required
+    cf_account_id: 8ec0xxxxxxxxxxxxe721 # Cloudflare Account ID, required
+    model:
+      - '@cf/meta/llama-3.1-8b-instruct': llama-3.1-8b # Rename model: @cf/meta/llama-3.1-8b-instruct is the provider's original model name and must be wrapped in quotes, otherwise it is a YAML syntax error; llama-3.1-8b is the renamed name, and you can use a simpler name in place of the original complex one, optional
+      - '@cf/meta/llama-3.1-8b-instruct' # Must be wrapped in quotes, otherwise it is a YAML syntax error
+
   - provider: other-provider
     base_url: https://api.xxx.com/v1/messages
     api: sk-bNnAOJyA-xQw_twAA
main.py
CHANGED
@@ -174,11 +174,14 @@
 async def process_request(request: Union[RequestModel, ImageGenerationRequest], provider: Dict, endpoint=None):
     url = provider['base_url']
     parsed_url = urlparse(url)
+    # print("parsed_url", parsed_url)
     engine = None
     if parsed_url.netloc == 'generativelanguage.googleapis.com':
         engine = "gemini"
     elif parsed_url.netloc == 'aiplatform.googleapis.com':
         engine = "vertex"
+    elif parsed_url.netloc == 'api.cloudflare.com':
+        engine = "cloudflare"
     elif parsed_url.netloc == 'api.anthropic.com' or parsed_url.path.endswith("v1/messages"):
         engine = "claude"
     elif parsed_url.netloc == 'openrouter.ai':
@@ -188,7 +191,8 @@
 
     if "claude" not in provider['model'][request.model] \
         and "gpt" not in provider['model'][request.model] \
+        and "gemini" not in provider['model'][request.model] \
+        and parsed_url.netloc != 'api.cloudflare.com':
         engine = "openrouter"
 
     if "claude" in provider['model'][request.model] and engine == "vertex":
@@ -311,7 +315,7 @@
 
         # import json
         # for provider in provider_list:
+        #     print(json.dumps(provider, indent=4, ensure_ascii=False, default=circular_list_encoder))
         return provider_list
 
     async def request_model(self, request: Union[RequestModel, ImageGenerationRequest], token: str, endpoint=None):
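process_request infers the engine from the netloc of the provider's base_url, which is why utils.py (below) pins cloudflare providers to https://api.cloudflare.com/. A standalone restatement of that dispatch, mirroring the branch above (illustrative, not uni-api's actual code):

```python
# Illustrative restatement of the netloc -> engine dispatch in process_request.
from urllib.parse import urlparse

ENGINE_BY_NETLOC = {
    "generativelanguage.googleapis.com": "gemini",
    "aiplatform.googleapis.com": "vertex",
    "api.cloudflare.com": "cloudflare",
    "api.anthropic.com": "claude",
    "openrouter.ai": "openrouter",
}

def detect_engine(base_url: str) -> str | None:
    parsed = urlparse(base_url)
    engine = ENGINE_BY_NETLOC.get(parsed.netloc)
    if engine is None and parsed.path.endswith("v1/messages"):
        engine = "claude"  # Anthropic-compatible endpoints hosted elsewhere
    return engine

assert detect_engine("https://api.cloudflare.com/") == "cloudflare"
```
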
request.py
CHANGED
@@ -33,6 +33,8 @@ async def get_text_message(role, message, engine = None):
         return {"type": "text", "text": message}
     if "gemini" == engine or "vertex-gemini" == engine:
         return {"text": message}
+    if engine == "cloudflare":
+        return message
     raise ValueError("Unknown engine")
 
 async def get_gemini_payload(request, engine, provider):
@@ -640,6 +642,55 @@
 
     return url, headers, payload
 
+async def get_cloudflare_payload(request, engine, provider):
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    if provider.get("api"):
+        headers['Authorization'] = f"Bearer {provider['api'].next()}"
+
+    model = provider['model'][request.model]
+    url = "https://api.cloudflare.com/client/v4/accounts/{cf_account_id}/ai/run/{cf_model_id}".format(cf_account_id=provider['cf_account_id'], cf_model_id=model)
+
+    msg = request.messages[-1]
+    messages = []
+    content = None
+    if isinstance(msg.content, list):
+        for item in msg.content:
+            if item.type == "text":
+                content = await get_text_message(msg.role, item.text, engine)
+    else:
+        content = msg.content
+        name = msg.name
+
+    model = provider['model'][request.model]
+    payload = {
+        "prompt": content,
+    }
+
+    miss_fields = [
+        'model',
+        'messages',
+        'tools',
+        'tool_choice',
+        'temperature',
+        'top_p',
+        'max_tokens',
+        'presence_penalty',
+        'frequency_penalty',
+        'n',
+        'user',
+        'include_usage',
+        'logprobs',
+        'top_logprobs'
+    ]
+
+    for field, value in request.model_dump(exclude_unset=True).items():
+        if field not in miss_fields and value is not None:
+            payload[field] = value
+
+    return url, headers, payload
+
 async def gpt2claude_tools_json(json_dict):
     import copy
     json_dict = copy.deepcopy(json_dict)
@@ -830,6 +881,8 @@
         return await get_gpt_payload(request, engine, provider)
     elif engine == "openrouter":
         return await get_openrouter_payload(request, engine, provider)
+    elif engine == "cloudflare":
+        return await get_cloudflare_payload(request, engine, provider)
     elif engine == "dalle":
         return await get_dalle_payload(request, engine, provider)
     else:
response.py
CHANGED
@@ -112,7 +112,7 @@
             buffer += chunk
             while "\n" in buffer:
                 line, buffer = buffer.split("\n", 1)
-                logger.info(f"{line}")
+                # logger.info(f"{line}")
                 if line and '\"text\": \"' in line:
                     try:
                         json_data = json.loads( "{" + line + "}")
@@ -143,7 +143,7 @@
     yield sse_string
     yield "data: [DONE]\n\r\n"
 
+async def fetch_gpt_response_stream(client, url, headers, payload):
     async with client.stream('POST', url, headers=headers, json=payload) as response:
         error_message = await check_response(response, "fetch_gpt_response_stream")
         if error_message:
@@ -159,6 +159,31 @@
             if line and line != "data: " and line != "data:" and not line.startswith(": "):
                 yield line.strip() + "\n\r\n"
 
+async def fetch_cloudflare_response_stream(client, url, headers, payload, model):
+    timestamp = int(datetime.timestamp(datetime.now()))
+    async with client.stream('POST', url, headers=headers, json=payload) as response:
+        error_message = await check_response(response, "fetch_gpt_response_stream")
+        if error_message:
+            yield error_message
+            return
+
+        buffer = ""
+        async for chunk in response.aiter_text():
+            buffer += chunk
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                # logger.info("line: %s", repr(line))
+                if line.startswith("data:"):
+                    line = line.lstrip("data: ")
+                    if line == "[DONE]":
+                        yield "data: [DONE]\n\r\n"
+                        return
+                    resp: dict = json.loads(line)
+                    message = resp.get("response")
+                    if message:
+                        sse_string = await generate_sse_response(timestamp, model, content=message)
+                        yield sse_string
+
 async def fetch_claude_response_stream(client, url, headers, payload, model):
     timestamp = int(datetime.timestamp(datetime.now()))
     async with client.stream('POST', url, headers=headers, json=payload) as response:
@@ -242,6 +267,9 @@
         elif engine == "openrouter":
             async for chunk in fetch_gpt_response_stream(client, url, headers, payload):
                 yield chunk
+        elif engine == "cloudflare":
+            async for chunk in fetch_cloudflare_response_stream(client, url, headers, payload, model):
+                yield chunk
         else:
             raise ValueError("Unknown response")
     except httpx.ConnectError as e:
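fetch_cloudflare_response_stream reads Cloudflare's SSE stream, where each data: line carries the text delta under a response key, and re-emits it as an OpenAI-style chunk via generate_sse_response. A minimal illustration of the per-line parsing (the sample line mirrors Cloudflare's stream format; generate_sse_response's exact output shape is uni-api internal):

```python
# Minimal illustration of the per-line parsing above: the delta text sits under
# the "response" key of each SSE data line.
import json

line = 'data: {"response": "Hello"}'
body = line[len("data:"):].strip()  # unlike lstrip("data: "), this cannot eat leading payload chars
if body != "[DONE]":
    message = json.loads(body).get("response")
    assert message == "Hello"  # re-wrapped as an OpenAI-style chunk downstream
```
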
utils.py
CHANGED
@@ -15,6 +15,8 @@ def update_config(config_data):
         provider['model'] = model_dict
         if provider.get('project_id'):
             provider['base_url'] = 'https://aiplatform.googleapis.com/'
+        if provider.get('cf_account_id'):
+            provider['base_url'] = 'https://api.cloudflare.com/'
 
         if provider.get('api'):
             if isinstance(provider.get('api'), str):