Update app.py
Browse files
app.py
CHANGED
@@ -3,20 +3,13 @@ import time
|
|
3 |
import logging
|
4 |
import requests
|
5 |
import json
|
6 |
-
import random
|
7 |
import uuid
|
8 |
import concurrent.futures
|
9 |
import threading
|
10 |
-
import base64
|
11 |
-
import io
|
12 |
-
from PIL import Image
|
13 |
from datetime import datetime, timedelta
|
14 |
from apscheduler.schedulers.background import BackgroundScheduler
|
15 |
from flask import Flask, request, jsonify, Response, stream_with_context
|
16 |
|
17 |
-
os.environ['TZ'] = 'Asia/Shanghai'
|
18 |
-
time.tzset()
|
19 |
-
|
20 |
logging.basicConfig(level=logging.INFO,
|
21 |
format='%(asctime)s - %(levelname)s - %(message)s')
|
22 |
|
@@ -39,9 +32,6 @@ token_counts = []
|
|
39 |
data_lock = threading.Lock()
|
40 |
|
41 |
def get_credit_summary(api_key):
|
42 |
-
"""
|
43 |
-
使用 API 密钥获取额度信息,并将美元余额转换为人民币。
|
44 |
-
"""
|
45 |
headers = {
|
46 |
"Authorization": f"Bearer {api_key}",
|
47 |
"Content-Type": "application/json"
|
@@ -53,7 +43,7 @@ def get_credit_summary(api_key):
|
|
53 |
if not data.get("is_available", False):
|
54 |
logging.warning(f"API Key: {api_key} is not available.")
|
55 |
return None
|
56 |
-
|
57 |
balance_infos = data.get("balance_infos", [])
|
58 |
total_balance_cny = 0.0
|
59 |
usd_balance = 0.0
|
@@ -87,10 +77,6 @@ def get_credit_summary(api_key):
|
|
87 |
return None
|
88 |
|
89 |
def get_usd_to_cny_rate():
|
90 |
-
"""
|
91 |
-
获取美元兑人民币的汇率。
|
92 |
-
这里使用一个公共的汇率 API,你可以替换成你自己的。
|
93 |
-
"""
|
94 |
try:
|
95 |
response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
|
96 |
response.raise_for_status()
|
@@ -105,12 +91,6 @@ def refresh_models():
|
|
105 |
logging.info(f"所有文本模型列表:{text_models}")
|
106 |
|
107 |
def load_keys():
|
108 |
-
"""
|
109 |
-
从环境变量中加载 keys,进行去重,
|
110 |
-
并根据额度和模型可用性进行分类,
|
111 |
-
然后记录到日志中。
|
112 |
-
使用线程池并发处理每个 key。
|
113 |
-
"""
|
114 |
keys_str = os.environ.get("KEYS")
|
115 |
keys = [key.strip() for key in keys_str.split(',')]
|
116 |
unique_keys = list(set(keys))
|
@@ -152,9 +132,6 @@ def load_keys():
|
|
152 |
valid_keys_global = valid_keys
|
153 |
|
154 |
def process_key(key):
|
155 |
-
"""
|
156 |
-
处理单个 key,判断其类型。
|
157 |
-
"""
|
158 |
credit_summary = get_credit_summary(key)
|
159 |
if credit_summary is None:
|
160 |
return "invalid"
|
@@ -166,10 +143,6 @@ def process_key(key):
|
|
166 |
return "valid"
|
167 |
|
168 |
def select_key(model_name):
|
169 |
-
"""
|
170 |
-
根据请求类型和模型名称选择合适的 KEY,
|
171 |
-
并实现轮询和重试机制。
|
172 |
-
"""
|
173 |
available_keys = valid_keys_global
|
174 |
|
175 |
current_index = model_key_indices.get(model_name, 0)
|
@@ -184,10 +157,6 @@ def select_key(model_name):
|
|
184 |
return None
|
185 |
|
186 |
def check_authorization(request):
|
187 |
-
"""
|
188 |
-
检查请求头中的 Authorization 字段
|
189 |
-
是否匹配环境变量 AUTHORIZATION_KEY。
|
190 |
-
"""
|
191 |
authorization_key = os.environ.get("AUTHORIZATION_KEY")
|
192 |
if not authorization_key:
|
193 |
logging.warning("环境变量 AUTHORIZATION_KEY 未设置,请设置后重试。")
|
@@ -198,9 +167,9 @@ def check_authorization(request):
|
|
198 |
logging.warning("请求头中缺少 Authorization 字段。")
|
199 |
return False
|
200 |
|
201 |
-
if auth_header != f"Bearer {authorization_key}":
|
202 |
-
|
203 |
-
|
204 |
|
205 |
return True
|
206 |
|
@@ -270,12 +239,12 @@ def check_tokens():
|
|
270 |
)
|
271 |
|
272 |
return jsonify(results)
|
273 |
-
|
274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
275 |
def list_models():
|
276 |
if not check_authorization(request):
|
277 |
return jsonify({"error": "Unauthorized"}), 401
|
278 |
-
|
279 |
detailed_models = [
|
280 |
{
|
281 |
"id": "deepseek-chat",
|
@@ -379,38 +348,6 @@ def billing_usage():
|
|
379 |
"total_usage": 0
|
380 |
})
|
381 |
|
382 |
-
@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
|
383 |
-
def billing_subscription():
|
384 |
-
if not check_authorization(request):
|
385 |
-
return jsonify({"error": "Unauthorized"}), 401
|
386 |
-
|
387 |
-
total_balance = get_billing_info()
|
388 |
-
|
389 |
-
return jsonify({
|
390 |
-
"object": "billing_subscription",
|
391 |
-
"has_payment_method": False,
|
392 |
-
"canceled": False,
|
393 |
-
"canceled_at": None,
|
394 |
-
"delinquent": None,
|
395 |
-
"access_until": int(datetime(9999, 12, 31).timestamp()),
|
396 |
-
"soft_limit": 0,
|
397 |
-
"hard_limit": total_balance,
|
398 |
-
"system_hard_limit": total_balance,
|
399 |
-
"soft_limit_usd": 0,
|
400 |
-
"hard_limit_usd": total_balance,
|
401 |
-
"system_hard_limit_usd": total_balance,
|
402 |
-
"plan": {
|
403 |
-
"name": "SiliconFlow API",
|
404 |
-
"id": "siliconflow-api"
|
405 |
-
},
|
406 |
-
"account_name": "SiliconFlow User",
|
407 |
-
"po_number": None,
|
408 |
-
"billing_email": None,
|
409 |
-
"tax_ids": [],
|
410 |
-
"billing_address": None,
|
411 |
-
"business_address": None
|
412 |
-
})
|
413 |
-
|
414 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
415 |
def handsome_chat_completions():
|
416 |
if not check_authorization(request):
|
@@ -421,7 +358,7 @@ def handsome_chat_completions():
|
|
421 |
return jsonify({"error": "Invalid request data"}), 400
|
422 |
|
423 |
model_name = data['model']
|
424 |
-
|
425 |
api_key = select_key(model_name)
|
426 |
|
427 |
if not api_key:
|
@@ -434,7 +371,7 @@ def handsome_chat_completions():
|
|
434 |
)
|
435 |
}
|
436 |
), 429
|
437 |
-
|
438 |
if model_name == "deepseek-reasoner":
|
439 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
440 |
if param in data:
|
@@ -444,7 +381,7 @@ def handsome_chat_completions():
|
|
444 |
"Authorization": f"Bearer {api_key}",
|
445 |
"Content-Type": "application/json"
|
446 |
}
|
447 |
-
|
448 |
try:
|
449 |
start_time = time.time()
|
450 |
response = requests.post(
|
@@ -462,12 +399,40 @@ def handsome_chat_completions():
|
|
462 |
def generate():
|
463 |
first_chunk_time = None
|
464 |
full_response_content = ""
|
|
|
|
|
|
|
|
|
465 |
for chunk in response.iter_content(chunk_size=1024):
|
466 |
if chunk:
|
467 |
if first_chunk_time is None:
|
468 |
first_chunk_time = time.time()
|
469 |
full_response_content += chunk.decode("utf-8")
|
470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
|
472 |
end_time = time.time()
|
473 |
first_token_time = (
|
@@ -478,7 +443,6 @@ def handsome_chat_completions():
|
|
478 |
|
479 |
prompt_tokens = 0
|
480 |
completion_tokens = 0
|
481 |
-
response_content = ""
|
482 |
for line in full_response_content.splitlines():
|
483 |
if line.startswith("data:"):
|
484 |
line = line[5:].strip()
|
@@ -491,22 +455,9 @@ def handsome_chat_completions():
|
|
491 |
"usage" in response_json and
|
492 |
"completion_tokens" in response_json["usage"]
|
493 |
):
|
494 |
-
completion_tokens
|
495 |
"usage"
|
496 |
]["completion_tokens"]
|
497 |
-
|
498 |
-
if (
|
499 |
-
"choices" in response_json and
|
500 |
-
len(response_json["choices"]) > 0 and
|
501 |
-
"delta" in response_json["choices"][0] and
|
502 |
-
"content" in response_json[
|
503 |
-
"choices"
|
504 |
-
][0]["delta"]
|
505 |
-
):
|
506 |
-
response_content += response_json[
|
507 |
-
"choices"
|
508 |
-
][0]["delta"]["content"]
|
509 |
-
|
510 |
if (
|
511 |
"usage" in response_json and
|
512 |
"prompt_tokens" in response_json["usage"]
|
@@ -547,7 +498,8 @@ def handsome_chat_completions():
|
|
547 |
user_content_replaced = user_content.replace(
|
548 |
'\n', '\\n'
|
549 |
).replace('\r', '\\n')
|
550 |
-
response_content_replaced =
|
|
|
551 |
'\n', '\\n'
|
552 |
).replace('\r', '\\n')
|
553 |
|
@@ -564,13 +516,16 @@ def handsome_chat_completions():
|
|
564 |
|
565 |
with data_lock:
|
566 |
request_timestamps.append(time.time())
|
567 |
-
token_counts.append(prompt_tokens+completion_tokens)
|
|
|
|
|
568 |
|
569 |
return Response(
|
570 |
stream_with_context(generate()),
|
571 |
-
content_type=
|
572 |
)
|
573 |
else:
|
|
|
574 |
response.raise_for_status()
|
575 |
end_time = time.time()
|
576 |
response_json = response.json()
|
@@ -578,12 +533,21 @@ def handsome_chat_completions():
|
|
578 |
|
579 |
try:
|
580 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
581 |
-
completion_tokens = response_json[
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
587 |
except (KeyError, ValueError, IndexError) as e:
|
588 |
logging.error(
|
589 |
f"解析非流式响应 JSON 失败: {e}, "
|
@@ -606,7 +570,8 @@ def handsome_chat_completions():
|
|
606 |
item.get("type") == "text"
|
607 |
):
|
608 |
user_content += (
|
609 |
-
item.get("text", "") +
|
|
|
610 |
)
|
611 |
|
612 |
user_content = user_content.strip()
|
@@ -630,12 +595,32 @@ def handsome_chat_completions():
|
|
630 |
)
|
631 |
with data_lock:
|
632 |
request_timestamps.append(time.time())
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
637 |
|
638 |
-
return jsonify(
|
639 |
|
640 |
except requests.exceptions.RequestException as e:
|
641 |
logging.error(f"请求转发异常: {e}")
|
@@ -661,4 +646,4 @@ if __name__ == '__main__':
|
|
661 |
debug=False,
|
662 |
host='0.0.0.0',
|
663 |
port=int(os.environ.get('PORT', 7860))
|
664 |
-
)
|
|
|
3 |
import logging
|
4 |
import requests
|
5 |
import json
|
|
|
6 |
import uuid
|
7 |
import concurrent.futures
|
8 |
import threading
|
|
|
|
|
|
|
9 |
from datetime import datetime, timedelta
|
10 |
from apscheduler.schedulers.background import BackgroundScheduler
|
11 |
from flask import Flask, request, jsonify, Response, stream_with_context
|
12 |
|
|
|
|
|
|
|
13 |
logging.basicConfig(level=logging.INFO,
|
14 |
format='%(asctime)s - %(levelname)s - %(message)s')
|
15 |
|
|
|
32 |
data_lock = threading.Lock()
|
33 |
|
34 |
def get_credit_summary(api_key):
|
|
|
|
|
|
|
35 |
headers = {
|
36 |
"Authorization": f"Bearer {api_key}",
|
37 |
"Content-Type": "application/json"
|
|
|
43 |
if not data.get("is_available", False):
|
44 |
logging.warning(f"API Key: {api_key} is not available.")
|
45 |
return None
|
46 |
+
|
47 |
balance_infos = data.get("balance_infos", [])
|
48 |
total_balance_cny = 0.0
|
49 |
usd_balance = 0.0
|
|
|
77 |
return None
|
78 |
|
79 |
def get_usd_to_cny_rate():
|
|
|
|
|
|
|
|
|
80 |
try:
|
81 |
response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
|
82 |
response.raise_for_status()
|
|
|
91 |
logging.info(f"所有文本模型列表:{text_models}")
|
92 |
|
93 |
def load_keys():
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
keys_str = os.environ.get("KEYS")
|
95 |
keys = [key.strip() for key in keys_str.split(',')]
|
96 |
unique_keys = list(set(keys))
|
|
|
132 |
valid_keys_global = valid_keys
|
133 |
|
134 |
def process_key(key):
|
|
|
|
|
|
|
135 |
credit_summary = get_credit_summary(key)
|
136 |
if credit_summary is None:
|
137 |
return "invalid"
|
|
|
143 |
return "valid"
|
144 |
|
145 |
def select_key(model_name):
|
|
|
|
|
|
|
|
|
146 |
available_keys = valid_keys_global
|
147 |
|
148 |
current_index = model_key_indices.get(model_name, 0)
|
|
|
157 |
return None
|
158 |
|
159 |
def check_authorization(request):
|
|
|
|
|
|
|
|
|
160 |
authorization_key = os.environ.get("AUTHORIZATION_KEY")
|
161 |
if not authorization_key:
|
162 |
logging.warning("环境变量 AUTHORIZATION_KEY 未设置,请设置后重试。")
|
|
|
167 |
logging.warning("请求头中缺少 Authorization 字段。")
|
168 |
return False
|
169 |
|
170 |
+
# if auth_header != f"Bearer {authorization_key}":
|
171 |
+
# logging.warning(f"无效的 Authorization 密钥:{auth_header}")
|
172 |
+
# return False
|
173 |
|
174 |
return True
|
175 |
|
|
|
239 |
)
|
240 |
|
241 |
return jsonify(results)
|
242 |
+
|
243 |
@app.route('/handsome/v1/models', methods=['GET'])
|
244 |
def list_models():
|
245 |
if not check_authorization(request):
|
246 |
return jsonify({"error": "Unauthorized"}), 401
|
247 |
+
|
248 |
detailed_models = [
|
249 |
{
|
250 |
"id": "deepseek-chat",
|
|
|
348 |
"total_usage": 0
|
349 |
})
|
350 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
352 |
def handsome_chat_completions():
|
353 |
if not check_authorization(request):
|
|
|
358 |
return jsonify({"error": "Invalid request data"}), 400
|
359 |
|
360 |
model_name = data['model']
|
361 |
+
|
362 |
api_key = select_key(model_name)
|
363 |
|
364 |
if not api_key:
|
|
|
371 |
)
|
372 |
}
|
373 |
), 429
|
374 |
+
|
375 |
if model_name == "deepseek-reasoner":
|
376 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
377 |
if param in data:
|
|
|
381 |
"Authorization": f"Bearer {api_key}",
|
382 |
"Content-Type": "application/json"
|
383 |
}
|
384 |
+
|
385 |
try:
|
386 |
start_time = time.time()
|
387 |
response = requests.post(
|
|
|
399 |
def generate():
|
400 |
first_chunk_time = None
|
401 |
full_response_content = ""
|
402 |
+
reasoning_content_accumulated = ""
|
403 |
+
content_accumulated = ""
|
404 |
+
first_reasoning_chunk = True
|
405 |
+
|
406 |
for chunk in response.iter_content(chunk_size=1024):
|
407 |
if chunk:
|
408 |
if first_chunk_time is None:
|
409 |
first_chunk_time = time.time()
|
410 |
full_response_content += chunk.decode("utf-8")
|
411 |
+
|
412 |
+
for line in chunk.decode("utf-8").splitlines():
|
413 |
+
if line.startswith("data:"):
|
414 |
+
try:
|
415 |
+
chunk_json = json.loads(line.lstrip("data: ").strip())
|
416 |
+
if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
|
417 |
+
delta = chunk_json["choices"][0].get("delta", {})
|
418 |
+
|
419 |
+
if delta.get("reasoning_content") is not None:
|
420 |
+
reasoning_chunk = delta["reasoning_content"]
|
421 |
+
if first_reasoning_chunk:
|
422 |
+
reasoning_chunk = "```Thinking\n" + reasoning_chunk
|
423 |
+
first_reasoning_chunk = False
|
424 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
|
425 |
+
|
426 |
+
if delta.get("content") is not None:
|
427 |
+
if not first_reasoning_chunk:
|
428 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': '\n```'}, 'index': 0}]})}\n\n"
|
429 |
+
first_reasoning_chunk = True
|
430 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
|
431 |
+
|
432 |
+
except (KeyError, ValueError, json.JSONDecodeError) as e:
|
433 |
+
logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
|
434 |
+
continue
|
435 |
+
|
436 |
|
437 |
end_time = time.time()
|
438 |
first_token_time = (
|
|
|
443 |
|
444 |
prompt_tokens = 0
|
445 |
completion_tokens = 0
|
|
|
446 |
for line in full_response_content.splitlines():
|
447 |
if line.startswith("data:"):
|
448 |
line = line[5:].strip()
|
|
|
455 |
"usage" in response_json and
|
456 |
"completion_tokens" in response_json["usage"]
|
457 |
):
|
458 |
+
completion_tokens += response_json[
|
459 |
"usage"
|
460 |
]["completion_tokens"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
if (
|
462 |
"usage" in response_json and
|
463 |
"prompt_tokens" in response_json["usage"]
|
|
|
498 |
user_content_replaced = user_content.replace(
|
499 |
'\n', '\\n'
|
500 |
).replace('\r', '\\n')
|
501 |
+
response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
|
502 |
+
response_content_replaced = response_content_replaced.replace(
|
503 |
'\n', '\\n'
|
504 |
).replace('\r', '\\n')
|
505 |
|
|
|
516 |
|
517 |
with data_lock:
|
518 |
request_timestamps.append(time.time())
|
519 |
+
token_counts.append(prompt_tokens + completion_tokens)
|
520 |
+
|
521 |
+
yield "data: [DONE]\n\n"
|
522 |
|
523 |
return Response(
|
524 |
stream_with_context(generate()),
|
525 |
+
content_type="text/event-stream"
|
526 |
)
|
527 |
else:
|
528 |
+
# ... (Non-streaming part remains the same as in the previous response)
|
529 |
response.raise_for_status()
|
530 |
end_time = time.time()
|
531 |
response_json = response.json()
|
|
|
533 |
|
534 |
try:
|
535 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
536 |
+
completion_tokens = response_json["usage"]["completion_tokens"]
|
537 |
+
response_content = ""
|
538 |
+
|
539 |
+
# Special handling for deepseek-reasoner in non-streaming mode
|
540 |
+
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
541 |
+
choice = response_json["choices"][0]
|
542 |
+
if "message" in choice:
|
543 |
+
if "reasoning_content" in choice["message"]:
|
544 |
+
formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
|
545 |
+
response_content += formatted_reasoning + "\n"
|
546 |
+
if "content" in choice["message"]:
|
547 |
+
response_content += choice["message"]["content"]
|
548 |
+
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
549 |
+
response_content = response_json["choices"][0]["message"]["content"]
|
550 |
+
|
551 |
except (KeyError, ValueError, IndexError) as e:
|
552 |
logging.error(
|
553 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
|
570 |
item.get("type") == "text"
|
571 |
):
|
572 |
user_content += (
|
573 |
+
item.get("text", "") +
|
574 |
+
" "
|
575 |
)
|
576 |
|
577 |
user_content = user_content.strip()
|
|
|
595 |
)
|
596 |
with data_lock:
|
597 |
request_timestamps.append(time.time())
|
598 |
+
token_counts.append(prompt_tokens + completion_tokens)
|
599 |
+
|
600 |
+
# Reformat the response to standard OpenAI format for non-streaming responses
|
601 |
+
formatted_response = {
|
602 |
+
"id": response_json.get("id", ""),
|
603 |
+
"object": "chat.completion",
|
604 |
+
"created": response_json.get("created", int(time.time())),
|
605 |
+
"model": model_name,
|
606 |
+
"choices": [
|
607 |
+
{
|
608 |
+
"index": 0,
|
609 |
+
"message": {
|
610 |
+
"role": "assistant",
|
611 |
+
"content": response_content
|
612 |
+
},
|
613 |
+
"finish_reason": "stop"
|
614 |
+
}
|
615 |
+
],
|
616 |
+
"usage": {
|
617 |
+
"prompt_tokens": prompt_tokens,
|
618 |
+
"completion_tokens": completion_tokens,
|
619 |
+
"total_tokens": prompt_tokens + completion_tokens
|
620 |
+
}
|
621 |
+
}
|
622 |
|
623 |
+
return jsonify(formatted_response)
|
624 |
|
625 |
except requests.exceptions.RequestException as e:
|
626 |
logging.error(f"请求转发异常: {e}")
|
|
|
646 |
debug=False,
|
647 |
host='0.0.0.0',
|
648 |
port=int(os.environ.get('PORT', 7860))
|
649 |
+
)
|