Update app.py
app.py
CHANGED
@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
     if not data.get("is_available", False):
         logging.warning(f"API Key: {api_key} is not available.")
         return None
-
+
     balance_infos = data.get("balance_infos", [])
     total_balance_cny = 0.0
     usd_balance = 0.0
@@ -270,12 +270,12 @@ def check_tokens():
     )
 
     return jsonify(results)
-
+
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
-
+
     detailed_models = [
         {
             "id": "deepseek-chat",
@@ -431,139 +431,15 @@ def handsome_chat_completions():
         def generate():
             first_chunk_time = None
             full_response_content = ""
-            reasoning_content_accumulated = ""
-            content_accumulated = ""
-
-
+            reasoning_content_accumulated = ""  # Accumulate reasoning content
+            content_accumulated = ""  # Accumulate regular content
+
             for chunk in response.iter_content(chunk_size=1024):
                 if chunk:
                     if first_chunk_time is None:
                         first_chunk_time = time.time()
-
                     full_response_content += chunk.decode("utf-8")
-
-                    try:
-                        for line in chunk.decode("utf-8").splitlines():
-                            if line.startswith("data:"):
-                                line = line[5:].strip()
-                                if line == "[DONE]":
-                                    continue
-                                try:
-                                    response_json = json.loads(line)
-
-                                    if (
-                                        "usage" in response_json and
-                                        "completion_tokens" in response_json["usage"]
-                                    ):
-                                        completion_tokens = response_json[
-                                            "usage"
-                                        ]["completion_tokens"]
-
-                                    # Special handling for deepseek-reasoner in streaming mode
-                                    if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                                        delta = response_json["choices"][0].get("delta", {})
-
-                                        if "reasoning_content" in delta and delta["reasoning_content"]:
-                                            reasoning_content = delta["reasoning_content"]
-
-                                            formatted_reasoning_chunk = {
-                                                "id": response_json.get("id", ""),
-                                                "object": "chat.completion.chunk",
-                                                "created": response_json.get("created", int(time.time())),
-                                                "model": model_name,
-                                                "choices": [
-                                                    {
-                                                        "index": 0,
-                                                        "delta": {
-                                                            "content": f"```Thinking\n{reasoning_content}\n```",
-                                                        },
-                                                        "finish_reason": None
-                                                    }
-                                                ],
-                                                "usage": None,
-                                            }
-                                            yield f"data: {json.dumps(formatted_reasoning_chunk)}\n\n".encode('utf-8')
-                                        if "content" in delta and delta["content"]:
-                                            content = delta["content"]
-                                            formatted_content_chunk = {
-                                                "id": response_json.get("id", ""),
-                                                "object": "chat.completion.chunk",
-                                                "created": response_json.get("created", int(time.time())),
-                                                "model": model_name,
-                                                "choices": [
-                                                    {
-                                                        "index": 0,
-                                                        "delta": {
-                                                            "content": content,
-                                                        },
-                                                        "finish_reason": None
-                                                    }
-                                                ],
-                                                "usage": None,
-                                            }
-                                            yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
-                                    elif "choices" in response_json and len(response_json["choices"]) > 0:
-                                        # Handle other models normally
-                                        delta = response_json["choices"][0].get("delta", {})
-                                        if "content" in delta and delta["content"]:
-                                            formatted_content_chunk = {
-                                                "id": response_json.get("id", ""),
-                                                "object": "chat.completion.chunk",
-                                                "created": response_json.get("created", int(time.time())),
-                                                "model": model_name,
-                                                "choices": [
-                                                    {
-                                                        "index": 0,
-                                                        "delta": {
-                                                            "content": delta["content"],
-                                                        },
-                                                        "finish_reason": None
-                                                    }
-                                                ],
-                                                "usage": None,
-                                            }
-                                            yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
-
-                                    if (
-                                        "usage" in response_json and
-                                        "prompt_tokens" in response_json["usage"]
-                                    ):
-                                        prompt_tokens = response_json[
-                                            "usage"
-                                        ]["prompt_tokens"]
-
-                                except (
-                                    KeyError,
-                                    ValueError,
-                                    IndexError
-                                ) as e:
-                                    logging.error(
-                                        f"Failed to parse one line of the streamed JSON response: {e}, "
-                                        f"line content: {line}"
-                                    )
-                    except Exception as e:
-                        logging.error(f"Failed to process the streamed response: {e}")
-
-            # Send the [DONE] message after all chunks have been processed
-            done_chunk = {
-                "id": response_json.get("id", ""),
-                "object": "chat.completion.chunk",
-                "created": response_json.get("created", int(time.time())),
-                "model": model_name,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {},
-                        "finish_reason": "stop"
-                    }
-                ],
-                "usage": {
-                    "completion_tokens": completion_tokens,
-                    "prompt_tokens": prompt_tokens,
-                    "total_tokens": prompt_tokens + completion_tokens
-                },
-            }
-            yield f"data: {json.dumps(done_chunk)}\n\n".encode('utf-8')
+                    yield chunk
 
             end_time = time.time()
             first_token_time = (
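Note: the rewritten generate() no longer parses and re-emits every chunk in flight. It forwards the upstream bytes untouched (yield chunk) while keeping a decoded copy in full_response_content for accounting once the stream has finished. A minimal sketch of that pass-through pattern, with a hypothetical fake_upstream() standing in for response.iter_content(chunk_size=1024):

    def fake_upstream():
        # Hypothetical stand-in for response.iter_content(chunk_size=1024).
        yield b'data: {"choices":[{"delta":{"content":"Hi"},"index":0}]}\n\n'
        yield b'data: [DONE]\n\n'

    def passthrough(upstream, sink):
        # Forward each chunk to the client unmodified; append the decoded
        # text to sink so it can be parsed after the stream is finished.
        for chunk in upstream:
            if chunk:
                sink.append(chunk.decode("utf-8"))
                yield chunk

    pieces = []
    for chunk in passthrough(fake_upstream(), pieces):
        pass  # in app.py these bytes go straight into the SSE response
    full_response_content = "".join(pieces)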
@@ -572,6 +448,61 @@ def handsome_chat_completions():
             )
             total_time = end_time - start_time
 
+            prompt_tokens = 0
+            completion_tokens = 0
+            for line in full_response_content.splitlines():
+                if line.startswith("data:"):
+                    line = line[5:].strip()
+                    if line == "[DONE]":
+                        continue
+                    try:
+                        response_json = json.loads(line)
+
+                        if (
+                            "usage" in response_json and
+                            "completion_tokens" in response_json["usage"]
+                        ):
+                            completion_tokens += response_json[
+                                "usage"
+                            ]["completion_tokens"]
+
+                        # Special handling for deepseek-reasoner in streaming mode
+                        if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
+                            delta = response_json["choices"][0].get("delta", {})
+                            if "reasoning_content" in delta:
+                                reasoning_content_accumulated += delta["reasoning_content"]
+                            if "content" in delta:
+                                content_accumulated += delta["content"]
+                        elif "choices" in response_json and len(response_json["choices"]) > 0:
+                            # Handle other models normally
+                            delta = response_json["choices"][0].get("delta", {})
+                            if "content" in delta:
+                                content_accumulated += delta["content"]
+
+                        if (
+                            "usage" in response_json and
+                            "prompt_tokens" in response_json["usage"]
+                        ):
+                            prompt_tokens = response_json[
+                                "usage"
+                            ]["prompt_tokens"]
+
+                    except (
+                        KeyError,
+                        ValueError,
+                        IndexError
+                    ) as e:
+                        logging.error(
+                            f"Failed to parse one line of the streamed JSON response: {e}, "
+                            f"line content: {line}"
+                        )
+
+            # Format the accumulated reasoning content after processing all chunks
+            if model_name == "deepseek-reasoner":
+                formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
+                response_content = formatted_reasoning + "\n" + content_accumulated
+            else:
+                response_content = content_accumulated
 
             user_content = ""
             messages = data.get("messages", [])
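The post-stream tally added above walks the buffered SSE text line by line, reading usage counts and delta content out of each "data:" frame. A compact standalone version of that parse (sample_sse is illustrative, shaped like the OpenAI-style chunks the upstream API emits):

    import json

    sample_sse = (
        'data: {"choices":[{"delta":{"content":"Hello"},"index":0}],'
        '"usage":{"prompt_tokens":3,"completion_tokens":2}}\n'
        'data: [DONE]\n'
    )

    prompt_tokens = completion_tokens = 0
    content = ""
    for line in sample_sse.splitlines():
        if not line.startswith("data:"):
            continue
        payload = line[5:].strip()
        if payload == "[DONE]":
            continue
        try:
            frame = json.loads(payload)
        except ValueError:
            continue  # skip anything that is not a complete JSON frame
        usage = frame.get("usage") or {}
        prompt_tokens = usage.get("prompt_tokens", prompt_tokens)
        completion_tokens += usage.get("completion_tokens", 0)
        for choice in frame.get("choices", []):
            content += choice.get("delta", {}).get("content") or ""

    print(prompt_tokens, completion_tokens, content)  # -> 3 2 Hello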
@@ -595,7 +526,10 @@ def handsome_chat_completions():
             user_content_replaced = user_content.replace(
                 '\n', '\\n'
             ).replace('\r', '\\n')
-
+            response_content_replaced = response_content.replace(
+                '\n', '\\n'
+            ).replace('\r', '\\n')
+
             logging.info(
                 f"API key used: {api_key}, "
                 f"prompt tokens: {prompt_tokens}, "
@@ -603,16 +537,21 @@ def handsome_chat_completions():
                 f"time to first token: {first_token_time:.4f}s, "
                 f"total time: {total_time:.4f}s, "
                 f"model used: {model_name}, "
-                f"user content: {user_content_replaced}"
+                f"user content: {user_content_replaced}, "
+                f"output content: {response_content_replaced}"
             )
 
             with data_lock:
                 request_timestamps.append(time.time())
                 token_counts.append(prompt_tokens + completion_tokens)
+
+            yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
+            yield "data: [DONE]\n\n"
+
 
         return Response(
             stream_with_context(generate()),
-            content_type=
+            content_type="text/event-stream"
         )
     else:
         # ... (Non-streaming part remains the same as in the previous response)
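After the upstream stream ends, generate() now emits one aggregate frame carrying the reassembled response_content plus the [DONE] sentinel, and the Response is served with content_type="text/event-stream". A small sketch of building such a frame (sse_frame is a hypothetical helper, not part of app.py):

    import json
    import time

    def sse_frame(text, model="deepseek-chat"):
        # Hypothetical helper: wrap text in one OpenAI-style SSE chunk.
        chunk = {
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {"index": 0, "delta": {"content": text}, "finish_reason": None}
            ],
        }
        return f"data: {json.dumps(chunk)}\n\n"

    print(sse_frame("hello"), end="")
    print("data: [DONE]\n\n", end="")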
@@ -631,8 +570,7 @@ def handsome_chat_completions():
             choice = response_json["choices"][0]
             if "message" in choice:
                 if "reasoning_content" in choice["message"]:
-
-                    formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
+                    formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
                     response_content += formatted_reasoning + "\n"
                 if "content" in choice["message"]:
                     response_content += choice["message"]["content"]
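The non-streaming path above swaps the old "> "-quoted reasoning block for the same fenced format the streaming path now produces. Side by side, on illustrative input:

    reasoning = "step 1\nstep 2"

    # Old formatting: prefix each reasoning line with "> ".
    old = "\n".join(f"> {line}" for line in reasoning.splitlines())

    # New formatting: wrap the whole block in a ```Thinking fence.
    new = f"```Thinking\n{reasoning}\n```"

    print(old)
    print(new)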