yangtb24 committed on
Commit
868e37f
·
verified ·
1 Parent(s): ae93e33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -136
app.py CHANGED
@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
53
  if not data.get("is_available", False):
54
  logging.warning(f"API Key: {api_key} is not available.")
55
  return None
56
-
57
  balance_infos = data.get("balance_infos", [])
58
  total_balance_cny = 0.0
59
  usd_balance = 0.0
@@ -270,12 +270,12 @@ def check_tokens():
270
  )
271
 
272
  return jsonify(results)
273
-
274
  @app.route('/handsome/v1/models', methods=['GET'])
275
  def list_models():
276
  if not check_authorization(request):
277
  return jsonify({"error": "Unauthorized"}), 401
278
-
279
  detailed_models = [
280
  {
281
  "id": "deepseek-chat",
@@ -431,139 +431,15 @@ def handsome_chat_completions():
431
  def generate():
432
  first_chunk_time = None
433
  full_response_content = ""
434
- reasoning_content_accumulated = ""
435
- content_accumulated = ""
436
-
437
-
438
  for chunk in response.iter_content(chunk_size=1024):
439
  if chunk:
440
  if first_chunk_time is None:
441
  first_chunk_time = time.time()
442
-
443
  full_response_content += chunk.decode("utf-8")
444
-
445
- try:
446
- for line in chunk.decode("utf-8").splitlines():
447
- if line.startswith("data:"):
448
- line = line[5:].strip()
449
- if line == "[DONE]":
450
- continue
451
- try:
452
- response_json = json.loads(line)
453
-
454
- if (
455
- "usage" in response_json and
456
- "completion_tokens" in response_json["usage"]
457
- ):
458
- completion_tokens = response_json[
459
- "usage"
460
- ]["completion_tokens"]
461
-
462
- # Special handling for deepseek-reasoner in streaming mode
463
- if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
464
- delta = response_json["choices"][0].get("delta", {})
465
-
466
- if "reasoning_content" in delta and delta["reasoning_content"]:
467
- reasoning_content = delta["reasoning_content"]
468
-
469
- formatted_reasoning_chunk = {
470
- "id": response_json.get("id", ""),
471
- "object": "chat.completion.chunk",
472
- "created": response_json.get("created", int(time.time())),
473
- "model": model_name,
474
- "choices": [
475
- {
476
- "index": 0,
477
- "delta": {
478
- "content": f"```Thinking\n{reasoning_content}\n```",
479
- },
480
- "finish_reason": None
481
- }
482
- ],
483
- "usage": None,
484
- }
485
- yield f"data: {json.dumps(formatted_reasoning_chunk)}\n\n".encode('utf-8')
486
- if "content" in delta and delta["content"]:
487
- content = delta["content"]
488
- formatted_content_chunk = {
489
- "id": response_json.get("id", ""),
490
- "object": "chat.completion.chunk",
491
- "created": response_json.get("created", int(time.time())),
492
- "model": model_name,
493
- "choices": [
494
- {
495
- "index": 0,
496
- "delta": {
497
- "content": content,
498
- },
499
- "finish_reason": None
500
- }
501
- ],
502
- "usage": None,
503
- }
504
- yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
505
- elif "choices" in response_json and len(response_json["choices"]) > 0:
506
- # Handle other models normally
507
- delta = response_json["choices"][0].get("delta", {})
508
- if "content" in delta and delta["content"]:
509
- formatted_content_chunk = {
510
- "id": response_json.get("id", ""),
511
- "object": "chat.completion.chunk",
512
- "created": response_json.get("created", int(time.time())),
513
- "model": model_name,
514
- "choices": [
515
- {
516
- "index": 0,
517
- "delta": {
518
- "content": delta["content"],
519
- },
520
- "finish_reason": None
521
- }
522
- ],
523
- "usage": None,
524
- }
525
- yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
526
-
527
- if (
528
- "usage" in response_json and
529
- "prompt_tokens" in response_json["usage"]
530
- ):
531
- prompt_tokens = response_json[
532
- "usage"
533
- ]["prompt_tokens"]
534
-
535
- except (
536
- KeyError,
537
- ValueError,
538
- IndexError
539
- ) as e:
540
- logging.error(
541
- f"解析流式响应单行 JSON 失败: {e}, "
542
- f"行内容: {line}"
543
- )
544
- except Exception as e:
545
- logging.error(f"处理流式响应失败:{e}")
546
-
547
- # Send the [DONE] message after all chunks have been processed
548
- done_chunk = {
549
- "id": response_json.get("id", ""),
550
- "object": "chat.completion.chunk",
551
- "created": response_json.get("created", int(time.time())),
552
- "model": model_name,
553
- "choices": [
554
- {
555
- "index": 0,
556
- "delta": {},
557
- "finish_reason": "stop"
558
- }
559
- ],
560
- "usage": {
561
- "completion_tokens": completion_tokens,
562
- "prompt_tokens": prompt_tokens,
563
- "total_tokens": prompt_tokens + completion_tokens
564
- },
565
- }
566
- yield f"data: {json.dumps(done_chunk)}\n\n".encode('utf-8')
567
 
568
  end_time = time.time()
569
  first_token_time = (
@@ -572,6 +448,61 @@ def handsome_chat_completions():
572
  )
573
  total_time = end_time - start_time
574
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
  user_content = ""
577
  messages = data.get("messages", [])
@@ -595,7 +526,10 @@ def handsome_chat_completions():
595
  user_content_replaced = user_content.replace(
596
  '\n', '\\n'
597
  ).replace('\r', '\\n')
598
-
 
 
 
599
  logging.info(
600
  f"使用的key: {api_key}, "
601
  f"提示token: {prompt_tokens}, "
@@ -603,16 +537,21 @@ def handsome_chat_completions():
603
  f"首字用时: {first_token_time:.4f}秒, "
604
  f"总共用时: {total_time:.4f}秒, "
605
  f"使用的模型: {model_name}, "
606
- f"用户的内容: {user_content_replaced}"
 
607
  )
608
 
609
  with data_lock:
610
  request_timestamps.append(time.time())
611
  token_counts.append(prompt_tokens + completion_tokens)
 
 
 
 
612
 
613
  return Response(
614
  stream_with_context(generate()),
615
- content_type=response.headers['Content-Type']
616
  )
617
  else:
618
  # ... (Non-streaming part remains the same as in the previous response)
@@ -631,8 +570,7 @@ def handsome_chat_completions():
631
  choice = response_json["choices"][0]
632
  if "message" in choice:
633
  if "reasoning_content" in choice["message"]:
634
- reasoning_lines = choice["message"]["reasoning_content"].splitlines()
635
- formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
636
  response_content += formatted_reasoning + "\n"
637
  if "content" in choice["message"]:
638
  response_content += choice["message"]["content"]
 
53
  if not data.get("is_available", False):
54
  logging.warning(f"API Key: {api_key} is not available.")
55
  return None
56
+
57
  balance_infos = data.get("balance_infos", [])
58
  total_balance_cny = 0.0
59
  usd_balance = 0.0
 
270
  )
271
 
272
  return jsonify(results)
273
+
274
  @app.route('/handsome/v1/models', methods=['GET'])
275
  def list_models():
276
  if not check_authorization(request):
277
  return jsonify({"error": "Unauthorized"}), 401
278
+
279
  detailed_models = [
280
  {
281
  "id": "deepseek-chat",
 
431
  def generate():
432
  first_chunk_time = None
433
  full_response_content = ""
434
+ reasoning_content_accumulated = "" # Accumulate reasoning content
435
+ content_accumulated = "" # Accumulate regular content
436
+
 
437
  for chunk in response.iter_content(chunk_size=1024):
438
  if chunk:
439
  if first_chunk_time is None:
440
  first_chunk_time = time.time()
 
441
  full_response_content += chunk.decode("utf-8")
442
+ yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  end_time = time.time()
445
  first_token_time = (
 
448
  )
449
  total_time = end_time - start_time
450
 
451
+ prompt_tokens = 0
452
+ completion_tokens = 0
453
+ for line in full_response_content.splitlines():
454
+ if line.startswith("data:"):
455
+ line = line[5:].strip()
456
+ if line == "[DONE]":
457
+ continue
458
+ try:
459
+ response_json = json.loads(line)
460
+
461
+ if (
462
+ "usage" in response_json and
463
+ "completion_tokens" in response_json["usage"]
464
+ ):
465
+ completion_tokens += response_json[
466
+ "usage"
467
+ ]["completion_tokens"]
468
+
469
+ # Special handling for deepseek-reasoner in streaming mode
470
+ if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
471
+ delta = response_json["choices"][0].get("delta", {})
472
+ if "reasoning_content" in delta:
473
+ reasoning_content_accumulated += delta["reasoning_content"]
474
+ if "content" in delta:
475
+ content_accumulated += delta["content"]
476
+ elif "choices" in response_json and len(response_json["choices"]) > 0:
477
+ # Handle other models normally
478
+ delta = response_json["choices"][0].get("delta", {})
479
+ if "content" in delta:
480
+ content_accumulated += delta["content"]
481
+
482
+ if (
483
+ "usage" in response_json and
484
+ "prompt_tokens" in response_json["usage"]
485
+ ):
486
+ prompt_tokens = response_json[
487
+ "usage"
488
+ ]["prompt_tokens"]
489
+
490
+ except (
491
+ KeyError,
492
+ ValueError,
493
+ IndexError
494
+ ) as e:
495
+ logging.error(
496
+ f"解析流式响应单行 JSON 失败: {e}, "
497
+ f"行内容: {line}"
498
+ )
499
+
500
+ # Format the accumulated reasoning content after processing all chunks
501
+ if model_name == "deepseek-reasoner":
502
+ formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
503
+ response_content = formatted_reasoning + "\n" + content_accumulated
504
+ else:
505
+ response_content = content_accumulated
506
 
507
  user_content = ""
508
  messages = data.get("messages", [])
 
526
  user_content_replaced = user_content.replace(
527
  '\n', '\\n'
528
  ).replace('\r', '\\n')
529
+ response_content_replaced = response_content.replace(
530
+ '\n', '\\n'
531
+ ).replace('\r', '\\n')
532
+
533
  logging.info(
534
  f"使用的key: {api_key}, "
535
  f"提示token: {prompt_tokens}, "
 
537
  f"首字用时: {first_token_time:.4f}秒, "
538
  f"总共用时: {total_time:.4f}秒, "
539
  f"使用的模型: {model_name}, "
540
+ f"用户的内容: {user_content_replaced}, "
541
+ f"输出的内容: {response_content_replaced}"
542
  )
543
 
544
  with data_lock:
545
  request_timestamps.append(time.time())
546
  token_counts.append(prompt_tokens + completion_tokens)
547
+
548
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
549
+ yield "data: [DONE]\n\n"
550
+
551
 
552
  return Response(
553
  stream_with_context(generate()),
554
+ content_type="text/event-stream"
555
  )
556
  else:
557
  # ... (Non-streaming part remains the same as in the previous response)
 
570
  choice = response_json["choices"][0]
571
  if "message" in choice:
572
  if "reasoning_content" in choice["message"]:
573
+ formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
 
574
  response_content += formatted_reasoning + "\n"
575
  if "content" in choice["message"]:
576
  response_content += choice["message"]["content"]