yangtb24 committed on
Commit
ae93e33
·
verified ·
1 Parent(s): 27ad875

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -75
app.py CHANGED
@@ -431,51 +431,139 @@ def handsome_chat_completions():
431
  def generate():
432
  first_chunk_time = None
433
  full_response_content = ""
434
-
 
 
 
435
  for chunk in response.iter_content(chunk_size=1024):
436
  if chunk:
437
  if first_chunk_time is None:
438
  first_chunk_time = time.time()
 
439
  full_response_content += chunk.decode("utf-8")
440
 
441
- chunk_data_list = chunk.decode("utf-8").split("\n\n")
442
-
443
- for chunk_data in chunk_data_list:
444
- if not chunk_data:
445
- continue
446
- if chunk_data.startswith("data:"):
447
- chunk_data = chunk_data[5:].strip()
448
- if chunk_data == "[DONE]":
449
- continue
450
- try:
451
- response_json = json.loads(chunk_data)
452
- if (
453
- model_name == "deepseek-reasoner" and
454
- "choices" in response_json and
455
- len(response_json["choices"]) > 0
456
- ):
457
- delta = response_json["choices"][0].get("delta", {})
458
- new_content = ""
459
- if "reasoning_content" in delta and delta["reasoning_content"] is not None:
460
- new_content += "> " + delta["reasoning_content"]
461
- if "content" in delta and delta["content"] is not None:
462
- new_content += delta["content"]
463
-
464
- if new_content:
465
- response_json["choices"][0]["delta"] = {"content": new_content}
466
- yield f"data: {json.dumps(response_json)}\n\n".encode("utf-8")
467
- else:
468
- yield f"data: {chunk_data}\n\n".encode("utf-8")
469
-
470
- except (
471
- KeyError,
472
- ValueError,
473
- IndexError
474
- ) as e:
475
- logging.error(
476
- f"解析流式响应单行 JSON 失败: {e}, "
477
- f"行内容: {chunk_data}"
478
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
  end_time = time.time()
481
  first_token_time = (
@@ -484,41 +572,6 @@ def handsome_chat_completions():
484
  )
485
  total_time = end_time - start_time
486
 
487
- prompt_tokens = 0
488
- completion_tokens = 0
489
- for line in full_response_content.splitlines():
490
- if line.startswith("data:"):
491
- line = line[5:].strip()
492
- if line == "[DONE]":
493
- continue
494
- try:
495
- response_json = json.loads(line)
496
-
497
- if (
498
- "usage" in response_json and
499
- "completion_tokens" in response_json["usage"]
500
- ):
501
- completion_tokens += response_json[
502
- "usage"
503
- ]["completion_tokens"]
504
-
505
- if (
506
- "usage" in response_json and
507
- "prompt_tokens" in response_json["usage"]
508
- ):
509
- prompt_tokens = response_json[
510
- "usage"
511
- ]["prompt_tokens"]
512
-
513
- except (
514
- KeyError,
515
- ValueError,
516
- IndexError
517
- ) as e:
518
- logging.error(
519
- f"解析流式响应单行 JSON 失败: {e}, "
520
- f"行内容: {line}"
521
- )
522
 
523
  user_content = ""
524
  messages = data.get("messages", [])
@@ -542,7 +595,7 @@ def handsome_chat_completions():
542
  user_content_replaced = user_content.replace(
543
  '\n', '\\n'
544
  ).replace('\r', '\\n')
545
-
546
  logging.info(
547
  f"使用的key: {api_key}, "
548
  f"提示token: {prompt_tokens}, "
@@ -562,6 +615,7 @@ def handsome_chat_completions():
562
  content_type=response.headers['Content-Type']
563
  )
564
  else:
 
565
  response.raise_for_status()
566
  end_time = time.time()
567
  response_json = response.json()
@@ -663,6 +717,7 @@ def handsome_chat_completions():
663
  logging.error(f"请求转发异常: {e}")
664
  return jsonify({"error": str(e)}), 500
665
 
 
666
  if __name__ == '__main__':
667
  logging.info(f"环境变量:{os.environ}")
668
 
 
431
  def generate():
432
  first_chunk_time = None
433
  full_response_content = ""
434
+ reasoning_content_accumulated = ""
435
+ content_accumulated = ""
436
+
437
+
438
  for chunk in response.iter_content(chunk_size=1024):
439
  if chunk:
440
  if first_chunk_time is None:
441
  first_chunk_time = time.time()
442
+
443
  full_response_content += chunk.decode("utf-8")
444
 
445
+ try:
446
+ for line in chunk.decode("utf-8").splitlines():
447
+ if line.startswith("data:"):
448
+ line = line[5:].strip()
449
+ if line == "[DONE]":
450
+ continue
451
+ try:
452
+ response_json = json.loads(line)
453
+
454
+ if (
455
+ "usage" in response_json and
456
+ "completion_tokens" in response_json["usage"]
457
+ ):
458
+ completion_tokens = response_json[
459
+ "usage"
460
+ ]["completion_tokens"]
461
+
462
+ # Special handling for deepseek-reasoner in streaming mode
463
+ if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
464
+ delta = response_json["choices"][0].get("delta", {})
465
+
466
+ if "reasoning_content" in delta and delta["reasoning_content"]:
467
+ reasoning_content = delta["reasoning_content"]
468
+
469
+ formatted_reasoning_chunk = {
470
+ "id": response_json.get("id", ""),
471
+ "object": "chat.completion.chunk",
472
+ "created": response_json.get("created", int(time.time())),
473
+ "model": model_name,
474
+ "choices": [
475
+ {
476
+ "index": 0,
477
+ "delta": {
478
+ "content": f"```Thinking\n{reasoning_content}\n```",
479
+ },
480
+ "finish_reason": None
481
+ }
482
+ ],
483
+ "usage": None,
484
+ }
485
+ yield f"data: {json.dumps(formatted_reasoning_chunk)}\n\n".encode('utf-8')
486
+ if "content" in delta and delta["content"]:
487
+ content = delta["content"]
488
+ formatted_content_chunk = {
489
+ "id": response_json.get("id", ""),
490
+ "object": "chat.completion.chunk",
491
+ "created": response_json.get("created", int(time.time())),
492
+ "model": model_name,
493
+ "choices": [
494
+ {
495
+ "index": 0,
496
+ "delta": {
497
+ "content": content,
498
+ },
499
+ "finish_reason": None
500
+ }
501
+ ],
502
+ "usage": None,
503
+ }
504
+ yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
505
+ elif "choices" in response_json and len(response_json["choices"]) > 0:
506
+ # Handle other models normally
507
+ delta = response_json["choices"][0].get("delta", {})
508
+ if "content" in delta and delta["content"]:
509
+ formatted_content_chunk = {
510
+ "id": response_json.get("id", ""),
511
+ "object": "chat.completion.chunk",
512
+ "created": response_json.get("created", int(time.time())),
513
+ "model": model_name,
514
+ "choices": [
515
+ {
516
+ "index": 0,
517
+ "delta": {
518
+ "content": delta["content"],
519
+ },
520
+ "finish_reason": None
521
+ }
522
+ ],
523
+ "usage": None,
524
+ }
525
+ yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
526
+
527
+ if (
528
+ "usage" in response_json and
529
+ "prompt_tokens" in response_json["usage"]
530
+ ):
531
+ prompt_tokens = response_json[
532
+ "usage"
533
+ ]["prompt_tokens"]
534
+
535
+ except (
536
+ KeyError,
537
+ ValueError,
538
+ IndexError
539
+ ) as e:
540
+ logging.error(
541
+ f"解析流式响应单行 JSON 失败: {e}, "
542
+ f"行内容: {line}"
543
+ )
544
+ except Exception as e:
545
+ logging.error(f"处理流式响应失败:{e}")
546
+
547
+ # Send the [DONE] message after all chunks have been processed
548
+ done_chunk = {
549
+ "id": response_json.get("id", ""),
550
+ "object": "chat.completion.chunk",
551
+ "created": response_json.get("created", int(time.time())),
552
+ "model": model_name,
553
+ "choices": [
554
+ {
555
+ "index": 0,
556
+ "delta": {},
557
+ "finish_reason": "stop"
558
+ }
559
+ ],
560
+ "usage": {
561
+ "completion_tokens": completion_tokens,
562
+ "prompt_tokens": prompt_tokens,
563
+ "total_tokens": prompt_tokens + completion_tokens
564
+ },
565
+ }
566
+ yield f"data: {json.dumps(done_chunk)}\n\n".encode('utf-8')
567
 
568
  end_time = time.time()
569
  first_token_time = (
 
572
  )
573
  total_time = end_time - start_time
574
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
  user_content = ""
577
  messages = data.get("messages", [])
 
595
  user_content_replaced = user_content.replace(
596
  '\n', '\\n'
597
  ).replace('\r', '\\n')
598
+
599
  logging.info(
600
  f"使用的key: {api_key}, "
601
  f"提示token: {prompt_tokens}, "
 
615
  content_type=response.headers['Content-Type']
616
  )
617
  else:
618
+ # ... (Non-streaming part remains the same as in the previous response)
619
  response.raise_for_status()
620
  end_time = time.time()
621
  response_json = response.json()
 
717
  logging.error(f"请求转发异常: {e}")
718
  return jsonify({"error": str(e)}), 500
719
 
720
+
721
  if __name__ == '__main__':
722
  logging.info(f"环境变量:{os.environ}")
723