yangtb24 committed on
Commit
54ab1ea
·
verified ·
1 Parent(s): 3296e59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -1
app.py CHANGED
@@ -465,13 +465,110 @@ def handsome_chat_completions():
465
  content_type=response.headers['Content-Type']
466
  )
467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  first_chunk_time = None
469
  full_response_content = ""
470
  reasoning_content_accumulated = ""
471
  content_accumulated = ""
472
  first_reasoning_chunk = True
473
 
474
- for chunk in response.iter_content(chunk_size=10000000000):
475
  if chunk:
476
  if first_chunk_time is None:
477
  first_chunk_time = time.time()
 
465
  content_type=response.headers['Content-Type']
466
  )
467
 
468
+ if model_name == "deepseek-reasoner-openwebui":
469
+ first_chunk_time = None
470
+ full_response_content = ""
471
+ for chunk in response.iter_content(chunk_size=2048):
472
+ if chunk:
473
+ if first_chunk_time is None:
474
+ first_chunk_time = time.time()
475
+ full_response_content += chunk.decode("utf-8")
476
+ yield chunk
477
+
478
+ end_time = time.time()
479
+ first_token_time = (
480
+ first_chunk_time - start_time
481
+ if first_chunk_time else 0
482
+ )
483
+ total_time = end_time - start_time
484
+
485
+ prompt_tokens = 0
486
+ completion_tokens = 0
487
+ response_content = ""
488
+ for line in full_response_content.splitlines():
489
+ if line.startswith("data:"):
490
+ line = line[5:].strip()
491
+ if line == "[DONE]":
492
+ continue
493
+ try:
494
+ response_json = json.loads(line)
495
+
496
+ if (
497
+ "usage" in response_json and
498
+ "completion_tokens" in response_json["usage"]
499
+ ):
500
+ completion_tokens = response_json[
501
+ "usage"
502
+ ]["completion_tokens"]
503
+
504
+ if (
505
+ "choices" in response_json and
506
+ len(response_json["choices"]) > 0 and
507
+ "delta" in response_json["choices"][0] and
508
+ "content" in response_json[
509
+ "choices"
510
+ ][0]["delta"]
511
+ ):
512
+ response_content += response_json[
513
+ "choices"
514
+ ][0]["delta"]["content"]
515
+
516
+ if (
517
+ "usage" in response_json and
518
+ "prompt_tokens" in response_json["usage"]
519
+ ):
520
+ prompt_tokens = response_json[
521
+ "usage"
522
+ ]["prompt_tokens"]
523
+
524
+ except (
525
+ KeyError,
526
+ ValueError,
527
+ IndexError
528
+ ) as e:
529
+ logging.error(
530
+ f"解析流式响应单行 JSON 失败: {e}, "
531
+ f"行内容: {line}"
532
+ )
533
+
534
+ user_content = extract_user_content(data.get("messages", []))
535
+
536
+ user_content_replaced = user_content.replace(
537
+ '\n', '\\n'
538
+ ).replace('\r', '\\n')
539
+ response_content_replaced = response_content.replace(
540
+ '\n', '\\n'
541
+ ).replace('\r', '\\n')
542
+
543
+ logging.info(
544
+ f"使用的key: {api_key}, "
545
+ f"提示token: {prompt_tokens}, "
546
+ f"输出token: {completion_tokens}, "
547
+ f"首字用时: {first_token_time:.4f}秒, "
548
+ f"总共用时: {total_time:.4f}秒, "
549
+ f"使用的模型: {model_name}, "
550
+ f"用户的内容: {user_content_replaced}, "
551
+ f"输出的内容: {response_content_replaced}"
552
+ )
553
+
554
+ with data_lock:
555
+ request_timestamps.append(time.time())
556
+ token_counts.append(prompt_tokens+completion_tokens)
557
+ request_timestamps_day.append(time.time())
558
+ token_counts_day.append(prompt_tokens+completion_tokens)
559
+
560
+ return Response(
561
+ stream_with_context(generate()),
562
+ content_type=response.headers['Content-Type']
563
+ )
564
+
565
  first_chunk_time = None
566
  full_response_content = ""
567
  reasoning_content_accumulated = ""
568
  content_accumulated = ""
569
  first_reasoning_chunk = True
570
 
571
+ for chunk in response.iter_lines():
572
  if chunk:
573
  if first_chunk_time is None:
574
  first_chunk_time = time.time()