yangtb24 committed on
Commit 3296e59 · verified · 1 Parent(s): 36bcac8

Update app.py

Files changed (1)
  1. app.py +97 -0
app.py CHANGED
@@ -368,6 +368,103 @@ def handsome_chat_completions():
 
     if data.get("stream", False):
         def generate():
+            if model_name == "deepseek-reasoner":
+                first_chunk_time = None
+                full_response_content = ""
+                for chunk in response.iter_content(chunk_size=2048):
+                    if chunk:
+                        if first_chunk_time is None:
+                            first_chunk_time = time.time()
+                        full_response_content += chunk.decode("utf-8")
+                        yield chunk
+
+                end_time = time.time()
+                first_token_time = (
+                    first_chunk_time - start_time
+                    if first_chunk_time else 0
+                )
+                total_time = end_time - start_time
+
+                prompt_tokens = 0
+                completion_tokens = 0
+                response_content = ""
+                for line in full_response_content.splitlines():
+                    if line.startswith("data:"):
+                        line = line[5:].strip()
+                        if line == "[DONE]":
+                            continue
+                        try:
+                            response_json = json.loads(line)
+
+                            if (
+                                "usage" in response_json and
+                                "completion_tokens" in response_json["usage"]
+                            ):
+                                completion_tokens = response_json[
+                                    "usage"
+                                ]["completion_tokens"]
+
+                            if (
+                                "choices" in response_json and
+                                len(response_json["choices"]) > 0 and
+                                "delta" in response_json["choices"][0] and
+                                "content" in response_json[
+                                    "choices"
+                                ][0]["delta"]
+                            ):
+                                response_content += response_json[
+                                    "choices"
+                                ][0]["delta"]["content"]
+
+                            if (
+                                "usage" in response_json and
+                                "prompt_tokens" in response_json["usage"]
+                            ):
+                                prompt_tokens = response_json[
+                                    "usage"
+                                ]["prompt_tokens"]
+
+                        except (
+                            KeyError,
+                            ValueError,
+                            IndexError
+                        ) as e:
+                            logging.error(
+                                f"Failed to parse a streamed response line as JSON: {e}, "
+                                f"line content: {line}"
+                            )
+
+                user_content = extract_user_content(data.get("messages", []))
+
+                user_content_replaced = user_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
+                response_content_replaced = response_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
+
+                logging.info(
+                    f"API key used: {api_key}, "
+                    f"prompt tokens: {prompt_tokens}, "
+                    f"completion tokens: {completion_tokens}, "
+                    f"time to first token: {first_token_time:.4f}s, "
+                    f"total time: {total_time:.4f}s, "
+                    f"model used: {model_name}, "
+                    f"user content: {user_content_replaced}, "
+                    f"response content: {response_content_replaced}"
+                )
+
+                with data_lock:
+                    request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens+completion_tokens)
+                    request_timestamps_day.append(time.time())
+                    token_counts_day.append(prompt_tokens+completion_tokens)
+
+                return Response(
+                    stream_with_context(generate()),
+                    content_type=response.headers['Content-Type']
+                )
+
             first_chunk_time = None
             full_response_content = ""
             reasoning_content_accumulated = ""
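
The added deepseek-reasoner branch buffers the proxied SSE stream while yielding it to the client, then re-parses the buffered "data:" lines to recover token usage and the assembled reply for logging. The snippet below is a standalone sketch of that parsing step only; it is not part of the commit, summarize_sse_stream is a hypothetical helper name, and it assumes the upstream sends OpenAI-style chunks with optional usage fields.

import json

def summarize_sse_stream(full_response_content: str):
    # Hypothetical helper mirroring the parsing loop added inside generate():
    # walk the buffered SSE body, skip "[DONE]", and pull out token usage
    # plus the concatenated delta content.
    prompt_tokens = 0
    completion_tokens = 0
    response_content = ""
    for line in full_response_content.splitlines():
        if not line.startswith("data:"):
            continue
        payload = line[5:].strip()
        if payload == "[DONE]":
            continue
        try:
            chunk = json.loads(payload)
            usage = chunk.get("usage") or {}
            prompt_tokens = usage.get("prompt_tokens", prompt_tokens)
            completion_tokens = usage.get("completion_tokens", completion_tokens)
            choices = chunk.get("choices") or []
            if choices and "content" in choices[0].get("delta", {}):
                response_content += choices[0]["delta"]["content"] or ""
        except (KeyError, ValueError, IndexError):
            pass  # the committed code logs the bad line and moves on
    return prompt_tokens, completion_tokens, response_content

# Example with synthetic chunks:
sample = (
    'data: {"choices":[{"delta":{"content":"Hi"}}]}\n'
    'data: {"choices":[{"delta":{}}],"usage":{"prompt_tokens":3,"completion_tokens":1}}\n'
    'data: [DONE]\n'
)
print(summarize_sse_stream(sample))  # -> (3, 1, 'Hi')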