yangtb24 committed on
Commit
0727ff4
·
verified ·
1 Parent(s): 8da261d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -74
app.py CHANGED
@@ -5,7 +5,6 @@ import requests
5
  import json
6
  import concurrent.futures
7
  import threading
8
- import uuid
9
  from datetime import datetime, timedelta
10
  from apscheduler.schedulers.background import BackgroundScheduler
11
  from flask import Flask, request, jsonify, Response, stream_with_context
@@ -361,7 +360,7 @@ def handsome_chat_completions():
361
  headers=headers,
362
  json=data,
363
  stream=data.get("stream", False),
364
- timeout=1200
365
  )
366
 
367
  if response.status_code == 429:
@@ -466,93 +465,137 @@ def handsome_chat_completions():
466
  content_type=response.headers['Content-Type']
467
  )
468
 
469
- if model_name == "deepseek-reasoner-openwebui":
470
  first_chunk_time = None
471
  full_response_content = ""
472
  reasoning_content_accumulated = ""
473
  content_accumulated = ""
474
  first_reasoning_chunk = True
475
- response_id = f"chatcmpl-{uuid.uuid4()}"
476
- created_time = int(time.time())
477
-
478
  for chunk in response.iter_lines():
479
  if chunk:
480
  if first_chunk_time is None:
481
  first_chunk_time = time.time()
482
- chunk_str = chunk.decode("utf-8")
483
- full_response_content += chunk_str
484
 
485
- if chunk_str.startswith("data:"):
486
- try:
487
- chunk_json = json.loads(chunk_str[5:].strip())
488
- delta = chunk_json.get("choices", [{}])[0].get("delta", {})
489
-
490
- openai_chunk = {
491
- "id": response_id,
492
- "object": "chat.completion.chunk",
493
- "created": created_time,
494
- "model": model_name,
495
- "choices": [{
496
- "index": 0,
497
- "delta": {},
498
- "finish_reason": None
499
- }]
500
- }
501
-
502
- if delta.get("reasoning_content") is not None:
503
- if first_reasoning_chunk:
504
- first_chunk = openai_chunk.copy()
505
-
506
- first_chunk["choices"][0]["delta"]["content"] = ""
507
- first_chunk["choices"][0]["delta"]["reasoning_content"] = ""
508
- first_chunk["choices"][0]["delta"]["role"] = "assistant"
509
- yield f"data: {json.dumps(first_chunk)}\n\n"
510
-
511
- openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
512
-
513
- openai_chunk["choices"][0]["delta"]["content"] = "<"
514
- yield f"data: {json.dumps(openai_chunk)}\n\n"
515
- openai_chunk["choices"][0]["delta"]["content"] = "think"
516
- yield f"data: {json.dumps(openai_chunk)}\n\n"
517
- openai_chunk["choices"][0]["delta"]["content"] = ">\n"
518
- yield f"data: {json.dumps(openai_chunk)}\n\n"
519
-
520
- first_reasoning_chunk = False
521
-
522
- # openai_chunk["choices"][0]["delta"]["content"] = "<think>\n"
523
- # openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
524
- # yield f"data: {json.dumps(openai_chunk)}\n\n"
525
- # first_reasoning_chunk = False
526
- else:
527
- openai_chunk["choices"][0]["delta"]["content"] = delta["reasoning_content"]
528
- openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
529
- yield f"data: {json.dumps(openai_chunk)}\n\n"
530
- reasoning_content_accumulated += delta["reasoning_content"]
531
-
532
- if delta.get("content") is not None:
533
- if not first_reasoning_chunk:
534
- openai_chunk["choices"][0]["delta"]["content"] = "\n</think>\n"
535
- openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
536
- yield f"data: {json.dumps(openai_chunk)}\n\n"
537
- first_reasoning_chunk = True
538
-
539
- openai_chunk["choices"][0]["delta"]["content"] = delta["content"]
540
- openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
541
- yield f"data: {json.dumps(openai_chunk)}\n\n"
542
- content_accumulated += delta["content"]
543
 
544
- except (json.JSONDecodeError, KeyError) as e:
545
- logging.error(f"Error parsing chunk: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
 
547
  yield "data: [DONE]\n\n"
548
-
549
  return Response(
550
  stream_with_context(generate()),
551
- mimetype="text/event-stream",
552
- headers={
553
- "X-Content-Type-Options": "nosniff",
554
- "Connection": "keep-alive"
555
- }
556
  )
557
 
558
  first_chunk_time = None
 
5
  import json
6
  import concurrent.futures
7
  import threading
 
8
  from datetime import datetime, timedelta
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
  from flask import Flask, request, jsonify, Response, stream_with_context
 
360
  headers=headers,
361
  json=data,
362
  stream=data.get("stream", False),
363
+ timeout=120
364
  )
365
 
366
  if response.status_code == 429:
 
465
  content_type=response.headers['Content-Type']
466
  )
467
 
468
+ if model_name == "deepseek-reasoner-openwebui":
469
  first_chunk_time = None
470
  full_response_content = ""
471
  reasoning_content_accumulated = ""
472
  content_accumulated = ""
473
  first_reasoning_chunk = True
474
+
 
 
475
  for chunk in response.iter_lines():
476
  if chunk:
477
  if first_chunk_time is None:
478
  first_chunk_time = time.time()
479
+ full_response_content += chunk.decode("utf-8")
 
480
 
481
+ for line in chunk.decode("utf-8").splitlines():
482
+ if line.startswith("data:"):
483
+ try:
484
+ chunk_json = json.loads(line.lstrip("data: ").strip())
485
+ if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
486
+ delta = chunk_json["choices"][0].get("delta", {})
487
+
488
+ if delta.get("reasoning_content") is not None:
489
+ reasoning_chunk = delta["reasoning_content"]
490
+ if first_reasoning_chunk:
491
+ think_chunk = f"<"
492
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
493
+ think_chunk = f"think"
494
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
495
+ think_chunk = f">\n"
496
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
497
+ first_reasoning_chunk = False
498
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
499
+
500
+ if delta.get("content") is not None:
501
+ if not first_reasoning_chunk:
502
+ reasoning_chunk = f"\n</think>\n"
503
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
504
+ first_reasoning_chunk = True
505
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
506
+
507
+ except (KeyError, ValueError, json.JSONDecodeError) as e:
508
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
 
510
+ end_time = time.time()
511
+ first_token_time = (
512
+ first_chunk_time - start_time
513
+ if first_chunk_time else 0
514
+ )
515
+ total_time = end_time - start_time
516
+
517
+ prompt_tokens = 0
518
+ completion_tokens = 0
519
+ for line in full_response_content.splitlines():
520
+ if line.startswith("data:"):
521
+ line = line[5:].strip()
522
+ if line == "[DONE]":
523
+ continue
524
+ try:
525
+ response_json = json.loads(line)
526
+
527
+ if (
528
+ "usage" in response_json and
529
+ "completion_tokens" in response_json["usage"]
530
+ ):
531
+ completion_tokens += response_json[
532
+ "usage"
533
+ ]["completion_tokens"]
534
+ if (
535
+ "usage" in response_json and
536
+ "prompt_tokens" in response_json["usage"]
537
+ ):
538
+ prompt_tokens = response_json[
539
+ "usage"
540
+ ]["prompt_tokens"]
541
+
542
+ except (
543
+ KeyError,
544
+ ValueError,
545
+ IndexError
546
+ ) as e:
547
+ logging.error(
548
+ f"解析流式响应单行 JSON 失败: {e}, "
549
+ f"行内容: {line}"
550
+ )
551
+
552
+ user_content = ""
553
+ messages = data.get("messages", [])
554
+ for message in messages:
555
+ if message["role"] == "user":
556
+ if isinstance(message["content"], str):
557
+ user_content += message["content"] + " "
558
+ elif isinstance(message["content"], list):
559
+ for item in message["content"]:
560
+ if (
561
+ isinstance(item, dict) and
562
+ item.get("type") == "text"
563
+ ):
564
+ user_content += (
565
+ item.get("text", "") +
566
+ " "
567
+ )
568
+
569
+ user_content = user_content.strip()
570
+
571
+ user_content_replaced = user_content.replace(
572
+ '\n', '\\n'
573
+ ).replace('\r', '\\n')
574
+ response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
575
+ response_content_replaced = response_content_replaced.replace(
576
+ '\n', '\\n'
577
+ ).replace('\r', '\\n')
578
+
579
+ logging.info(
580
+ f"使用的key: {api_key}, "
581
+ f"提示token: {prompt_tokens}, "
582
+ f"输出token: {completion_tokens}, "
583
+ f"首字用时: {first_token_time:.4f}秒, "
584
+ f"总共用时: {total_time:.4f}秒, "
585
+ f"使用的模型: {model_name}, "
586
+ f"用户的内容: {user_content_replaced}, "
587
+ f"输出的内容: {response_content_replaced}"
588
+ )
589
+
590
+ with data_lock:
591
+ request_timestamps.append(time.time())
592
+ token_counts.append(prompt_tokens + completion_tokens)
593
 
594
  yield "data: [DONE]\n\n"
595
+
596
  return Response(
597
  stream_with_context(generate()),
598
+ content_type="text/event-stream"
 
 
 
 
599
  )
600
 
601
  first_chunk_time = None