Update app.py
app.py CHANGED
@@ -5,7 +5,6 @@ import requests
 import json
 import concurrent.futures
 import threading
-import uuid
 from datetime import datetime, timedelta
 from apscheduler.schedulers.background import BackgroundScheduler
 from flask import Flask, request, jsonify, Response, stream_with_context
@@ -361,7 +360,7 @@ def handsome_chat_completions():
                 headers=headers,
                 json=data,
                 stream=data.get("stream", False),
-                timeout=
+                timeout=120
             )

             if response.status_code == 429:
@@ -466,93 +465,137 @@ def handsome_chat_completions():
                         content_type=response.headers['Content-Type']
                     )

-                if model_name == "deepseek-reasoner-openwebui":
+                if model_name == "deepseek-reasoner-openwebui":
                     first_chunk_time = None
                     full_response_content = ""
                     reasoning_content_accumulated = ""
                     content_accumulated = ""
                     first_reasoning_chunk = True
-
-                    created_time = int(time.time())
-
+
                     for chunk in response.iter_lines():
                         if chunk:
                             if first_chunk_time is None:
                                 first_chunk_time = time.time()
-
-                            full_response_content += chunk_str
+                            full_response_content += chunk.decode("utf-8")

-                            … (removed lines 485-512: content not recoverable from this view)
-                                                    openai_chunk["choices"][0]["delta"]["content"] = "<"
-                                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                                    openai_chunk["choices"][0]["delta"]["content"] = "think"
-                                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                                    openai_chunk["choices"][0]["delta"]["content"] = ">\n"
-                                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
-
-                                                    first_reasoning_chunk = False
-
-                                                    # openai_chunk["choices"][0]["delta"]["content"] = "<think>\n"
-                                                    # openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
-                                                    # yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                                    # first_reasoning_chunk = False
-                                                else:
-                                                    openai_chunk["choices"][0]["delta"]["content"] = delta["reasoning_content"]
-                                                    openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
-                                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                                reasoning_content_accumulated += delta["reasoning_content"]
-
-                                            if delta.get("content") is not None:
-                                                if not first_reasoning_chunk:
-                                                    openai_chunk["choices"][0]["delta"]["content"] = "\n</think>\n"
-                                                    openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
-                                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                                    first_reasoning_chunk = True
-
-                                                openai_chunk["choices"][0]["delta"]["content"] = delta["content"]
-                                                openai_chunk["choices"][0]["delta"]["reasoning_content"] = None
-                                                yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                                content_accumulated += delta["content"]
+                            for line in chunk.decode("utf-8").splitlines():
+                                if line.startswith("data:"):
+                                    try:
+                                        chunk_json = json.loads(line.lstrip("data: ").strip())
+                                        if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
+                                            delta = chunk_json["choices"][0].get("delta", {})
+
+                                            if delta.get("reasoning_content") is not None:
+                                                reasoning_chunk = delta["reasoning_content"]
+                                                if first_reasoning_chunk:
+                                                    think_chunk = f"<"
+                                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
+                                                    think_chunk = f"think"
+                                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
+                                                    think_chunk = f">\n"
+                                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
+                                                    first_reasoning_chunk = False
+                                                yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
+
+                                            if delta.get("content") is not None:
+                                                if not first_reasoning_chunk:
+                                                    reasoning_chunk = f"\n</think>\n"
+                                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
+                                                    first_reasoning_chunk = True
+                                                yield f"data: {json.dumps({'choices': [{'delta': {'content': delta['content']}, 'index': 0}]})}\n\n"
+
+                                    except (KeyError, ValueError, json.JSONDecodeError) as e:
+                                        continue

-
-
+                    end_time = time.time()
+                    first_token_time = (
+                        first_chunk_time - start_time
+                        if first_chunk_time else 0
+                    )
+                    total_time = end_time - start_time
+
+                    prompt_tokens = 0
+                    completion_tokens = 0
+                    for line in full_response_content.splitlines():
+                        if line.startswith("data:"):
+                            line = line[5:].strip()
+                            if line == "[DONE]":
+                                continue
+                            try:
+                                response_json = json.loads(line)
+
+                                if (
+                                    "usage" in response_json and
+                                    "completion_tokens" in response_json["usage"]
+                                ):
+                                    completion_tokens += response_json[
+                                        "usage"
+                                    ]["completion_tokens"]
+                                if (
+                                    "usage" in response_json and
+                                    "prompt_tokens" in response_json["usage"]
+                                ):
+                                    prompt_tokens = response_json[
+                                        "usage"
+                                    ]["prompt_tokens"]
+
+                            except (
+                                KeyError,
+                                ValueError,
+                                IndexError
+                            ) as e:
+                                logging.error(
+                                    f"Failed to parse a streamed response line as JSON: {e}, "
+                                    f"line content: {line}"
+                                )
+
+                    user_content = ""
+                    messages = data.get("messages", [])
+                    for message in messages:
+                        if message["role"] == "user":
+                            if isinstance(message["content"], str):
+                                user_content += message["content"] + " "
+                            elif isinstance(message["content"], list):
+                                for item in message["content"]:
+                                    if (
+                                        isinstance(item, dict) and
+                                        item.get("type") == "text"
+                                    ):
+                                        user_content += (
+                                            item.get("text", "") +
+                                            " "
+                                        )
+
+                    user_content = user_content.strip()
+
+                    user_content_replaced = user_content.replace(
+                        '\n', '\\n'
+                    ).replace('\r', '\\n')
+                    response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
+                    response_content_replaced = response_content_replaced.replace(
+                        '\n', '\\n'
+                    ).replace('\r', '\\n')
+
+                    logging.info(
+                        f"key used: {api_key}, "
+                        f"prompt tokens: {prompt_tokens}, "
+                        f"completion tokens: {completion_tokens}, "
+                        f"time to first token: {first_token_time:.4f}s, "
+                        f"total time: {total_time:.4f}s, "
+                        f"model used: {model_name}, "
+                        f"user content: {user_content_replaced}, "
+                        f"output content: {response_content_replaced}"
+                    )
+
+                    with data_lock:
+                        request_timestamps.append(time.time())
+                        token_counts.append(prompt_tokens + completion_tokens)

                 yield "data: [DONE]\n\n"
-
+
             return Response(
                 stream_with_context(generate()),
-
-                headers={
-                    "X-Content-Type-Options": "nosniff",
-                    "Connection": "keep-alive"
-                }
+                content_type="text/event-stream"
             )

             first_chunk_time = None
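Note on the timeout change: the second hunk pins the proxied upstream call to a finite timeout. As a minimal sketch of that call in isolation (the URL, headers, and payload below are placeholders, not the app's real values):

import requests

# Placeholder values; the real handler builds these from the incoming request.
UPSTREAM_URL = "https://api.example.com/v1/chat/completions"
headers = {"Authorization": "Bearer sk-placeholder"}
data = {
    "model": "deepseek-reasoner",
    "stream": True,
    "messages": [{"role": "user", "content": "hello"}],
}

response = requests.post(
    UPSTREAM_URL,
    headers=headers,
    json=data,
    stream=data.get("stream", False),
    timeout=120,  # the value this commit sets
)

With requests, a single timeout value bounds the connection attempt and each read between received bytes, not the total stream duration, so a healthy long-running stream is unaffected while a stalled upstream eventually raises requests.exceptions.Timeout instead of hanging the worker.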
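Note on the streaming rewrite: the core of the third hunk changes the deepseek-reasoner-openwebui branch so that, instead of mutating a pre-built openai_chunk, it parses each SSE "data:" line itself and re-emits reasoning_content as ordinary content wrapped in a literal <think>...</think> block, which OpenWebUI-style clients render as a collapsible thought section. A self-contained sketch of that transformation (function and variable names are mine, not the app's; the commit also emits the opening tag as three separate chunks, "<", "think", ">\n", where a single chunk is used here for brevity):

import json

def wrap_reasoning(sse_lines):
    """Re-emit OpenAI-style stream chunks, folding reasoning_content into
    content wrapped in a <think>...</think> block."""
    in_think = False
    for line in sse_lines:
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            break
        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            continue  # skip malformed lines, as the committed code does
        choices = chunk.get("choices") or [{}]
        delta = choices[0].get("delta", {})
        reasoning = delta.get("reasoning_content")
        content = delta.get("content")
        if reasoning is not None:
            if not in_think:
                in_think = True
                yield {"choices": [{"delta": {"content": "<think>\n"}, "index": 0}]}
            yield {"choices": [{"delta": {"content": reasoning}, "index": 0}]}
        if content is not None:
            if in_think:
                in_think = False
                yield {"choices": [{"delta": {"content": "\n</think>\n"}, "index": 0}]}
            yield {"choices": [{"delta": {"content": content}, "index": 0}]}

# Example: reasoning deltas become tagged content deltas.
upstream = [
    'data: {"choices": [{"delta": {"reasoning_content": "step 1"}, "index": 0}]}',
    'data: {"choices": [{"delta": {"content": "final answer"}, "index": 0}]}',
    "data: [DONE]",
]
for out in wrap_reasoning(upstream):
    print(json.dumps(out))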
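Note on the accounting pass: after the loop, the new code re-scans the accumulated raw SSE text for "usage" objects to total prompt and completion tokens before logging. Distilled into a standalone helper (the function name is mine, but the summing/overwrite semantics mirror the diff):

import json

def usage_from_stream(full_response_content):
    """Total token usage from accumulated SSE text, as the commit does:
    completion_tokens are summed, prompt_tokens keeps the last value seen."""
    prompt_tokens = 0
    completion_tokens = 0
    for line in full_response_content.splitlines():
        if not line.startswith("data:"):
            continue
        payload = line[5:].strip()
        if payload == "[DONE]":
            continue
        try:
            usage = json.loads(payload).get("usage") or {}
        except (ValueError, AttributeError):
            continue  # skip malformed or non-object lines
        completion_tokens += usage.get("completion_tokens", 0)
        prompt_tokens = usage.get("prompt_tokens", prompt_tokens)
    return prompt_tokens, completion_tokens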
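Note on consuming the stream: the response is now served with content_type="text/event-stream" in place of the earlier custom headers, so any SSE-aware client can read it, with reasoning arriving inline in content between the injected <think> tags. A rough client sketch, assuming a placeholder proxy URL and route:

import json
import requests

PROXY_URL = "http://localhost:5000/handsome/v1/chat/completions"  # placeholder

with requests.post(
    PROXY_URL,
    json={
        "model": "deepseek-reasoner-openwebui",
        "stream": True,
        "messages": [{"role": "user", "content": "hello"}],
    },
    stream=True,
    timeout=120,
) as resp:
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8")
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            break
        delta = json.loads(payload)["choices"][0].get("delta", {})
        # Reasoning text arrives inside <think>...</think> markers in "content".
        print(delta.get("content", ""), end="", flush=True)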