Update app.py
app.py
CHANGED
@@ -1305,11 +1305,11 @@ def handsome_chat_completions():
    try:
        start_time = time.time()
        response = requests.post(
-
+            TEST_MODEL_ENDPOINT,
            headers=headers,
            json=data,
-
-
+            stream=data.get("stream", False),
+            timeout=60
        )

        if response.status_code == 429:
@@ -1317,33 +1317,116 @@ def handsome_chat_completions():

        if data.get("stream", False):
            def generate():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        ]
-
-
-
-
+                first_chunk_time = None
+                full_response_content = ""
+                for chunk in response.iter_content(chunk_size=1024):
+                    if chunk:
+                        if first_chunk_time is None:
+                            first_chunk_time = time.time()
+                        full_response_content += chunk.decode("utf-8")
+                        yield chunk
+
+                end_time = time.time()
+                first_token_time = (
+                    first_chunk_time - start_time
+                    if first_chunk_time else 0
+                )
+                total_time = end_time - start_time
+
+                prompt_tokens = 0
+                completion_tokens = 0
+                response_content = ""
+                for line in full_response_content.splitlines():
+                    if line.startswith("data:"):
+                        line = line[5:].strip()
+                        if line == "[DONE]":
+                            continue
+                        try:
+                            response_json = json.loads(line)
+
+                            if (
+                                "usage" in response_json and
+                                "completion_tokens" in response_json["usage"]
+                            ):
+                                completion_tokens = response_json[
+                                    "usage"
+                                ]["completion_tokens"]
+
+                            if (
+                                "choices" in response_json and
+                                len(response_json["choices"]) > 0 and
+                                "delta" in response_json["choices"][0] and
+                                "content" in response_json[
+                                    "choices"
+                                ][0]["delta"]
+                            ):
+                                response_content += response_json[
+                                    "choices"
+                                ][0]["delta"]["content"]
+
+                            if (
+                                "usage" in response_json and
+                                "prompt_tokens" in response_json["usage"]
+                            ):
+                                prompt_tokens = response_json[
+                                    "usage"
+                                ]["prompt_tokens"]
+
+                        except (
+                            KeyError,
+                            ValueError,
+                            IndexError
+                        ) as e:
+                            logging.error(
+                                f"解析流式响应单行 JSON 失败: {e}, "
+                                f"行内容: {line}"
+                            )
+
+                user_content = ""
+                messages = data.get("messages", [])
+                for message in messages:
+                    if message["role"] == "user":
+                        if isinstance(message["content"], str):
+                            user_content += message["content"] + " "
+                        elif isinstance(message["content"], list):
+                            for item in message["content"]:
+                                if (
+                                    isinstance(item, dict) and
+                                    item.get("type") == "text"
+                                ):
+                                    user_content += (
+                                        item.get("text", "") +
+                                        " "
+                                    )
+
+                user_content = user_content.strip()
+
+                user_content_replaced = user_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
+                response_content_replaced = response_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
+
+                logging.info(
+                    f"使用的key: {api_key}, "
+                    f"提示token: {prompt_tokens}, "
+                    f"输出token: {completion_tokens}, "
+                    f"首字用时: {first_token_time:.4f}秒, "
+                    f"总共用时: {total_time:.4f}秒, "
+                    f"使用的模型: {model_name}, "
+                    f"用户的内容: {user_content_replaced}, "
+                    f"输出的内容: {response_content_replaced}"
+                )
+
+                with data_lock:
+                    request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens+completion_tokens)
+
+            return Response(
+                stream_with_context(generate()),
+                content_type=response.headers['Content-Type']
+            )
        else:
            response.raise_for_status()
            end_time = time.time()
@@ -1351,74 +1434,66 @@ def handsome_chat_completions():
            total_time = end_time - start_time

            try:
-
-
-
-
-
-
-
-                        "created": int(time.time()),
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "message": {
-                                    "role": "assistant",
-                                    "content": content
-                                },
-                                "finish_reason": "stop"
-                            }
-                        ],
-                    }
-                else:
-                    response_data = {
-                        "id": f"chatcmpl-{uuid.uuid4()}",
-                        "object": "chat.completion",
-                        "created": int(time.time()),
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "message": {
-                                    "role": "assistant",
-                                    "content": "No response content"
-                                },
-                                "finish_reason": "stop"
-                            }
-                        ]
-                    }
+                prompt_tokens = response_json["usage"]["prompt_tokens"]
+                completion_tokens = response_json[
+                    "usage"
+                ]["completion_tokens"]
+                response_content = response_json[
+                    "choices"
+                ][0]["message"]["content"]
            except (KeyError, ValueError, IndexError) as e:
                logging.error(
-                    f"
+                    f"解析非流式响应 JSON 失败: {e}, "
                    f"完整内容: {response_json}"
                )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                prompt_tokens = 0
+                completion_tokens = 0
+                response_content = ""
+
+            user_content = ""
+            messages = data.get("messages", [])
+            for message in messages:
+                if message["role"] == "user":
+                    if isinstance(message["content"], str):
+                        user_content += message["content"] + " "
+                    elif isinstance(message["content"], list):
+                        for item in message["content"]:
+                            if (
+                                isinstance(item, dict) and
+                                item.get("type") == "text"
+                            ):
+                                user_content += (
+                                    item.get("text", "") + " "
+                                )
+
+            user_content = user_content.strip()
+
+            user_content_replaced = user_content.replace(
+                '\n', '\\n'
+            ).replace('\r', '\\n')
+            response_content_replaced = response_content.replace(
+                '\n', '\\n'
+            ).replace('\r', '\\n')
+
            logging.info(
-
-
-
+                f"使用的key: {api_key}, "
+                f"提示token: {prompt_tokens}, "
+                f"输出token: {completion_tokens}, "
+                f"首字用时: 0, "
+                f"总共用时: {total_time:.4f}秒, "
+                f"使用的模型: {model_name}, "
+                f"用户的内容: {user_content_replaced}, "
+                f"输出的内容: {response_content_replaced}"
            )
            with data_lock:
                request_timestamps.append(time.time())
-
-
+                if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
+                    token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
+                else:
+                    token_counts.append(0)
+
+            return jsonify(response_json)
+
    except requests.exceptions.RequestException as e:
        logging.error(f"请求转发异常: {e}")
        return jsonify({"error": str(e)}), 500
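For orientation, below is a minimal client-side sketch of how the streaming branch added in this commit might be exercised. The route path, host, port, Authorization header, and model name are illustrative assumptions that do not appear in this diff; only the SSE "data:" / "[DONE]" framing and the choices[0].delta.content field mirror what the new generate() parses.

# Hypothetical client for the proxy changed in this diff. The URL, port,
# auth header, and model name below are assumptions, not values from app.py.
import json
import requests

resp = requests.post(
    "http://127.0.0.1:7860/handsome/v1/chat/completions",  # assumed route
    headers={"Authorization": "Bearer sk-your-key"},        # assumed auth scheme
    json={
        "model": "some-model",                              # placeholder
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": True,
    },
    stream=True,
    timeout=60,
)

# The handler forwards upstream chunks verbatim, so the client sees standard
# SSE lines: "data: {json}" payloads terminated by "data: [DONE]".
for raw_line in resp.iter_lines(decode_unicode=True):
    if not raw_line or not raw_line.startswith("data:"):
        continue
    payload = raw_line[5:].strip()
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    choices = chunk.get("choices") or []
    if choices:
        print(choices[0].get("delta", {}).get("content", ""), end="", flush=True)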