ds

Sleeping

App Files Community

yangtb24 committed on Jan 20

Commit

868e37f

verified ·

1 Parent(s): ae93e33

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -136

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
         if not data.get("is_available", False):
             logging.warning(f"API Key: {api_key} is not available.")
             return None
         balance_infos = data.get("balance_infos", [])
         total_balance_cny = 0.0
         usd_balance = 0.0
@@ -270,12 +270,12 @@ def check_tokens():
                 )
     return jsonify(results)
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
     detailed_models = [
         {
             "id": "deepseek-chat",
@@ -431,139 +431,15 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
-                reasoning_content_accumulated = ""
-                content_accumulated = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        try:
-                            for line in chunk.decode("utf-8").splitlines():
-                                if line.startswith("data:"):
-                                    line = line[5:].strip()
-                                    if line == "[DONE]":
-                                        continue
-                                    try:
-                                        response_json = json.loads(line)
-                                        if (
-                                            "usage" in response_json and
-                                            "completion_tokens" in response_json["usage"]
-                                        ):
-                                            completion_tokens = response_json[
-                                                "usage"
-                                            ]["completion_tokens"]
-                                        # Special handling for deepseek-reasoner in streaming mode
-                                        if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                                            delta = response_json["choices"][0].get("delta", {})
-                                            if "reasoning_content" in delta and delta["reasoning_content"]:
-                                                reasoning_content = delta["reasoning_content"]
-                                                formatted_reasoning_chunk = {
-                                                    "id": response_json.get("id", ""),
-                                                    "object": "chat.completion.chunk",
-                                                    "created": response_json.get("created", int(time.time())),
-                                                    "model": model_name,
-                                                    "choices": [
-                                                        {
-                                                            "index": 0,
-                                                            "delta": {
-                                                                "content": f"```Thinking\n{reasoning_content}\n```",
-                                                            },
-                                                            "finish_reason": None
-                                                        }
-                                                    ],
-                                                    "usage": None,
-                                                }
-                                                yield f"data: {json.dumps(formatted_reasoning_chunk)}\n\n".encode('utf-8')
-                                            if "content" in delta and delta["content"]:
-                                                content = delta["content"]
-                                                formatted_content_chunk = {
-                                                    "id": response_json.get("id", ""),
-                                                    "object": "chat.completion.chunk",
-                                                    "created": response_json.get("created", int(time.time())),
-                                                    "model": model_name,
-                                                    "choices": [
-                                                        {
-                                                            "index": 0,
-                                                            "delta": {
-                                                                "content": content,
-                                                            },
-                                                            "finish_reason": None
-                                                        }
-                                                    ],
-                                                    "usage": None,
-                                                }
-                                                yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
-                                        elif "choices" in response_json and len(response_json["choices"]) > 0:
-                                            # Handle other models normally
-                                            delta = response_json["choices"][0].get("delta", {})
-                                            if "content" in delta and delta["content"]:
-                                                formatted_content_chunk = {
-                                                    "id": response_json.get("id", ""),
-                                                    "object": "chat.completion.chunk",
-                                                    "created": response_json.get("created", int(time.time())),
-                                                    "model": model_name,
-                                                    "choices": [
-                                                        {
-                                                            "index": 0,
-                                                            "delta": {
-                                                                "content": delta["content"],
-                                                            },
-                                                            "finish_reason": None
-                                                        }
-                                                    ],
-                                                    "usage": None,
-                                                }
-                                                yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
-                                        if (
-                                            "usage" in response_json and
-                                            "prompt_tokens" in response_json["usage"]
-                                        ):
-                                            prompt_tokens = response_json[
-                                                "usage"
-                                            ]["prompt_tokens"]
-                                    except (
-                                        KeyError,
-                                        ValueError,
-                                        IndexError
-                                    ) as e:
-                                        logging.error(
-                                            f"解析流式响应单行 JSON 失败: {e}, "
-                                            f"行内容: {line}"
-                                        )
-                        except Exception as e:
-                             logging.error(f"处理流式响应失败：{e}")
-                # Send the [DONE] message after all chunks have been processed
-                done_chunk = {
-                    "id": response_json.get("id", ""),
-                    "object": "chat.completion.chunk",
-                    "created": response_json.get("created", int(time.time())),
-                    "model": model_name,
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {},
-                            "finish_reason": "stop"
-                        }
-                    ],
-                    "usage": {
-                        "completion_tokens": completion_tokens,
-                        "prompt_tokens": prompt_tokens,
-                        "total_tokens": prompt_tokens + completion_tokens
-                    },
-                }
-                yield f"data: {json.dumps(done_chunk)}\n\n".encode('utf-8')
                 end_time = time.time()
                 first_token_time = (
@@ -572,6 +448,61 @@ def handsome_chat_completions():
                 )
                 total_time = end_time - start_time
                 user_content = ""
                 messages = data.get("messages", [])
@@ -595,7 +526,10 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, "
                     f"提示token: {prompt_tokens}, "
@@ -603,16 +537,21 @@ def handsome_chat_completions():
                     f"首字用时: {first_token_time:.4f}秒, "
                     f"总共用时: {total_time:.4f}秒, "
                     f"使用的模型: {model_name}, "
-                    f"用户的内容: {user_content_replaced}"
                 )
                 with data_lock:
                     request_timestamps.append(time.time())
                     token_counts.append(prompt_tokens + completion_tokens)
             return Response(
                 stream_with_context(generate()),
-                content_type=response.headers['Content-Type']
             )
         else:
             # ... (Non-streaming part remains the same as in the previous response)
@@ -631,8 +570,7 @@ def handsome_chat_completions():
                     choice = response_json["choices"][0]
                     if "message" in choice:
                         if "reasoning_content" in choice["message"]:
-                            reasoning_lines = choice["message"]["reasoning_content"].splitlines()
-                            formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
                             response_content += formatted_reasoning + "\n"
                         if "content" in choice["message"]:
                             response_content += choice["message"]["content"]

         if not data.get("is_available", False):
             logging.warning(f"API Key: {api_key} is not available.")
             return None
         balance_infos = data.get("balance_infos", [])
         total_balance_cny = 0.0
         usd_balance = 0.0
                 )
     return jsonify(results)
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
     detailed_models = [
         {
             "id": "deepseek-chat",
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
+                reasoning_content_accumulated = ""  # Accumulate reasoning content
+                content_accumulated = ""  # Accumulate regular content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        yield chunk
                 end_time = time.time()
                 first_token_time = (
                 )
                 total_time = end_time - start_time
+                prompt_tokens = 0
+                completion_tokens = 0
+                for line in full_response_content.splitlines():
+                    if line.startswith("data:"):
+                        line = line[5:].strip()
+                        if line == "[DONE]":
+                            continue
+                        try:
+                            response_json = json.loads(line)
+                            if (
+                                "usage" in response_json and
+                                "completion_tokens" in response_json["usage"]
+                            ):
+                                completion_tokens += response_json[
+                                    "usage"
+                                ]["completion_tokens"]
+                            # Special handling for deepseek-reasoner in streaming mode
+                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
+                                delta = response_json["choices"][0].get("delta", {})
+                                if "reasoning_content" in delta:
+                                    reasoning_content_accumulated += delta["reasoning_content"]
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
+                            elif "choices" in response_json and len(response_json["choices"]) > 0:
+                                # Handle other models normally
+                                delta = response_json["choices"][0].get("delta", {})
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
+                            if (
+                                "usage" in response_json and
+                                "prompt_tokens" in response_json["usage"]
+                            ):
+                                prompt_tokens = response_json[
+                                    "usage"
+                                ]["prompt_tokens"]
+                        except (
+                            KeyError,
+                            ValueError,
+                            IndexError
+                        ) as e:
+                            logging.error(
+                                f"解析流式响应单行 JSON 失败: {e}, "
+                                f"行内容: {line}"
+                            )
+                # Format the accumulated reasoning content after processing all chunks
+                if model_name == "deepseek-reasoner":
+                    formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
+                    response_content = formatted_reasoning + "\n" + content_accumulated
+                else:
+                    response_content = content_accumulated
                 user_content = ""
                 messages = data.get("messages", [])
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = response_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, "
                     f"提示token: {prompt_tokens}, "
                     f"首字用时: {first_token_time:.4f}秒, "
                     f"总共用时: {total_time:.4f}秒, "
                     f"使用的模型: {model_name}, "
+                    f"用户的内容: {user_content_replaced}, "
+                    f"输出的内容: {response_content_replaced}"
                 )
                 with data_lock:
                     request_timestamps.append(time.time())
                     token_counts.append(prompt_tokens + completion_tokens)
+                yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
+                yield "data: [DONE]\n\n"
             return Response(
                 stream_with_context(generate()),
+                content_type="text/event-stream"
             )
         else:
             # ... (Non-streaming part remains the same as in the previous response)
                     choice = response_json["choices"][0]
                     if "message" in choice:
                         if "reasoning_content" in choice["message"]:
+                            formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
                             response_content += formatted_reasoning + "\n"
                         if "content" in choice["message"]:
                             response_content += choice["message"]["content"]