Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -308,6 +308,8 @@ def check_tokens():
|
|
308 |
|
309 |
return jsonify(results)
|
310 |
|
|
|
|
|
311 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
312 |
def handsome_chat_completions():
|
313 |
if not check_authorization(request):
|
@@ -357,18 +359,31 @@ def handsome_chat_completions():
|
|
357 |
first_token_time = first_chunk_time - start_time if first_chunk_time else 0
|
358 |
total_time = end_time - start_time
|
359 |
|
360 |
-
#
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
# 提取用户输入的内容
|
373 |
user_content = ""
|
374 |
messages = data.get("messages", [])
|
@@ -383,6 +398,7 @@ def handsome_chat_completions():
|
|
383 |
)
|
384 |
return Response(stream_with_context(generate()), content_type=response.headers['Content-Type'])
|
385 |
else:
|
|
|
386 |
response.raise_for_status()
|
387 |
end_time = time.time()
|
388 |
response_json = response.json()
|
@@ -417,6 +433,7 @@ def handsome_chat_completions():
|
|
417 |
return jsonify({"error": str(e)}), 500
|
418 |
|
419 |
|
|
|
420 |
@app.route('/handsome/v1/models', methods=['GET'])
|
421 |
def list_models():
|
422 |
if not check_authorization(request):
|
|
|
308 |
|
309 |
return jsonify(results)
|
310 |
|
311 |
+
import json
|
312 |
+
|
313 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
314 |
def handsome_chat_completions():
|
315 |
if not check_authorization(request):
|
|
|
359 |
first_token_time = first_chunk_time - start_time if first_chunk_time else 0
|
360 |
total_time = end_time - start_time
|
361 |
|
362 |
+
# 处理流式响应,逐行解析 JSON
|
363 |
+
prompt_tokens = 0
|
364 |
+
completion_tokens = 0
|
365 |
+
response_content = ""
|
366 |
+
for line in full_response_content.splitlines():
|
367 |
+
if line.startswith("data:"):
|
368 |
+
line = line[5:].strip()
|
369 |
+
if line == "[DONE]":
|
370 |
+
continue
|
371 |
+
try:
|
372 |
+
response_json = json.loads(line)
|
373 |
+
|
374 |
+
# 提取信息,这里只累加 completion_tokens 和 content
|
375 |
+
if "usage" in response_json and "completion_tokens" in response_json["usage"]:
|
376 |
+
completion_tokens = response_json["usage"]["completion_tokens"]
|
377 |
+
|
378 |
+
if "choices" in response_json and len(response_json["choices"]) > 0 and "delta" in response_json["choices"][0] and "content" in response_json["choices"][0]["delta"]:
|
379 |
+
response_content += response_json["choices"][0]["delta"]["content"]
|
380 |
+
|
381 |
+
if "usage" in response_json and "prompt_tokens" in response_json["usage"]:
|
382 |
+
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
383 |
+
|
384 |
+
except (KeyError, ValueError, IndexError) as e:
|
385 |
+
logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
|
386 |
+
|
387 |
# 提取用户输入的内容
|
388 |
user_content = ""
|
389 |
messages = data.get("messages", [])
|
|
|
398 |
)
|
399 |
return Response(stream_with_context(generate()), content_type=response.headers['Content-Type'])
|
400 |
else:
|
401 |
+
# 非流式响应处理... (保持原样)
|
402 |
response.raise_for_status()
|
403 |
end_time = time.time()
|
404 |
response_json = response.json()
|
|
|
433 |
return jsonify({"error": str(e)}), 500
|
434 |
|
435 |
|
436 |
+
|
437 |
@app.route('/handsome/v1/models', methods=['GET'])
|
438 |
def list_models():
|
439 |
if not check_authorization(request):
|