Update app.py
app.py CHANGED

@@ -67,7 +67,7 @@ def test_model_availability(api_key, model_name):
             json={
                 "model": model_name,
                 "messages": [{"role": "user", "content": "hi"}],
-                "max_tokens":
+                "max_tokens": 5,
                 "stream": False
             },
             timeout=10)

@@ -359,7 +359,6 @@ def handsome_chat_completions():
             first_token_time = first_chunk_time - start_time if first_chunk_time else 0
             total_time = end_time - start_time

-            # Process the streaming response, parsing the JSON line by line
             prompt_tokens = 0
             completion_tokens = 0
             response_content = ""

@@ -371,7 +370,6 @@
                 try:
                     response_json = json.loads(line)

-                    # Extract information; only completion_tokens and content are accumulated here
                     if "usage" in response_json and "completion_tokens" in response_json["usage"]:
                         completion_tokens = response_json["usage"]["completion_tokens"]

@@ -384,7 +382,6 @@
                 except (KeyError, ValueError, IndexError) as e:
                     logging.error(f"Failed to parse a line of the streaming JSON response: {e}, line content: {line}")

-            # Extract the user's input content
             user_content = ""
             messages = data.get("messages", [])
             for message in messages:

@@ -392,19 +389,16 @@
                     user_content += message["content"] + " "
             user_content = user_content.strip()

-            # Write the log entry
             logging.info(
                 f"Key used: {api_key}, prompt tokens: {prompt_tokens}, completion tokens: {completion_tokens}, time to first token: {first_token_time:.4f}s, total time: {total_time:.4f}s, model used: {model_name}, user content: {user_content}, response content: {response_content}"
             )
             return Response(stream_with_context(generate()), content_type=response.headers['Content-Type'])
         else:
-            # Non-streaming response handling... (kept as-is)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
             total_time = end_time - start_time

-            # Extract information from the response
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
                 completion_tokens = response_json["usage"]["completion_tokens"]

@@ -415,7 +409,6 @@
                 completion_tokens = 0
                 response_content = ""

-            # Extract the user's input content
             user_content = ""
             messages = data.get("messages", [])
             for message in messages:

@@ -423,7 +416,6 @@
                     user_content += message["content"] + " "
             user_content = user_content.strip()

-            # Write the log entry
             logging.info(
                 f"Key used: {api_key}, prompt tokens: {prompt_tokens}, completion tokens: {completion_tokens}, time to first token: 0, total time: {total_time:.4f}s, model used: {model_name}, user content: {user_content}, response content: {response_content}"
             )

@@ -432,8 +424,6 @@
     except requests.exceptions.RequestException as e:
         return jsonify({"error": str(e)}), 500

-
-
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):

@@ -533,4 +523,4 @@ if __name__ == '__main__':
     refresh_models()
     logging.info("Initial model list refresh has been triggered manually")

-    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
+    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
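For reference, a minimal sketch of how the patched availability probe plausibly fits together after this commit. Only the JSON payload (including the new "max_tokens": 5) is taken from the hunk above; the upstream URL constant, the Authorization header shape, and the return convention are assumptions about the surrounding code, not part of the commit.

import requests

# Hypothetical upstream endpoint; the real URL is defined elsewhere in app.py.
TEST_CHAT_COMPLETIONS_URL = "https://example-upstream/v1/chat/completions"

def test_model_availability(api_key, model_name):
    """Probe a model with a tiny request; the 5-token cap keeps the check cheap."""
    try:
        response = requests.post(
            TEST_CHAT_COMPLETIONS_URL,
            headers={"Authorization": f"Bearer {api_key}"},  # assumed header shape
            json={
                "model": model_name,
                "messages": [{"role": "user", "content": "hi"}],
                "max_tokens": 5,  # value added by this commit
                "stream": False,
            },
            timeout=10,
        )
        return response.status_code == 200
    except requests.exceptions.RequestException:
        return False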
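Likewise, a rough sketch of the streaming branch that the middle hunks touch: each response line is parsed as JSON, and only completion_tokens and the delta content are accumulated for the final log entry. Everything beyond the two statements visible in the diff (the "data: " prefix handling, the choices/delta path, the helper name accumulate_stream_stats) is an assumption about the surrounding code, not part of the commit.

import json
import logging

def accumulate_stream_stats(lines):
    """Sketch: fold usage and content out of raw streamed lines (assumed SSE framing)."""
    completion_tokens = 0
    response_content = ""
    for raw in lines:
        line = raw.strip()
        if not line or line == "data: [DONE]":
            continue
        if line.startswith("data: "):  # assumed SSE framing
            line = line[len("data: "):]
        try:
            response_json = json.loads(line)
            # These two extractions mirror the statements shown in the diff.
            if "usage" in response_json and "completion_tokens" in response_json["usage"]:
                completion_tokens = response_json["usage"]["completion_tokens"]
            delta = response_json.get("choices", [{}])[0].get("delta", {})
            response_content += delta.get("content") or ""
        except (KeyError, ValueError, IndexError) as e:
            logging.error(f"Failed to parse a line of the streaming JSON response: {e}, line content: {line}")
    return completion_tokens, response_content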