Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -547,8 +547,8 @@ def check_tokens():
|
|
547 |
return jsonify(results)
|
548 |
|
549 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
550 |
-
def handsome_chat_completions(
|
551 |
-
if not check_authorization(request
|
552 |
return jsonify({"error": "Unauthorized"}), 401
|
553 |
|
554 |
data = request.get_json()
|
@@ -556,13 +556,13 @@ def handsome_chat_completions(request, authorization_key):
|
|
556 |
return jsonify({"error": "Invalid request data"}), 400
|
557 |
|
558 |
model_name = data['model']
|
559 |
-
|
560 |
request_type = determine_request_type(
|
561 |
model_name,
|
562 |
text_models + image_models,
|
563 |
free_text_models + free_image_models
|
564 |
)
|
565 |
-
|
566 |
api_key = select_key(request_type, model_name)
|
567 |
|
568 |
if not api_key:
|
@@ -580,7 +580,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
580 |
"Authorization": f"Bearer {api_key}",
|
581 |
"Content-Type": "application/json"
|
582 |
}
|
583 |
-
|
584 |
if model_name in image_models:
|
585 |
# Handle image generation
|
586 |
# Map OpenAI-style parameters to SiliconFlow's parameters
|
@@ -595,7 +595,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
595 |
"seed": data.get("seed"),
|
596 |
"prompt_enhancement": False,
|
597 |
}
|
598 |
-
|
599 |
# Parameter validation and adjustments
|
600 |
if siliconflow_data["batch_size"] < 1:
|
601 |
siliconflow_data["batch_size"] = 1
|
@@ -606,12 +606,12 @@ def handsome_chat_completions(request, authorization_key):
|
|
606 |
siliconflow_data["num_inference_steps"] = 1
|
607 |
if siliconflow_data["num_inference_steps"] > 50:
|
608 |
siliconflow_data["num_inference_steps"] = 50
|
609 |
-
|
610 |
if siliconflow_data["guidance_scale"] < 0:
|
611 |
siliconflow_data["guidance_scale"] = 0
|
612 |
if siliconflow_data["guidance_scale"] > 100:
|
613 |
siliconflow_data["guidance_scale"] = 100
|
614 |
-
|
615 |
if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
|
616 |
siliconflow_data["image_size"] = "1024x1024"
|
617 |
|
@@ -623,7 +623,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
623 |
json=siliconflow_data,
|
624 |
timeout=120
|
625 |
)
|
626 |
-
|
627 |
if response.status_code == 429:
|
628 |
return jsonify(response.json()), 429
|
629 |
|
@@ -631,10 +631,10 @@ def handsome_chat_completions(request, authorization_key):
|
|
631 |
end_time = time.time()
|
632 |
response_json = response.json()
|
633 |
total_time = end_time - start_time
|
634 |
-
|
635 |
try:
|
636 |
images = response_json.get("images", [])
|
637 |
-
|
638 |
# Extract the first URL if available
|
639 |
image_url = ""
|
640 |
if images and isinstance(images[0], dict) and "url" in images[0]:
|
@@ -643,7 +643,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
643 |
elif images and isinstance(images[0], str):
|
644 |
image_url = images[0]
|
645 |
logging.info(f"Extracted image URL: {image_url}")
|
646 |
-
|
647 |
# Construct the expected JSON output - Mimicking OpenAI
|
648 |
response_data = {
|
649 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
@@ -656,10 +656,21 @@ def handsome_chat_completions(request, authorization_key):
|
|
656 |
"index": 0,
|
657 |
"message": {
|
658 |
"role": "assistant",
|
659 |
-
"content": None,
|
660 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
661 |
},
|
662 |
-
"finish_reason": "
|
663 |
}
|
664 |
],
|
665 |
"usage": { # Added usage
|
@@ -668,9 +679,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
668 |
"total_tokens": 0
|
669 |
}
|
670 |
}
|
671 |
-
|
672 |
-
response_data["choices"][0]["message"]["content"] = "Failed to generate image"
|
673 |
-
|
674 |
except (KeyError, ValueError, IndexError) as e:
|
675 |
logging.error(
|
676 |
f"解析响应 JSON 失败: {e}, "
|
@@ -688,7 +697,6 @@ def handsome_chat_completions(request, authorization_key):
|
|
688 |
"message": {
|
689 |
"role": "assistant",
|
690 |
"content": "Failed to process image data",
|
691 |
-
"image_url": None,
|
692 |
},
|
693 |
"finish_reason": "stop",
|
694 |
}
|
@@ -706,6 +714,10 @@ def handsome_chat_completions(request, authorization_key):
|
|
706 |
f"使用的模型: {model_name}"
|
707 |
)
|
708 |
|
|
|
|
|
|
|
|
|
709 |
return jsonify(response_data)
|
710 |
except requests.exceptions.RequestException as e:
|
711 |
logging.error(f"请求转发异常: {e}")
|
@@ -718,69 +730,191 @@ def handsome_chat_completions(request, authorization_key):
|
|
718 |
TEST_MODEL_ENDPOINT,
|
719 |
headers=headers,
|
720 |
json=data,
|
721 |
-
stream=False,
|
722 |
timeout=60
|
723 |
)
|
724 |
if response.status_code == 429:
|
725 |
return jsonify(response.json()), 429
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
733 |
-
|
734 |
-
|
735 |
-
|
736 |
-
|
737 |
-
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
-
|
746 |
-
|
747 |
-
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
762 |
)
|
763 |
|
764 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
765 |
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
response_content_replaced = response_content.replace(
|
770 |
-
'\n', '\\n'
|
771 |
-
).replace('\r', '\\n')
|
772 |
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
784 |
|
785 |
except requests.exceptions.RequestException as e:
|
786 |
logging.error(f"请求转发异常: {e}")
|
|
|
547 |
return jsonify(results)
|
548 |
|
549 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
550 |
+
def handsome_chat_completions():
|
551 |
+
if not check_authorization(request):
|
552 |
return jsonify({"error": "Unauthorized"}), 401
|
553 |
|
554 |
data = request.get_json()
|
|
|
556 |
return jsonify({"error": "Invalid request data"}), 400
|
557 |
|
558 |
model_name = data['model']
|
559 |
+
|
560 |
request_type = determine_request_type(
|
561 |
model_name,
|
562 |
text_models + image_models,
|
563 |
free_text_models + free_image_models
|
564 |
)
|
565 |
+
|
566 |
api_key = select_key(request_type, model_name)
|
567 |
|
568 |
if not api_key:
|
|
|
580 |
"Authorization": f"Bearer {api_key}",
|
581 |
"Content-Type": "application/json"
|
582 |
}
|
583 |
+
|
584 |
if model_name in image_models:
|
585 |
# Handle image generation
|
586 |
# Map OpenAI-style parameters to SiliconFlow's parameters
|
|
|
595 |
"seed": data.get("seed"),
|
596 |
"prompt_enhancement": False,
|
597 |
}
|
598 |
+
|
599 |
# Parameter validation and adjustments
|
600 |
if siliconflow_data["batch_size"] < 1:
|
601 |
siliconflow_data["batch_size"] = 1
|
|
|
606 |
siliconflow_data["num_inference_steps"] = 1
|
607 |
if siliconflow_data["num_inference_steps"] > 50:
|
608 |
siliconflow_data["num_inference_steps"] = 50
|
609 |
+
|
610 |
if siliconflow_data["guidance_scale"] < 0:
|
611 |
siliconflow_data["guidance_scale"] = 0
|
612 |
if siliconflow_data["guidance_scale"] > 100:
|
613 |
siliconflow_data["guidance_scale"] = 100
|
614 |
+
|
615 |
if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
|
616 |
siliconflow_data["image_size"] = "1024x1024"
|
617 |
|
|
|
623 |
json=siliconflow_data,
|
624 |
timeout=120
|
625 |
)
|
626 |
+
|
627 |
if response.status_code == 429:
|
628 |
return jsonify(response.json()), 429
|
629 |
|
|
|
631 |
end_time = time.time()
|
632 |
response_json = response.json()
|
633 |
total_time = end_time - start_time
|
634 |
+
|
635 |
try:
|
636 |
images = response_json.get("images", [])
|
637 |
+
|
638 |
# Extract the first URL if available
|
639 |
image_url = ""
|
640 |
if images and isinstance(images[0], dict) and "url" in images[0]:
|
|
|
643 |
elif images and isinstance(images[0], str):
|
644 |
image_url = images[0]
|
645 |
logging.info(f"Extracted image URL: {image_url}")
|
646 |
+
|
647 |
# Construct the expected JSON output - Mimicking OpenAI
|
648 |
response_data = {
|
649 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
|
|
656 |
"index": 0,
|
657 |
"message": {
|
658 |
"role": "assistant",
|
659 |
+
"content": None, # set to None as image is in tool_calls
|
660 |
+
"tool_calls": [
|
661 |
+
{
|
662 |
+
"id": f"call_{uuid.uuid4()}",
|
663 |
+
"type": "function",
|
664 |
+
"function": {
|
665 |
+
"name": "image_generation",
|
666 |
+
"arguments": json.dumps({
|
667 |
+
"image_url": image_url
|
668 |
+
})
|
669 |
+
}
|
670 |
+
}
|
671 |
+
]
|
672 |
},
|
673 |
+
"finish_reason": "tool_calls",
|
674 |
}
|
675 |
],
|
676 |
"usage": { # Added usage
|
|
|
679 |
"total_tokens": 0
|
680 |
}
|
681 |
}
|
682 |
+
|
|
|
|
|
683 |
except (KeyError, ValueError, IndexError) as e:
|
684 |
logging.error(
|
685 |
f"解析响应 JSON 失败: {e}, "
|
|
|
697 |
"message": {
|
698 |
"role": "assistant",
|
699 |
"content": "Failed to process image data",
|
|
|
700 |
},
|
701 |
"finish_reason": "stop",
|
702 |
}
|
|
|
714 |
f"使用的模型: {model_name}"
|
715 |
)
|
716 |
|
717 |
+
with data_lock:
|
718 |
+
request_timestamps.append(time.time())
|
719 |
+
token_counts.append(0) # Image generation doesn't use tokens
|
720 |
+
|
721 |
return jsonify(response_data)
|
722 |
except requests.exceptions.RequestException as e:
|
723 |
logging.error(f"请求转发异常: {e}")
|
|
|
730 |
TEST_MODEL_ENDPOINT,
|
731 |
headers=headers,
|
732 |
json=data,
|
733 |
+
stream=data.get("stream", False),
|
734 |
timeout=60
|
735 |
)
|
736 |
if response.status_code == 429:
|
737 |
return jsonify(response.json()), 429
|
738 |
+
|
739 |
+
if data.get("stream", False):
|
740 |
+
def generate():
    """Relay the upstream streaming response, then log and record usage.

    Yields each non-empty raw chunk from ``response.iter_content`` unchanged.
    Once the upstream stream is exhausted, the buffered body is parsed as
    ``data:``-prefixed lines of JSON to pull out token usage and the
    generated text, a summary line is logged, and the request timestamp and
    total token count are appended to the shared statistics under
    ``data_lock``.

    Relies on names from the enclosing scope: ``response``, ``start_time``,
    ``data``, ``api_key``, ``model_name``, ``data_lock``,
    ``request_timestamps`` and ``token_counts``.
    """
    first_chunk_time = None
    raw_chunks = []
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:
            if first_chunk_time is None:
                first_chunk_time = time.time()
            # Buffer raw bytes only. Decoding each 1024-byte chunk on its own
            # raises UnicodeDecodeError whenever a multi-byte UTF-8 character
            # straddles a chunk boundary, which would abort the stream.
            raw_chunks.append(chunk)
            yield chunk

    end_time = time.time()
    first_token_time = (
        first_chunk_time - start_time
        if first_chunk_time else 0
    )
    total_time = end_time - start_time

    # Decode once, after the whole body has arrived; malformed bytes are
    # replaced instead of raising, because this text is only used for stats.
    full_response_content = b"".join(raw_chunks).decode(
        "utf-8", errors="replace"
    )

    prompt_tokens = 0
    completion_tokens = 0
    content_parts = []
    # Split on CR / LF / CRLF only. str.splitlines() would also split on
    # characters such as U+2028, which may appear unescaped inside a JSON
    # string and would cut a payload line in half.
    normalized = full_response_content.replace(
        "\r\n", "\n"
    ).replace("\r", "\n")
    for raw_line in normalized.split("\n"):
        if not raw_line.startswith("data:"):
            continue
        line = raw_line[5:].strip()
        if line == "[DONE]":
            continue
        try:
            response_json = json.loads(line)
        except ValueError as e:
            logging.error(
                f"解析流式响应单行 JSON 失败: {e}, "
                f"行内容: {line}"
            )
            continue
        if not isinstance(response_json, dict):
            continue

        # "usage" may be absent or null on intermediate chunks.
        usage = response_json.get("usage")
        if isinstance(usage, dict):
            if "completion_tokens" in usage:
                completion_tokens = usage["completion_tokens"] or 0
            if "prompt_tokens" in usage:
                prompt_tokens = usage["prompt_tokens"] or 0

        # "delta.content" may be null (e.g. a role-only chunk); only
        # accumulate actual text.
        choices = response_json.get("choices")
        if isinstance(choices, list) and choices:
            first_choice = choices[0]
            delta = (
                first_choice.get("delta")
                if isinstance(first_choice, dict) else None
            )
            if isinstance(delta, dict):
                piece = delta.get("content")
                if isinstance(piece, str):
                    content_parts.append(piece)

    response_content = "".join(content_parts)

    # Collect the text of every user message (plain string or the "text"
    # items of a list-style content) for the log line.
    user_parts = []
    for message in data.get("messages", []):
        if not isinstance(message, dict) or message.get("role") != "user":
            continue
        content = message.get("content")
        if isinstance(content, str):
            user_parts.append(content + " ")
        elif isinstance(content, list):
            for item in content:
                if (
                    isinstance(item, dict) and
                    item.get("type") == "text"
                ):
                    user_parts.append((item.get("text") or "") + " ")

    user_content = "".join(user_parts).strip()

    # Keep each log record on a single line.
    user_content_replaced = user_content.replace(
        '\n', '\\n'
    ).replace('\r', '\\n')
    response_content_replaced = response_content.replace(
        '\n', '\\n'
    ).replace('\r', '\\n')

    # NOTE(review): this writes the full upstream API key to the log;
    # consider masking it.
    logging.info(
        f"使用的key: {api_key}, "
        f"提示token: {prompt_tokens}, "
        f"输出token: {completion_tokens}, "
        f"首字用时: {first_token_time:.4f}秒, "
        f"总共用时: {total_time:.4f}秒, "
        f"使用的模型: {model_name}, "
        f"用户的内容: {user_content_replaced}, "
        f"输出的内容: {response_content_replaced}"
    )

    with data_lock:
        request_timestamps.append(time.time())
        token_counts.append(prompt_tokens + completion_tokens)
|
|
|
|
|
|
|
846 |
|
847 |
+
return Response(
|
848 |
+
stream_with_context(generate()),
|
849 |
+
content_type=response.headers['Content-Type']
|
850 |
+
)
|
851 |
+
else:
|
852 |
+
response.raise_for_status()
|
853 |
+
end_time = time.time()
|
854 |
+
response_json = response.json()
|
855 |
+
total_time = end_time - start_time
|
856 |
+
|
857 |
+
try:
|
858 |
+
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
859 |
+
completion_tokens = response_json[
|
860 |
+
"usage"
|
861 |
+
]["completion_tokens"]
|
862 |
+
response_content = response_json[
|
863 |
+
"choices"
|
864 |
+
][0]["message"]["content"]
|
865 |
+
except (KeyError, ValueError, IndexError) as e:
|
866 |
+
logging.error(
|
867 |
+
f"解析非流式响应 JSON 失败: {e}, "
|
868 |
+
f"完整内容: {response_json}"
|
869 |
+
)
|
870 |
+
prompt_tokens = 0
|
871 |
+
completion_tokens = 0
|
872 |
+
response_content = ""
|
873 |
+
|
874 |
+
user_content = ""
|
875 |
+
messages = data.get("messages", [])
|
876 |
+
for message in messages:
|
877 |
+
if message["role"] == "user":
|
878 |
+
if isinstance(message["content"], str):
|
879 |
+
user_content += message["content"] + " "
|
880 |
+
elif isinstance(message["content"], list):
|
881 |
+
for item in message["content"]:
|
882 |
+
if (
|
883 |
+
isinstance(item, dict) and
|
884 |
+
item.get("type") == "text"
|
885 |
+
):
|
886 |
+
user_content += (
|
887 |
+
item.get("text", "") +
|
888 |
+
" "
|
889 |
+
)
|
890 |
+
|
891 |
+
user_content = user_content.strip()
|
892 |
+
|
893 |
+
user_content_replaced = user_content.replace(
|
894 |
+
'\n', '\\n'
|
895 |
+
).replace('\r', '\\n')
|
896 |
+
response_content_replaced = response_content.replace(
|
897 |
+
'\n', '\\n'
|
898 |
+
).replace('\r', '\\n')
|
899 |
+
|
900 |
+
logging.info(
|
901 |
+
f"使用的key: {api_key}, "
|
902 |
+
f"提示token: {prompt_tokens}, "
|
903 |
+
f"输出token: {completion_tokens}, "
|
904 |
+
f"首字用时: 0, "
|
905 |
+
f"总共用时: {total_time:.4f}秒, "
|
906 |
+
f"使用的模型: {model_name}, "
|
907 |
+
f"用户的内容: {user_content_replaced}, "
|
908 |
+
f"输出的内容: {response_content_replaced}"
|
909 |
+
)
|
910 |
+
with data_lock:
|
911 |
+
request_timestamps.append(time.time())
|
912 |
+
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
|
913 |
+
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
|
914 |
+
else:
|
915 |
+
token_counts.append(0)
|
916 |
+
|
917 |
+
return jsonify(response_json)
|
918 |
|
919 |
except requests.exceptions.RequestException as e:
|
920 |
logging.error(f"请求转发异常: {e}")
|