Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -556,11 +556,13 @@ def handsome_chat_completions():
|
|
556 |
return jsonify({"error": "Invalid request data"}), 400
|
557 |
|
558 |
model_name = data['model']
|
|
|
559 |
request_type = determine_request_type(
|
560 |
model_name,
|
561 |
-
text_models,
|
562 |
-
free_text_models
|
563 |
)
|
|
|
564 |
api_key = select_key(request_type, model_name)
|
565 |
|
566 |
if not api_key:
|
@@ -578,86 +580,255 @@ def handsome_chat_completions():
|
|
578 |
"Authorization": f"Bearer {api_key}",
|
579 |
"Content-Type": "application/json"
|
580 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
|
|
|
|
|
|
591 |
|
592 |
-
|
593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
594 |
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
605 |
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
|
|
|
|
|
|
|
|
|
|
610 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
611 |
total_time = end_time - start_time
|
612 |
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
"usage"
|
630 |
-
]["completion_tokens"]
|
631 |
-
|
632 |
-
if (
|
633 |
-
"choices" in response_json and
|
634 |
-
len(response_json["choices"]) > 0 and
|
635 |
-
"delta" in response_json["choices"][0] and
|
636 |
-
"content" in response_json[
|
637 |
-
"choices"
|
638 |
-
][0]["delta"]
|
639 |
-
):
|
640 |
-
response_content += response_json[
|
641 |
-
"choices"
|
642 |
-
][0]["delta"]["content"]
|
643 |
-
|
644 |
-
if (
|
645 |
-
"usage" in response_json and
|
646 |
-
"prompt_tokens" in response_json["usage"]
|
647 |
-
):
|
648 |
-
prompt_tokens = response_json[
|
649 |
-
"usage"
|
650 |
-
]["prompt_tokens"]
|
651 |
-
|
652 |
-
except (
|
653 |
-
KeyError,
|
654 |
-
ValueError,
|
655 |
-
IndexError
|
656 |
-
) as e:
|
657 |
-
logging.error(
|
658 |
-
f"解析流式响应单行 JSON 失败: {e}, "
|
659 |
-
f"行内容: {line}"
|
660 |
-
)
|
661 |
|
662 |
user_content = ""
|
663 |
messages = data.get("messages", [])
|
@@ -672,8 +843,7 @@ def handsome_chat_completions():
|
|
672 |
item.get("type") == "text"
|
673 |
):
|
674 |
user_content += (
|
675 |
-
item.get("text", "") +
|
676 |
-
" "
|
677 |
)
|
678 |
|
679 |
user_content = user_content.strip()
|
@@ -689,91 +859,24 @@ def handsome_chat_completions():
|
|
689 |
f"使用的key: {api_key}, "
|
690 |
f"提示token: {prompt_tokens}, "
|
691 |
f"输出token: {completion_tokens}, "
|
692 |
-
f"首字用时:
|
693 |
f"总共用时: {total_time:.4f}秒, "
|
694 |
f"使用的模型: {model_name}, "
|
695 |
f"用户的内容: {user_content_replaced}, "
|
696 |
f"输出的内容: {response_content_replaced}"
|
697 |
)
|
698 |
-
|
699 |
with data_lock:
|
700 |
request_timestamps.append(time.time())
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
content_type=response.headers['Content-Type']
|
706 |
-
)
|
707 |
-
else:
|
708 |
-
response.raise_for_status()
|
709 |
-
end_time = time.time()
|
710 |
-
response_json = response.json()
|
711 |
-
total_time = end_time - start_time
|
712 |
-
|
713 |
-
try:
|
714 |
-
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
715 |
-
completion_tokens = response_json[
|
716 |
-
"usage"
|
717 |
-
]["completion_tokens"]
|
718 |
-
response_content = response_json[
|
719 |
-
"choices"
|
720 |
-
][0]["message"]["content"]
|
721 |
-
except (KeyError, ValueError, IndexError) as e:
|
722 |
-
logging.error(
|
723 |
-
f"解析非流式响应 JSON 失败: {e}, "
|
724 |
-
f"完整内容: {response_json}"
|
725 |
-
)
|
726 |
-
prompt_tokens = 0
|
727 |
-
completion_tokens = 0
|
728 |
-
response_content = ""
|
729 |
-
|
730 |
-
user_content = ""
|
731 |
-
messages = data.get("messages", [])
|
732 |
-
for message in messages:
|
733 |
-
if message["role"] == "user":
|
734 |
-
if isinstance(message["content"], str):
|
735 |
-
user_content += message["content"] + " "
|
736 |
-
elif isinstance(message["content"], list):
|
737 |
-
for item in message["content"]:
|
738 |
-
if (
|
739 |
-
isinstance(item, dict) and
|
740 |
-
item.get("type") == "text"
|
741 |
-
):
|
742 |
-
user_content += (
|
743 |
-
item.get("text", "") + " "
|
744 |
-
)
|
745 |
-
|
746 |
-
user_content = user_content.strip()
|
747 |
-
|
748 |
-
user_content_replaced = user_content.replace(
|
749 |
-
'\n', '\\n'
|
750 |
-
).replace('\r', '\\n')
|
751 |
-
response_content_replaced = response_content.replace(
|
752 |
-
'\n', '\\n'
|
753 |
-
).replace('\r', '\\n')
|
754 |
-
|
755 |
-
logging.info(
|
756 |
-
f"使用的key: {api_key}, "
|
757 |
-
f"提示token: {prompt_tokens}, "
|
758 |
-
f"输出token: {completion_tokens}, "
|
759 |
-
f"首字用时: 0, "
|
760 |
-
f"总共用时: {total_time:.4f}秒, "
|
761 |
-
f"使用的模型: {model_name}, "
|
762 |
-
f"用户的内容: {user_content_replaced}, "
|
763 |
-
f"输出的内容: {response_content_replaced}"
|
764 |
-
)
|
765 |
-
with data_lock:
|
766 |
-
request_timestamps.append(time.time())
|
767 |
-
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
|
768 |
-
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
|
769 |
-
else:
|
770 |
-
token_counts.append(0)
|
771 |
|
772 |
-
|
773 |
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
|
778 |
@app.route('/handsome/v1/models', methods=['GET'])
|
779 |
def list_models():
|
|
|
556 |
return jsonify({"error": "Invalid request data"}), 400
|
557 |
|
558 |
model_name = data['model']
|
559 |
+
|
560 |
request_type = determine_request_type(
|
561 |
model_name,
|
562 |
+
text_models + image_models,
|
563 |
+
free_text_models + free_image_models
|
564 |
)
|
565 |
+
|
566 |
api_key = select_key(request_type, model_name)
|
567 |
|
568 |
if not api_key:
|
|
|
580 |
"Authorization": f"Bearer {api_key}",
|
581 |
"Content-Type": "application/json"
|
582 |
}
|
583 |
+
|
584 |
+
if model_name in image_models:
|
585 |
+
# Handle image generation
|
586 |
+
# Map OpenAI-style parameters to SiliconFlow's parameters
|
587 |
+
siliconflow_data = {
|
588 |
+
"model": model_name,
|
589 |
+
"prompt": data.get("messages", [{}])[0].get("content") if isinstance(data.get("messages"), list) else "",
|
590 |
+
"image_size": data.get("size", "1024x1024"),
|
591 |
+
"batch_size": data.get("n", 1),
|
592 |
+
"num_inference_steps": data.get("steps", 20),
|
593 |
+
"guidance_scale": data.get("guidance_scale", 7.5),
|
594 |
+
"negative_prompt": data.get("negative_prompt"),
|
595 |
+
"seed": data.get("seed"),
|
596 |
+
"prompt_enhancement": False,
|
597 |
+
}
|
598 |
+
|
599 |
+
# Parameter validation and adjustments
|
600 |
+
if siliconflow_data["batch_size"] < 1:
|
601 |
+
siliconflow_data["batch_size"] = 1
|
602 |
+
if siliconflow_data["batch_size"] > 4:
|
603 |
+
siliconflow_data["batch_size"] = 4
|
604 |
|
605 |
+
if siliconflow_data["num_inference_steps"] < 1:
|
606 |
+
siliconflow_data["num_inference_steps"] = 1
|
607 |
+
if siliconflow_data["num_inference_steps"] > 50:
|
608 |
+
siliconflow_data["num_inference_steps"] = 50
|
609 |
+
|
610 |
+
if siliconflow_data["guidance_scale"] < 0:
|
611 |
+
siliconflow_data["guidance_scale"] = 0
|
612 |
+
if siliconflow_data["guidance_scale"] > 100:
|
613 |
+
siliconflow_data["guidance_scale"] = 100
|
614 |
+
|
615 |
+
if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
|
616 |
+
siliconflow_data["image_size"] = "1024x1024"
|
617 |
|
618 |
+
try:
|
619 |
+
start_time = time.time()
|
620 |
+
response = requests.post(
|
621 |
+
"https://api.siliconflow.cn/v1/images/generations",
|
622 |
+
headers=headers,
|
623 |
+
json=siliconflow_data,
|
624 |
+
timeout=120
|
625 |
+
)
|
626 |
+
|
627 |
+
if response.status_code == 429:
|
628 |
+
return jsonify(response.json()), 429
|
629 |
|
630 |
+
response.raise_for_status()
|
631 |
+
end_time = time.time()
|
632 |
+
response_json = response.json()
|
633 |
+
total_time = end_time - start_time
|
634 |
+
|
635 |
+
try:
|
636 |
+
images = response_json.get("images", [])
|
637 |
+
openai_images = []
|
638 |
+
for item in images:
|
639 |
+
if isinstance(item, dict) and "url" in item:
|
640 |
+
image_url = item["url"]
|
641 |
+
print(f"image_url: {image_url}") # 打印 URL
|
642 |
+
try:
|
643 |
+
image_data = requests.get(image_url, stream=True).raw
|
644 |
+
image = Image.open(image_data)
|
645 |
+
buffered = io.BytesIO()
|
646 |
+
image.save(buffered, format="PNG")
|
647 |
+
img_str = base64.b64encode(buffered.getvalue()).decode()
|
648 |
+
openai_images.append({"b64_json": img_str})
|
649 |
+
except Exception as e:
|
650 |
+
logging.error(f"图片转base64失败: {e}")
|
651 |
+
openai_images.append({"url": image_url})
|
652 |
+
else:
|
653 |
+
logging.error(f"无效的图片数据: {item}")
|
654 |
+
openai_images.append({"url": item})
|
655 |
|
656 |
+
|
657 |
+
response_data = {
|
658 |
+
"created": int(time.time()),
|
659 |
+
"data": openai_images
|
660 |
+
}
|
661 |
+
except (KeyError, ValueError, IndexError) as e:
|
662 |
+
logging.error(
|
663 |
+
f"解析响应 JSON 失败: {e}, "
|
664 |
+
f"完整内容: {response_json}"
|
665 |
)
|
666 |
+
response_data = {
|
667 |
+
"created": int(time.time()),
|
668 |
+
"data": []
|
669 |
+
}
|
670 |
+
|
671 |
+
logging.info(
|
672 |
+
f"使用的key: {api_key}, "
|
673 |
+
f"总共用时: {total_time:.4f}秒, "
|
674 |
+
f"使用的模型: {model_name}"
|
675 |
+
)
|
676 |
+
|
677 |
+
with data_lock:
|
678 |
+
request_timestamps.append(time.time())
|
679 |
+
token_counts.append(0) # Image generation doesn't use tokens
|
680 |
+
|
681 |
+
return jsonify(response_data)
|
682 |
+
except requests.exceptions.RequestException as e:
|
683 |
+
logging.error(f"请求转发异常: {e}")
|
684 |
+
return jsonify({"error": str(e)}), 500
|
685 |
+
else:
|
686 |
+
try:
|
687 |
+
start_time = time.time()
|
688 |
+
response = requests.post(
|
689 |
+
TEST_MODEL_ENDPOINT,
|
690 |
+
headers=headers,
|
691 |
+
json=data,
|
692 |
+
stream=data.get("stream", False),
|
693 |
+
timeout=60
|
694 |
+
)
|
695 |
+
if response.status_code == 429:
|
696 |
+
return jsonify(response.json()), 429
|
697 |
+
|
698 |
+
if data.get("stream", False):
|
699 |
+
def generate():
|
700 |
+
first_chunk_time = None
|
701 |
+
full_response_content = ""
|
702 |
+
for chunk in response.iter_content(chunk_size=1024):
|
703 |
+
if chunk:
|
704 |
+
if first_chunk_time is None:
|
705 |
+
first_chunk_time = time.time()
|
706 |
+
full_response_content += chunk.decode("utf-8")
|
707 |
+
yield chunk
|
708 |
+
|
709 |
+
end_time = time.time()
|
710 |
+
first_token_time = (
|
711 |
+
first_chunk_time - start_time
|
712 |
+
if first_chunk_time else 0
|
713 |
+
)
|
714 |
+
total_time = end_time - start_time
|
715 |
+
|
716 |
+
prompt_tokens = 0
|
717 |
+
completion_tokens = 0
|
718 |
+
response_content = ""
|
719 |
+
for line in full_response_content.splitlines():
|
720 |
+
if line.startswith("data:"):
|
721 |
+
line = line[5:].strip()
|
722 |
+
if line == "[DONE]":
|
723 |
+
continue
|
724 |
+
try:
|
725 |
+
response_json = json.loads(line)
|
726 |
+
|
727 |
+
if (
|
728 |
+
"usage" in response_json and
|
729 |
+
"completion_tokens" in response_json["usage"]
|
730 |
+
):
|
731 |
+
completion_tokens = response_json[
|
732 |
+
"usage"
|
733 |
+
]["completion_tokens"]
|
734 |
+
|
735 |
+
if (
|
736 |
+
"choices" in response_json and
|
737 |
+
len(response_json["choices"]) > 0 and
|
738 |
+
"delta" in response_json["choices"][0] and
|
739 |
+
"content" in response_json[
|
740 |
+
"choices"
|
741 |
+
][0]["delta"]
|
742 |
+
):
|
743 |
+
response_content += response_json[
|
744 |
+
"choices"
|
745 |
+
][0]["delta"]["content"]
|
746 |
+
|
747 |
+
if (
|
748 |
+
"usage" in response_json and
|
749 |
+
"prompt_tokens" in response_json["usage"]
|
750 |
+
):
|
751 |
+
prompt_tokens = response_json[
|
752 |
+
"usage"
|
753 |
+
]["prompt_tokens"]
|
754 |
+
|
755 |
+
except (
|
756 |
+
KeyError,
|
757 |
+
ValueError,
|
758 |
+
IndexError
|
759 |
+
) as e:
|
760 |
+
logging.error(
|
761 |
+
f"解析流式响应单行 JSON 失败: {e}, "
|
762 |
+
f"行内容: {line}"
|
763 |
+
)
|
764 |
+
|
765 |
+
user_content = ""
|
766 |
+
messages = data.get("messages", [])
|
767 |
+
for message in messages:
|
768 |
+
if message["role"] == "user":
|
769 |
+
if isinstance(message["content"], str):
|
770 |
+
user_content += message["content"] + " "
|
771 |
+
elif isinstance(message["content"], list):
|
772 |
+
for item in message["content"]:
|
773 |
+
if (
|
774 |
+
isinstance(item, dict) and
|
775 |
+
item.get("type") == "text"
|
776 |
+
):
|
777 |
+
user_content += (
|
778 |
+
item.get("text", "") +
|
779 |
+
" "
|
780 |
+
)
|
781 |
+
|
782 |
+
user_content = user_content.strip()
|
783 |
+
|
784 |
+
user_content_replaced = user_content.replace(
|
785 |
+
'\n', '\\n'
|
786 |
+
).replace('\r', '\\n')
|
787 |
+
response_content_replaced = response_content.replace(
|
788 |
+
'\n', '\\n'
|
789 |
+
).replace('\r', '\\n')
|
790 |
+
|
791 |
+
logging.info(
|
792 |
+
f"使用的key: {api_key}, "
|
793 |
+
f"提示token: {prompt_tokens}, "
|
794 |
+
f"输出token: {completion_tokens}, "
|
795 |
+
f"首字用时: {first_token_time:.4f}秒, "
|
796 |
+
f"总共用时: {total_time:.4f}秒, "
|
797 |
+
f"使用的模型: {model_name}, "
|
798 |
+
f"用户的内容: {user_content_replaced}, "
|
799 |
+
f"输出的内容: {response_content_replaced}"
|
800 |
+
)
|
801 |
+
|
802 |
+
with data_lock:
|
803 |
+
request_timestamps.append(time.time())
|
804 |
+
token_counts.append(prompt_tokens+completion_tokens)
|
805 |
+
|
806 |
+
return Response(
|
807 |
+
stream_with_context(generate()),
|
808 |
+
content_type=response.headers['Content-Type']
|
809 |
+
)
|
810 |
+
else:
|
811 |
+
response.raise_for_status()
|
812 |
+
end_time = time.time()
|
813 |
+
response_json = response.json()
|
814 |
total_time = end_time - start_time
|
815 |
|
816 |
+
try:
|
817 |
+
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
818 |
+
completion_tokens = response_json[
|
819 |
+
"usage"
|
820 |
+
]["completion_tokens"]
|
821 |
+
response_content = response_json[
|
822 |
+
"choices"
|
823 |
+
][0]["message"]["content"]
|
824 |
+
except (KeyError, ValueError, IndexError) as e:
|
825 |
+
logging.error(
|
826 |
+
f"解析非流式响应 JSON 失败: {e}, "
|
827 |
+
f"完整内容: {response_json}"
|
828 |
+
)
|
829 |
+
prompt_tokens = 0
|
830 |
+
completion_tokens = 0
|
831 |
+
response_content = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
832 |
|
833 |
user_content = ""
|
834 |
messages = data.get("messages", [])
|
|
|
843 |
item.get("type") == "text"
|
844 |
):
|
845 |
user_content += (
|
846 |
+
item.get("text", "") + " "
|
|
|
847 |
)
|
848 |
|
849 |
user_content = user_content.strip()
|
|
|
859 |
f"使用的key: {api_key}, "
|
860 |
f"提示token: {prompt_tokens}, "
|
861 |
f"输出token: {completion_tokens}, "
|
862 |
+
f"首字用时: 0, "
|
863 |
f"总共用时: {total_time:.4f}秒, "
|
864 |
f"使用的模型: {model_name}, "
|
865 |
f"用户的内容: {user_content_replaced}, "
|
866 |
f"输出的内容: {response_content_replaced}"
|
867 |
)
|
|
|
868 |
with data_lock:
|
869 |
request_timestamps.append(time.time())
|
870 |
+
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
|
871 |
+
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
|
872 |
+
else:
|
873 |
+
token_counts.append(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
874 |
|
875 |
+
return jsonify(response_json)
|
876 |
|
877 |
+
except requests.exceptions.RequestException as e:
|
878 |
+
logging.error(f"请求转发异常: {e}")
|
879 |
+
return jsonify({"error": str(e)}), 500
|
880 |
|
881 |
@app.route('/handsome/v1/models', methods=['GET'])
|
882 |
def list_models():
|