yangtb24 committed on
Commit
cd15040
·
verified ·
1 Parent(s): b18e21c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +256 -153
app.py CHANGED
@@ -556,11 +556,13 @@ def handsome_chat_completions():
556
  return jsonify({"error": "Invalid request data"}), 400
557
 
558
  model_name = data['model']
 
559
  request_type = determine_request_type(
560
  model_name,
561
- text_models,
562
- free_text_models
563
  )
 
564
  api_key = select_key(request_type, model_name)
565
 
566
  if not api_key:
@@ -578,86 +580,255 @@ def handsome_chat_completions():
578
  "Authorization": f"Bearer {api_key}",
579
  "Content-Type": "application/json"
580
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
- try:
583
- start_time = time.time()
584
- response = requests.post(
585
- TEST_MODEL_ENDPOINT,
586
- headers=headers,
587
- json=data,
588
- stream=data.get("stream", False),
589
- timeout=60
590
- )
 
 
 
591
 
592
- if response.status_code == 429:
593
- return jsonify(response.json()), 429
 
 
 
 
 
 
 
 
 
594
 
595
- if data.get("stream", False):
596
- def generate():
597
- first_chunk_time = None
598
- full_response_content = ""
599
- for chunk in response.iter_content(chunk_size=1024):
600
- if chunk:
601
- if first_chunk_time is None:
602
- first_chunk_time = time.time()
603
- full_response_content += chunk.decode("utf-8")
604
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
- end_time = time.time()
607
- first_token_time = (
608
- first_chunk_time - start_time
609
- if first_chunk_time else 0
 
 
 
 
 
610
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  total_time = end_time - start_time
612
 
613
- prompt_tokens = 0
614
- completion_tokens = 0
615
- response_content = ""
616
- for line in full_response_content.splitlines():
617
- if line.startswith("data:"):
618
- line = line[5:].strip()
619
- if line == "[DONE]":
620
- continue
621
- try:
622
- response_json = json.loads(line)
623
-
624
- if (
625
- "usage" in response_json and
626
- "completion_tokens" in response_json["usage"]
627
- ):
628
- completion_tokens = response_json[
629
- "usage"
630
- ]["completion_tokens"]
631
-
632
- if (
633
- "choices" in response_json and
634
- len(response_json["choices"]) > 0 and
635
- "delta" in response_json["choices"][0] and
636
- "content" in response_json[
637
- "choices"
638
- ][0]["delta"]
639
- ):
640
- response_content += response_json[
641
- "choices"
642
- ][0]["delta"]["content"]
643
-
644
- if (
645
- "usage" in response_json and
646
- "prompt_tokens" in response_json["usage"]
647
- ):
648
- prompt_tokens = response_json[
649
- "usage"
650
- ]["prompt_tokens"]
651
-
652
- except (
653
- KeyError,
654
- ValueError,
655
- IndexError
656
- ) as e:
657
- logging.error(
658
- f"解析流式响应单行 JSON 失败: {e}, "
659
- f"行内容: {line}"
660
- )
661
 
662
  user_content = ""
663
  messages = data.get("messages", [])
@@ -672,8 +843,7 @@ def handsome_chat_completions():
672
  item.get("type") == "text"
673
  ):
674
  user_content += (
675
- item.get("text", "") +
676
- " "
677
  )
678
 
679
  user_content = user_content.strip()
@@ -689,91 +859,24 @@ def handsome_chat_completions():
689
  f"使用的key: {api_key}, "
690
  f"提示token: {prompt_tokens}, "
691
  f"输出token: {completion_tokens}, "
692
- f"首字用时: {first_token_time:.4f}秒, "
693
  f"总共用时: {total_time:.4f}秒, "
694
  f"使用的模型: {model_name}, "
695
  f"用户的内容: {user_content_replaced}, "
696
  f"输出的内容: {response_content_replaced}"
697
  )
698
-
699
  with data_lock:
700
  request_timestamps.append(time.time())
701
- token_counts.append(prompt_tokens+completion_tokens)
702
-
703
- return Response(
704
- stream_with_context(generate()),
705
- content_type=response.headers['Content-Type']
706
- )
707
- else:
708
- response.raise_for_status()
709
- end_time = time.time()
710
- response_json = response.json()
711
- total_time = end_time - start_time
712
-
713
- try:
714
- prompt_tokens = response_json["usage"]["prompt_tokens"]
715
- completion_tokens = response_json[
716
- "usage"
717
- ]["completion_tokens"]
718
- response_content = response_json[
719
- "choices"
720
- ][0]["message"]["content"]
721
- except (KeyError, ValueError, IndexError) as e:
722
- logging.error(
723
- f"解析非流式响应 JSON 失败: {e}, "
724
- f"完整内容: {response_json}"
725
- )
726
- prompt_tokens = 0
727
- completion_tokens = 0
728
- response_content = ""
729
-
730
- user_content = ""
731
- messages = data.get("messages", [])
732
- for message in messages:
733
- if message["role"] == "user":
734
- if isinstance(message["content"], str):
735
- user_content += message["content"] + " "
736
- elif isinstance(message["content"], list):
737
- for item in message["content"]:
738
- if (
739
- isinstance(item, dict) and
740
- item.get("type") == "text"
741
- ):
742
- user_content += (
743
- item.get("text", "") + " "
744
- )
745
-
746
- user_content = user_content.strip()
747
-
748
- user_content_replaced = user_content.replace(
749
- '\n', '\\n'
750
- ).replace('\r', '\\n')
751
- response_content_replaced = response_content.replace(
752
- '\n', '\\n'
753
- ).replace('\r', '\\n')
754
-
755
- logging.info(
756
- f"使用的key: {api_key}, "
757
- f"提示token: {prompt_tokens}, "
758
- f"输出token: {completion_tokens}, "
759
- f"首字用时: 0, "
760
- f"总共用时: {total_time:.4f}秒, "
761
- f"使用的模型: {model_name}, "
762
- f"用户的内容: {user_content_replaced}, "
763
- f"输出的内容: {response_content_replaced}"
764
- )
765
- with data_lock:
766
- request_timestamps.append(time.time())
767
- if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
768
- token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
769
- else:
770
- token_counts.append(0)
771
 
772
- return jsonify(response_json)
773
 
774
- except requests.exceptions.RequestException as e:
775
- logging.error(f"请求转发异常: {e}")
776
- return jsonify({"error": str(e)}), 500
777
 
778
  @app.route('/handsome/v1/models', methods=['GET'])
779
  def list_models():
 
556
  return jsonify({"error": "Invalid request data"}), 400
557
 
558
  model_name = data['model']
559
+
560
  request_type = determine_request_type(
561
  model_name,
562
+ text_models + image_models,
563
+ free_text_models + free_image_models
564
  )
565
+
566
  api_key = select_key(request_type, model_name)
567
 
568
  if not api_key:
 
580
  "Authorization": f"Bearer {api_key}",
581
  "Content-Type": "application/json"
582
  }
583
+
584
+ if model_name in image_models:
585
+ # Handle image generation
586
+ # Map OpenAI-style parameters to SiliconFlow's parameters
587
+ siliconflow_data = {
588
+ "model": model_name,
589
+ "prompt": data.get("messages", [{}])[0].get("content") if isinstance(data.get("messages"), list) else "",
590
+ "image_size": data.get("size", "1024x1024"),
591
+ "batch_size": data.get("n", 1),
592
+ "num_inference_steps": data.get("steps", 20),
593
+ "guidance_scale": data.get("guidance_scale", 7.5),
594
+ "negative_prompt": data.get("negative_prompt"),
595
+ "seed": data.get("seed"),
596
+ "prompt_enhancement": False,
597
+ }
598
+
599
+ # Parameter validation and adjustments
600
+ if siliconflow_data["batch_size"] < 1:
601
+ siliconflow_data["batch_size"] = 1
602
+ if siliconflow_data["batch_size"] > 4:
603
+ siliconflow_data["batch_size"] = 4
604
 
605
+ if siliconflow_data["num_inference_steps"] < 1:
606
+ siliconflow_data["num_inference_steps"] = 1
607
+ if siliconflow_data["num_inference_steps"] > 50:
608
+ siliconflow_data["num_inference_steps"] = 50
609
+
610
+ if siliconflow_data["guidance_scale"] < 0:
611
+ siliconflow_data["guidance_scale"] = 0
612
+ if siliconflow_data["guidance_scale"] > 100:
613
+ siliconflow_data["guidance_scale"] = 100
614
+
615
+ if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
616
+ siliconflow_data["image_size"] = "1024x1024"
617
 
618
+ try:
619
+ start_time = time.time()
620
+ response = requests.post(
621
+ "https://api.siliconflow.cn/v1/images/generations",
622
+ headers=headers,
623
+ json=siliconflow_data,
624
+ timeout=120
625
+ )
626
+
627
+ if response.status_code == 429:
628
+ return jsonify(response.json()), 429
629
 
630
+ response.raise_for_status()
631
+ end_time = time.time()
632
+ response_json = response.json()
633
+ total_time = end_time - start_time
634
+
635
+ try:
636
+ images = response_json.get("images", [])
637
+ openai_images = []
638
+ for item in images:
639
+ if isinstance(item, dict) and "url" in item:
640
+ image_url = item["url"]
641
+ print(f"image_url: {image_url}") # 打印 URL
642
+ try:
643
+ image_data = requests.get(image_url, stream=True).raw
644
+ image = Image.open(image_data)
645
+ buffered = io.BytesIO()
646
+ image.save(buffered, format="PNG")
647
+ img_str = base64.b64encode(buffered.getvalue()).decode()
648
+ openai_images.append({"b64_json": img_str})
649
+ except Exception as e:
650
+ logging.error(f"图片转base64失败: {e}")
651
+ openai_images.append({"url": image_url})
652
+ else:
653
+ logging.error(f"无效的图片数据: {item}")
654
+ openai_images.append({"url": item})
655
 
656
+
657
+ response_data = {
658
+ "created": int(time.time()),
659
+ "data": openai_images
660
+ }
661
+ except (KeyError, ValueError, IndexError) as e:
662
+ logging.error(
663
+ f"解析响应 JSON 失败: {e}, "
664
+ f"完整内容: {response_json}"
665
  )
666
+ response_data = {
667
+ "created": int(time.time()),
668
+ "data": []
669
+ }
670
+
671
+ logging.info(
672
+ f"使用的key: {api_key}, "
673
+ f"总共用时: {total_time:.4f}秒, "
674
+ f"使用的模型: {model_name}"
675
+ )
676
+
677
+ with data_lock:
678
+ request_timestamps.append(time.time())
679
+ token_counts.append(0) # Image generation doesn't use tokens
680
+
681
+ return jsonify(response_data)
682
+ except requests.exceptions.RequestException as e:
683
+ logging.error(f"请求转发异常: {e}")
684
+ return jsonify({"error": str(e)}), 500
685
+ else:
686
+ try:
687
+ start_time = time.time()
688
+ response = requests.post(
689
+ TEST_MODEL_ENDPOINT,
690
+ headers=headers,
691
+ json=data,
692
+ stream=data.get("stream", False),
693
+ timeout=60
694
+ )
695
+ if response.status_code == 429:
696
+ return jsonify(response.json()), 429
697
+
698
+ if data.get("stream", False):
699
+ def generate():
700
+ first_chunk_time = None
701
+ full_response_content = ""
702
+ for chunk in response.iter_content(chunk_size=1024):
703
+ if chunk:
704
+ if first_chunk_time is None:
705
+ first_chunk_time = time.time()
706
+ full_response_content += chunk.decode("utf-8")
707
+ yield chunk
708
+
709
+ end_time = time.time()
710
+ first_token_time = (
711
+ first_chunk_time - start_time
712
+ if first_chunk_time else 0
713
+ )
714
+ total_time = end_time - start_time
715
+
716
+ prompt_tokens = 0
717
+ completion_tokens = 0
718
+ response_content = ""
719
+ for line in full_response_content.splitlines():
720
+ if line.startswith("data:"):
721
+ line = line[5:].strip()
722
+ if line == "[DONE]":
723
+ continue
724
+ try:
725
+ response_json = json.loads(line)
726
+
727
+ if (
728
+ "usage" in response_json and
729
+ "completion_tokens" in response_json["usage"]
730
+ ):
731
+ completion_tokens = response_json[
732
+ "usage"
733
+ ]["completion_tokens"]
734
+
735
+ if (
736
+ "choices" in response_json and
737
+ len(response_json["choices"]) > 0 and
738
+ "delta" in response_json["choices"][0] and
739
+ "content" in response_json[
740
+ "choices"
741
+ ][0]["delta"]
742
+ ):
743
+ response_content += response_json[
744
+ "choices"
745
+ ][0]["delta"]["content"]
746
+
747
+ if (
748
+ "usage" in response_json and
749
+ "prompt_tokens" in response_json["usage"]
750
+ ):
751
+ prompt_tokens = response_json[
752
+ "usage"
753
+ ]["prompt_tokens"]
754
+
755
+ except (
756
+ KeyError,
757
+ ValueError,
758
+ IndexError
759
+ ) as e:
760
+ logging.error(
761
+ f"解析流式响应单行 JSON 失败: {e}, "
762
+ f"行内容: {line}"
763
+ )
764
+
765
+ user_content = ""
766
+ messages = data.get("messages", [])
767
+ for message in messages:
768
+ if message["role"] == "user":
769
+ if isinstance(message["content"], str):
770
+ user_content += message["content"] + " "
771
+ elif isinstance(message["content"], list):
772
+ for item in message["content"]:
773
+ if (
774
+ isinstance(item, dict) and
775
+ item.get("type") == "text"
776
+ ):
777
+ user_content += (
778
+ item.get("text", "") +
779
+ " "
780
+ )
781
+
782
+ user_content = user_content.strip()
783
+
784
+ user_content_replaced = user_content.replace(
785
+ '\n', '\\n'
786
+ ).replace('\r', '\\n')
787
+ response_content_replaced = response_content.replace(
788
+ '\n', '\\n'
789
+ ).replace('\r', '\\n')
790
+
791
+ logging.info(
792
+ f"使用的key: {api_key}, "
793
+ f"提示token: {prompt_tokens}, "
794
+ f"输出token: {completion_tokens}, "
795
+ f"首字用时: {first_token_time:.4f}秒, "
796
+ f"总共用时: {total_time:.4f}秒, "
797
+ f"使用的模型: {model_name}, "
798
+ f"用户的内容: {user_content_replaced}, "
799
+ f"输出的内容: {response_content_replaced}"
800
+ )
801
+
802
+ with data_lock:
803
+ request_timestamps.append(time.time())
804
+ token_counts.append(prompt_tokens+completion_tokens)
805
+
806
+ return Response(
807
+ stream_with_context(generate()),
808
+ content_type=response.headers['Content-Type']
809
+ )
810
+ else:
811
+ response.raise_for_status()
812
+ end_time = time.time()
813
+ response_json = response.json()
814
  total_time = end_time - start_time
815
 
816
+ try:
817
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
818
+ completion_tokens = response_json[
819
+ "usage"
820
+ ]["completion_tokens"]
821
+ response_content = response_json[
822
+ "choices"
823
+ ][0]["message"]["content"]
824
+ except (KeyError, ValueError, IndexError) as e:
825
+ logging.error(
826
+ f"解析非流式响应 JSON 失败: {e}, "
827
+ f"完整内容: {response_json}"
828
+ )
829
+ prompt_tokens = 0
830
+ completion_tokens = 0
831
+ response_content = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
832
 
833
  user_content = ""
834
  messages = data.get("messages", [])
 
843
  item.get("type") == "text"
844
  ):
845
  user_content += (
846
+ item.get("text", "") + " "
 
847
  )
848
 
849
  user_content = user_content.strip()
 
859
  f"使用的key: {api_key}, "
860
  f"提示token: {prompt_tokens}, "
861
  f"输出token: {completion_tokens}, "
862
+ f"首字用时: 0, "
863
  f"总共用时: {total_time:.4f}秒, "
864
  f"使用的模型: {model_name}, "
865
  f"用户的内容: {user_content_replaced}, "
866
  f"输出的内容: {response_content_replaced}"
867
  )
 
868
  with data_lock:
869
  request_timestamps.append(time.time())
870
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
871
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
872
+ else:
873
+ token_counts.append(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
 
875
+ return jsonify(response_json)
876
 
877
+ except requests.exceptions.RequestException as e:
878
+ logging.error(f"请求转发异常: {e}")
879
+ return jsonify({"error": str(e)}), 500
880
 
881
  @app.route('/handsome/v1/models', methods=['GET'])
882
  def list_models():