yangtb24 commited on
Commit
228c133
·
verified ·
1 Parent(s): 47b958d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +305 -158
app.py CHANGED
@@ -5,8 +5,11 @@ import requests
5
  import json
6
  import random
7
  import uuid
8
- import concurrent.futures
 
 
9
  import threading
 
10
  from datetime import datetime, timedelta
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
  from flask import Flask, request, jsonify, Response, stream_with_context
@@ -556,11 +559,15 @@ def handsome_chat_completions():
556
  return jsonify({"error": "Invalid request data"}), 400
557
 
558
  model_name = data['model']
 
559
  request_type = determine_request_type(
560
  model_name,
561
  text_models,
562
- free_text_models
 
 
563
  )
 
564
  api_key = select_key(request_type, model_name)
565
 
566
  if not api_key:
@@ -573,91 +580,248 @@ def handsome_chat_completions():
573
  )
574
  }
575
  ), 429
576
-
577
  headers = {
578
  "Authorization": f"Bearer {api_key}",
579
  "Content-Type": "application/json"
580
  }
581
 
582
- try:
583
- start_time = time.time()
584
- response = requests.post(
585
- TEST_MODEL_ENDPOINT,
586
- headers=headers,
587
- json=data,
588
- stream=data.get("stream", False),
589
- timeout=60
590
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
- if response.status_code == 429:
593
- return jsonify(response.json()), 429
594
 
595
- if data.get("stream", False):
596
- def generate():
597
- first_chunk_time = None
598
- full_response_content = ""
599
- for chunk in response.iter_content(chunk_size=1024):
600
- if chunk:
601
- if first_chunk_time is None:
602
- first_chunk_time = time.time()
603
- full_response_content += chunk.decode("utf-8")
604
- yield chunk
605
 
606
- end_time = time.time()
607
- first_token_time = (
608
- first_chunk_time - start_time
609
- if first_chunk_time else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  )
 
 
 
 
611
  total_time = end_time - start_time
612
 
613
- prompt_tokens = 0
614
- completion_tokens = 0
615
- response_content = ""
616
- for line in full_response_content.splitlines():
617
- if line.startswith("data:"):
618
- line = line[5:].strip()
619
- if line == "[DONE]":
620
- continue
621
- try:
622
- response_json = json.loads(line)
623
-
624
- if (
625
- "usage" in response_json and
626
- "completion_tokens" in response_json["usage"]
627
- ):
628
- completion_tokens = response_json[
629
- "usage"
630
- ]["completion_tokens"]
631
-
632
- if (
633
- "choices" in response_json and
634
- len(response_json["choices"]) > 0 and
635
- "delta" in response_json["choices"][0] and
636
- "content" in response_json[
637
- "choices"
638
- ][0]["delta"]
639
- ):
640
- response_content += response_json[
641
- "choices"
642
- ][0]["delta"]["content"]
643
-
644
- if (
645
- "usage" in response_json and
646
- "prompt_tokens" in response_json["usage"]
647
- ):
648
- prompt_tokens = response_json[
649
- "usage"
650
- ]["prompt_tokens"]
651
-
652
- except (
653
- KeyError,
654
- ValueError,
655
- IndexError
656
- ) as e:
657
- logging.error(
658
- f"解析流式响应单行 JSON 失败: {e}, "
659
- f"行内容: {line}"
660
- )
661
 
662
  user_content = ""
663
  messages = data.get("messages", [])
@@ -672,8 +836,7 @@ def handsome_chat_completions():
672
  item.get("type") == "text"
673
  ):
674
  user_content += (
675
- item.get("text", "") +
676
- " "
677
  )
678
 
679
  user_content = user_content.strip()
@@ -689,91 +852,24 @@ def handsome_chat_completions():
689
  f"使用的key: {api_key}, "
690
  f"提示token: {prompt_tokens}, "
691
  f"输出token: {completion_tokens}, "
692
- f"首字用时: {first_token_time:.4f}秒, "
693
  f"总共用时: {total_time:.4f}秒, "
694
  f"使用的模型: {model_name}, "
695
  f"用户的内容: {user_content_replaced}, "
696
  f"输出的内容: {response_content_replaced}"
697
  )
698
-
699
  with data_lock:
700
  request_timestamps.append(time.time())
701
- token_counts.append(prompt_tokens+completion_tokens)
702
-
703
- return Response(
704
- stream_with_context(generate()),
705
- content_type=response.headers['Content-Type']
706
- )
707
- else:
708
- response.raise_for_status()
709
- end_time = time.time()
710
- response_json = response.json()
711
- total_time = end_time - start_time
712
-
713
- try:
714
- prompt_tokens = response_json["usage"]["prompt_tokens"]
715
- completion_tokens = response_json[
716
- "usage"
717
- ]["completion_tokens"]
718
- response_content = response_json[
719
- "choices"
720
- ][0]["message"]["content"]
721
- except (KeyError, ValueError, IndexError) as e:
722
- logging.error(
723
- f"解析非流式响应 JSON 失败: {e}, "
724
- f"完整内容: {response_json}"
725
- )
726
- prompt_tokens = 0
727
- completion_tokens = 0
728
- response_content = ""
729
-
730
- user_content = ""
731
- messages = data.get("messages", [])
732
- for message in messages:
733
- if message["role"] == "user":
734
- if isinstance(message["content"], str):
735
- user_content += message["content"] + " "
736
- elif isinstance(message["content"], list):
737
- for item in message["content"]:
738
- if (
739
- isinstance(item, dict) and
740
- item.get("type") == "text"
741
- ):
742
- user_content += (
743
- item.get("text", "") + " "
744
- )
745
-
746
- user_content = user_content.strip()
747
-
748
- user_content_replaced = user_content.replace(
749
- '\n', '\\n'
750
- ).replace('\r', '\\n')
751
- response_content_replaced = response_content.replace(
752
- '\n', '\\n'
753
- ).replace('\r', '\\n')
754
-
755
- logging.info(
756
- f"使用的key: {api_key}, "
757
- f"提示token: {prompt_tokens}, "
758
- f"输出token: {completion_tokens}, "
759
- f"首字用时: 0, "
760
- f"总共用时: {total_time:.4f}秒, "
761
- f"使用的模型: {model_name}, "
762
- f"用户的内容: {user_content_replaced}, "
763
- f"输出的内容: {response_content_replaced}"
764
- )
765
- with data_lock:
766
- request_timestamps.append(time.time())
767
- if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
768
- token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
769
- else:
770
- token_counts.append(0)
771
 
772
- return jsonify(response_json)
773
 
774
- except requests.exceptions.RequestException as e:
775
- logging.error(f"请求转发异常: {e}")
776
- return jsonify({"error": str(e)}), 500
777
 
778
  @app.route('/handsome/v1/models', methods=['GET'])
779
  def list_models():
@@ -782,6 +878,7 @@ def list_models():
782
 
783
  detailed_models = []
784
 
 
785
  for model in text_models:
786
  detailed_models.append({
787
  "id": model,
@@ -808,6 +905,7 @@ def list_models():
808
  "parent": None
809
  })
810
 
 
811
  for model in embedding_models:
812
  detailed_models.append({
813
  "id": model,
@@ -834,6 +932,59 @@ def list_models():
834
  "parent": None
835
  })
836
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
  return jsonify({
838
  "success": True,
839
  "data": detailed_models
@@ -1003,10 +1154,6 @@ def handsome_embeddings():
1003
  except requests.exceptions.RequestException as e:
1004
  return jsonify({"error": str(e)}), 500
1005
 
1006
- import base64
1007
- import io
1008
- from PIL import Image
1009
-
1010
  @app.route('/handsome/v1/images/generations', methods=['POST'])
1011
  def handsome_images_generations():
1012
  if not check_authorization(request):
 
5
  import json
6
  import random
7
  import uuid
8
+ import concurrent.futures
9
+ import base64
10
+ import io
11
  import threading
12
+ from PIL import Image
13
  from datetime import datetime, timedelta
14
  from apscheduler.schedulers.background import BackgroundScheduler
15
  from flask import Flask, request, jsonify, Response, stream_with_context
 
559
  return jsonify({"error": "Invalid request data"}), 400
560
 
561
  model_name = data['model']
562
+
563
  request_type = determine_request_type(
564
  model_name,
565
  text_models,
566
+ free_text_models,
567
+ image_models,
568
+ free_image_models
569
  )
570
+
571
  api_key = select_key(request_type, model_name)
572
 
573
  if not api_key:
 
580
  )
581
  }
582
  ), 429
583
+
584
  headers = {
585
  "Authorization": f"Bearer {api_key}",
586
  "Content-Type": "application/json"
587
  }
588
 
589
+ if model_name in image_models or model_name in free_image_models:
590
+ # Handle image model
591
+ user_content = ""
592
+ messages = data.get("messages", [])
593
+ for message in messages:
594
+ if message["role"] == "user":
595
+ if isinstance(message["content"], str):
596
+ user_content += message["content"] + " "
597
+ elif isinstance(message["content"], list):
598
+ for item in message["content"]:
599
+ if (
600
+ isinstance(item, dict) and
601
+ item.get("type") == "text"
602
+ ):
603
+ user_content += (
604
+ item.get("text", "") +
605
+ " "
606
+ )
607
+ user_content = user_content.strip()
608
+
609
+ siliconflow_data = {
610
+ "model": model_name,
611
+ "prompt": user_content,
612
+ "image_size": "1024x1024",
613
+ "batch_size": 1,
614
+ "num_inference_steps": 20,
615
+ "guidance_scale": 7.5,
616
+ "negative_prompt": None,
617
+ "seed": None,
618
+ "prompt_enhancement": False,
619
+ }
620
+
621
+ try:
622
+ start_time = time.time()
623
+ response = requests.post(
624
+ "https://api.siliconflow.cn/v1/images/generations",
625
+ headers=headers,
626
+ json=siliconflow_data,
627
+ timeout=120
628
+ )
629
 
630
+ if response.status_code == 429:
631
+ return jsonify(response.json()), 429
632
 
633
+ response.raise_for_status()
634
+ end_time = time.time()
635
+ response_json = response.json()
636
+ total_time = end_time - start_time
637
+
638
+ try:
639
+ images = response_json.get("images", [])
640
+ openai_images = []
641
+ for image_url in images:
642
+ openai_images.append({"url": image_url})
643
 
644
+ response_data = {
645
+ "created": int(time.time()),
646
+ "data": openai_images
647
+ }
648
+
649
+ except (KeyError, ValueError, IndexError) as e:
650
+ logging.error(
651
+ f"解析响应 JSON 失败: {e}, "
652
+ f"完整内容: {response_json}"
653
+ )
654
+ response_data = {
655
+ "created": int(time.time()),
656
+ "data": []
657
+ }
658
+
659
+ logging.info(
660
+ f"使用的key: {api_key}, "
661
+ f"总共用时: {total_time:.4f}秒, "
662
+ f"使用的模型: {model_name}, "
663
+ f"用户的内容: {user_content}"
664
+ )
665
+
666
+ with data_lock:
667
+ request_timestamps.append(time.time())
668
+ token_counts.append(0) # Image generation doesn't use tokens
669
+
670
+ return jsonify(response_data)
671
+
672
+ except requests.exceptions.RequestException as e:
673
+ logging.error(f"请求转发异常: {e}")
674
+ return jsonify({"error": str(e)}), 500
675
+
676
+ else:
677
+ # Handle text model
678
+ try:
679
+ start_time = time.time()
680
+ response = requests.post(
681
+ TEST_MODEL_ENDPOINT,
682
+ headers=headers,
683
+ json=data,
684
+ stream=data.get("stream", False),
685
+ timeout=60
686
+ )
687
+
688
+ if response.status_code == 429:
689
+ return jsonify(response.json()), 429
690
+
691
+ if data.get("stream", False):
692
+ def generate():
693
+ first_chunk_time = None
694
+ full_response_content = ""
695
+ for chunk in response.iter_content(chunk_size=1024):
696
+ if chunk:
697
+ if first_chunk_time is None:
698
+ first_chunk_time = time.time()
699
+ full_response_content += chunk.decode("utf-8")
700
+ yield chunk
701
+
702
+ end_time = time.time()
703
+ first_token_time = (
704
+ first_chunk_time - start_time
705
+ if first_chunk_time else 0
706
+ )
707
+ total_time = end_time - start_time
708
+
709
+ prompt_tokens = 0
710
+ completion_tokens = 0
711
+ response_content = ""
712
+ for line in full_response_content.splitlines():
713
+ if line.startswith("data:"):
714
+ line = line[5:].strip()
715
+ if line == "[DONE]":
716
+ continue
717
+ try:
718
+ response_json = json.loads(line)
719
+
720
+ if (
721
+ "usage" in response_json and
722
+ "completion_tokens" in response_json["usage"]
723
+ ):
724
+ completion_tokens = response_json[
725
+ "usage"
726
+ ]["completion_tokens"]
727
+
728
+ if (
729
+ "choices" in response_json and
730
+ len(response_json["choices"]) > 0 and
731
+ "delta" in response_json["choices"][0] and
732
+ "content" in response_json[
733
+ "choices"
734
+ ][0]["delta"]
735
+ ):
736
+ response_content += response_json[
737
+ "choices"
738
+ ][0]["delta"]["content"]
739
+
740
+ if (
741
+ "usage" in response_json and
742
+ "prompt_tokens" in response_json["usage"]
743
+ ):
744
+ prompt_tokens = response_json[
745
+ "usage"
746
+ ]["prompt_tokens"]
747
+
748
+ except (
749
+ KeyError,
750
+ ValueError,
751
+ IndexError
752
+ ) as e:
753
+ logging.error(
754
+ f"解析流式响应单行 JSON 失败: {e}, "
755
+ f"行内容: {line}"
756
+ )
757
+
758
+ user_content = ""
759
+ messages = data.get("messages", [])
760
+ for message in messages:
761
+ if message["role"] == "user":
762
+ if isinstance(message["content"], str):
763
+ user_content += message["content"] + " "
764
+ elif isinstance(message["content"], list):
765
+ for item in message["content"]:
766
+ if (
767
+ isinstance(item, dict) and
768
+ item.get("type") == "text"
769
+ ):
770
+ user_content += (
771
+ item.get("text", "") +
772
+ " "
773
+ )
774
+
775
+ user_content = user_content.strip()
776
+
777
+ user_content_replaced = user_content.replace(
778
+ '\n', '\\n'
779
+ ).replace('\r', '\\n')
780
+ response_content_replaced = response_content.replace(
781
+ '\n', '\\n'
782
+ ).replace('\r', '\\n')
783
+
784
+ logging.info(
785
+ f"使用的key: {api_key}, "
786
+ f"提示token: {prompt_tokens}, "
787
+ f"输出token: {completion_tokens}, "
788
+ f"首字用时: {first_token_time:.4f}秒, "
789
+ f"总共用时: {total_time:.4f}秒, "
790
+ f"使用的模型: {model_name}, "
791
+ f"用户的内容: {user_content_replaced}, "
792
+ f"输出的内容: {response_content_replaced}"
793
+ )
794
+
795
+ with data_lock:
796
+ request_timestamps.append(time.time())
797
+ token_counts.append(prompt_tokens+completion_tokens)
798
+
799
+ return Response(
800
+ stream_with_context(generate()),
801
+ content_type=response.headers['Content-Type']
802
  )
803
+ else:
804
+ response.raise_for_status()
805
+ end_time = time.time()
806
+ response_json = response.json()
807
  total_time = end_time - start_time
808
 
809
+ try:
810
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
811
+ completion_tokens = response_json[
812
+ "usage"
813
+ ]["completion_tokens"]
814
+ response_content = response_json[
815
+ "choices"
816
+ ][0]["message"]["content"]
817
+ except (KeyError, ValueError, IndexError) as e:
818
+ logging.error(
819
+ f"解析非流式响应 JSON 失败: {e}, "
820
+ f"完整内容: {response_json}"
821
+ )
822
+ prompt_tokens = 0
823
+ completion_tokens = 0
824
+ response_content = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
 
826
  user_content = ""
827
  messages = data.get("messages", [])
 
836
  item.get("type") == "text"
837
  ):
838
  user_content += (
839
+ item.get("text", "") + " "
 
840
  )
841
 
842
  user_content = user_content.strip()
 
852
  f"使用的key: {api_key}, "
853
  f"提示token: {prompt_tokens}, "
854
  f"输出token: {completion_tokens}, "
855
+ f"首字用时: 0, "
856
  f"总共用时: {total_time:.4f}秒, "
857
  f"使用的模型: {model_name}, "
858
  f"用户的内容: {user_content_replaced}, "
859
  f"输出的内容: {response_content_replaced}"
860
  )
 
861
  with data_lock:
862
  request_timestamps.append(time.time())
863
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
864
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
865
+ else:
866
+ token_counts.append(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
 
868
+ return jsonify(response_json)
869
 
870
+ except requests.exceptions.RequestException as e:
871
+ logging.error(f"请求转发异常: {e}")
872
+ return jsonify({"error": str(e)}), 500
873
 
874
  @app.route('/handsome/v1/models', methods=['GET'])
875
  def list_models():
 
878
 
879
  detailed_models = []
880
 
881
+ # 添加文本模型
882
  for model in text_models:
883
  detailed_models.append({
884
  "id": model,
 
905
  "parent": None
906
  })
907
 
908
+ # 添加 embedding 模型
909
  for model in embedding_models:
910
  detailed_models.append({
911
  "id": model,
 
932
  "parent": None
933
  })
934
 
935
+ # 添加图像模型
936
+ for model in image_models:
937
+ detailed_models.append({
938
+ "id": model,
939
+ "object": "model",
940
+ "created": 1678888888,
941
+ "owned_by": "openai",
942
+ "permission": [
943
+ {
944
+ "id": f"modelperm-{uuid.uuid4().hex}",
945
+ "object": "model_permission",
946
+ "created": 1678888888,
947
+ "allow_create_engine": False,
948
+ "allow_sampling": True,
949
+ "allow_logprobs": False,
950
+ "allow_search_indices": False,
951
+ "allow_view": True,
952
+ "allow_fine_tuning": False,
953
+ "organization": "*",
954
+ "group": None,
955
+ "is_blocking": False
956
+ }
957
+ ],
958
+ "root": model,
959
+ "parent": None
960
+ })
961
+
962
+ for model in free_image_models:
963
+ detailed_models.append({
964
+ "id": model,
965
+ "object": "model",
966
+ "created": 1678888888,
967
+ "owned_by": "openai",
968
+ "permission": [
969
+ {
970
+ "id": f"modelperm-{uuid.uuid4().hex}",
971
+ "object": "model_permission",
972
+ "created": 1678888888,
973
+ "allow_create_engine": False,
974
+ "allow_sampling": True,
975
+ "allow_logprobs": False,
976
+ "allow_search_indices": False,
977
+ "allow_view": True,
978
+ "allow_fine_tuning": False,
979
+ "organization": "*",
980
+ "group": None,
981
+ "is_blocking": False
982
+ }
983
+ ],
984
+ "root": model,
985
+ "parent": None
986
+ })
987
+
988
  return jsonify({
989
  "success": True,
990
  "data": detailed_models
 
1154
  except requests.exceptions.RequestException as e:
1155
  return jsonify({"error": str(e)}), 500
1156
 
 
 
 
 
1157
  @app.route('/handsome/v1/images/generations', methods=['POST'])
1158
  def handsome_images_generations():
1159
  if not check_authorization(request):