yangtb24 committed on
Commit
6eb16ad
·
verified ·
1 Parent(s): 2f8cff3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -305
app.py CHANGED
@@ -5,11 +5,8 @@ import requests
5
  import json
6
  import random
7
  import uuid
8
- import concurrent.fu
9
- import base64
10
- import io
11
  import threading
12
- from PIL import Imagetures
13
  from datetime import datetime, timedelta
14
  from apscheduler.schedulers.background import BackgroundScheduler
15
  from flask import Flask, request, jsonify, Response, stream_with_context
@@ -559,15 +556,11 @@ def handsome_chat_completions():
559
  return jsonify({"error": "Invalid request data"}), 400
560
 
561
  model_name = data['model']
562
-
563
  request_type = determine_request_type(
564
  model_name,
565
  text_models,
566
- free_text_models,
567
- image_models,
568
- free_image_models
569
  )
570
-
571
  api_key = select_key(request_type, model_name)
572
 
573
  if not api_key:
@@ -580,248 +573,91 @@ def handsome_chat_completions():
580
  )
581
  }
582
  ), 429
583
-
584
  headers = {
585
  "Authorization": f"Bearer {api_key}",
586
  "Content-Type": "application/json"
587
  }
588
 
589
- if model_name in image_models or model_name in free_image_models:
590
- # Handle image model
591
- user_content = ""
592
- messages = data.get("messages", [])
593
- for message in messages:
594
- if message["role"] == "user":
595
- if isinstance(message["content"], str):
596
- user_content += message["content"] + " "
597
- elif isinstance(message["content"], list):
598
- for item in message["content"]:
599
- if (
600
- isinstance(item, dict) and
601
- item.get("type") == "text"
602
- ):
603
- user_content += (
604
- item.get("text", "") +
605
- " "
606
- )
607
- user_content = user_content.strip()
608
-
609
- siliconflow_data = {
610
- "model": model_name,
611
- "prompt": user_content,
612
- "image_size": "1024x1024",
613
- "batch_size": 1,
614
- "num_inference_steps": 20,
615
- "guidance_scale": 7.5,
616
- "negative_prompt": None,
617
- "seed": None,
618
- "prompt_enhancement": False,
619
- }
620
-
621
- try:
622
- start_time = time.time()
623
- response = requests.post(
624
- "https://api.siliconflow.cn/v1/images/generations",
625
- headers=headers,
626
- json=siliconflow_data,
627
- timeout=120
628
- )
629
-
630
- if response.status_code == 429:
631
- return jsonify(response.json()), 429
632
-
633
- response.raise_for_status()
634
- end_time = time.time()
635
- response_json = response.json()
636
- total_time = end_time - start_time
637
-
638
- try:
639
- images = response_json.get("images", [])
640
- openai_images = []
641
- for image_url in images:
642
- openai_images.append({"url": image_url})
643
-
644
- response_data = {
645
- "created": int(time.time()),
646
- "data": openai_images
647
- }
648
-
649
- except (KeyError, ValueError, IndexError) as e:
650
- logging.error(
651
- f"解析响应 JSON 失败: {e}, "
652
- f"完整内容: {response_json}"
653
- )
654
- response_data = {
655
- "created": int(time.time()),
656
- "data": []
657
- }
658
-
659
- logging.info(
660
- f"使用的key: {api_key}, "
661
- f"总共用时: {total_time:.4f}秒, "
662
- f"使用的模型: {model_name}, "
663
- f"用户的内容: {user_content}"
664
- )
665
-
666
- with data_lock:
667
- request_timestamps.append(time.time())
668
- token_counts.append(0) # Image generation doesn't use tokens
669
-
670
- return jsonify(response_data)
671
-
672
- except requests.exceptions.RequestException as e:
673
- logging.error(f"请求转发异常: {e}")
674
- return jsonify({"error": str(e)}), 500
675
-
676
- else:
677
- # Handle text model
678
- try:
679
- start_time = time.time()
680
- response = requests.post(
681
- TEST_MODEL_ENDPOINT,
682
- headers=headers,
683
- json=data,
684
- stream=data.get("stream", False),
685
- timeout=60
686
- )
687
-
688
- if response.status_code == 429:
689
- return jsonify(response.json()), 429
690
-
691
- if data.get("stream", False):
692
- def generate():
693
- first_chunk_time = None
694
- full_response_content = ""
695
- for chunk in response.iter_content(chunk_size=1024):
696
- if chunk:
697
- if first_chunk_time is None:
698
- first_chunk_time = time.time()
699
- full_response_content += chunk.decode("utf-8")
700
- yield chunk
701
-
702
- end_time = time.time()
703
- first_token_time = (
704
- first_chunk_time - start_time
705
- if first_chunk_time else 0
706
- )
707
- total_time = end_time - start_time
708
-
709
- prompt_tokens = 0
710
- completion_tokens = 0
711
- response_content = ""
712
- for line in full_response_content.splitlines():
713
- if line.startswith("data:"):
714
- line = line[5:].strip()
715
- if line == "[DONE]":
716
- continue
717
- try:
718
- response_json = json.loads(line)
719
-
720
- if (
721
- "usage" in response_json and
722
- "completion_tokens" in response_json["usage"]
723
- ):
724
- completion_tokens = response_json[
725
- "usage"
726
- ]["completion_tokens"]
727
-
728
- if (
729
- "choices" in response_json and
730
- len(response_json["choices"]) > 0 and
731
- "delta" in response_json["choices"][0] and
732
- "content" in response_json[
733
- "choices"
734
- ][0]["delta"]
735
- ):
736
- response_content += response_json[
737
- "choices"
738
- ][0]["delta"]["content"]
739
-
740
- if (
741
- "usage" in response_json and
742
- "prompt_tokens" in response_json["usage"]
743
- ):
744
- prompt_tokens = response_json[
745
- "usage"
746
- ]["prompt_tokens"]
747
-
748
- except (
749
- KeyError,
750
- ValueError,
751
- IndexError
752
- ) as e:
753
- logging.error(
754
- f"解析流式响应单行 JSON 失败: {e}, "
755
- f"行内容: {line}"
756
- )
757
 
758
- user_content = ""
759
- messages = data.get("messages", [])
760
- for message in messages:
761
- if message["role"] == "user":
762
- if isinstance(message["content"], str):
763
- user_content += message["content"] + " "
764
- elif isinstance(message["content"], list):
765
- for item in message["content"]:
766
- if (
767
- isinstance(item, dict) and
768
- item.get("type") == "text"
769
- ):
770
- user_content += (
771
- item.get("text", "") +
772
- " "
773
- )
774
-
775
- user_content = user_content.strip()
776
-
777
- user_content_replaced = user_content.replace(
778
- '\n', '\\n'
779
- ).replace('\r', '\\n')
780
- response_content_replaced = response_content.replace(
781
- '\n', '\\n'
782
- ).replace('\r', '\\n')
783
-
784
- logging.info(
785
- f"使用的key: {api_key}, "
786
- f"提示token: {prompt_tokens}, "
787
- f"输出token: {completion_tokens}, "
788
- f"首字用时: {first_token_time:.4f}秒, "
789
- f"总共用时: {total_time:.4f}秒, "
790
- f"使用的模型: {model_name}, "
791
- f"用户的内容: {user_content_replaced}, "
792
- f"输出的内容: {response_content_replaced}"
793
- )
794
 
795
- with data_lock:
796
- request_timestamps.append(time.time())
797
- token_counts.append(prompt_tokens+completion_tokens)
 
 
 
 
 
 
 
798
 
799
- return Response(
800
- stream_with_context(generate()),
801
- content_type=response.headers['Content-Type']
802
- )
803
- else:
804
- response.raise_for_status()
805
  end_time = time.time()
806
- response_json = response.json()
 
 
 
807
  total_time = end_time - start_time
808
 
809
- try:
810
- prompt_tokens = response_json["usage"]["prompt_tokens"]
811
- completion_tokens = response_json[
812
- "usage"
813
- ]["completion_tokens"]
814
- response_content = response_json[
815
- "choices"
816
- ][0]["message"]["content"]
817
- except (KeyError, ValueError, IndexError) as e:
818
- logging.error(
819
- f"解析非流式响应 JSON 失败: {e}, "
820
- f"完整内容: {response_json}"
821
- )
822
- prompt_tokens = 0
823
- completion_tokens = 0
824
- response_content = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
 
826
  user_content = ""
827
  messages = data.get("messages", [])
@@ -836,7 +672,8 @@ def handsome_chat_completions():
836
  item.get("type") == "text"
837
  ):
838
  user_content += (
839
- item.get("text", "") + " "
 
840
  )
841
 
842
  user_content = user_content.strip()
@@ -852,24 +689,91 @@ def handsome_chat_completions():
852
  f"使用的key: {api_key}, "
853
  f"提示token: {prompt_tokens}, "
854
  f"输出token: {completion_tokens}, "
855
- f"首字用时: 0, "
856
  f"总共用时: {total_time:.4f}秒, "
857
  f"使用的模型: {model_name}, "
858
  f"用户的内容: {user_content_replaced}, "
859
  f"输出的内容: {response_content_replaced}"
860
  )
 
861
  with data_lock:
862
  request_timestamps.append(time.time())
863
- if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
864
- token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
865
- else:
866
- token_counts.append(0)
867
 
868
- return jsonify(response_json)
 
 
 
 
 
 
 
 
869
 
870
- except requests.exceptions.RequestException as e:
871
- logging.error(f"请求转发异常: {e}")
872
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
873
 
874
  @app.route('/handsome/v1/models', methods=['GET'])
875
  def list_models():
@@ -878,7 +782,6 @@ def list_models():
878
 
879
  detailed_models = []
880
 
881
- # 添加文本模型
882
  for model in text_models:
883
  detailed_models.append({
884
  "id": model,
@@ -905,7 +808,6 @@ def list_models():
905
  "parent": None
906
  })
907
 
908
- # 添加 embedding 模型
909
  for model in embedding_models:
910
  detailed_models.append({
911
  "id": model,
@@ -932,59 +834,6 @@ def list_models():
932
  "parent": None
933
  })
934
 
935
- # 添加图像模型
936
- for model in image_models:
937
- detailed_models.append({
938
- "id": model,
939
- "object": "model",
940
- "created": 1678888888,
941
- "owned_by": "openai",
942
- "permission": [
943
- {
944
- "id": f"modelperm-{uuid.uuid4().hex}",
945
- "object": "model_permission",
946
- "created": 1678888888,
947
- "allow_create_engine": False,
948
- "allow_sampling": True,
949
- "allow_logprobs": False,
950
- "allow_search_indices": False,
951
- "allow_view": True,
952
- "allow_fine_tuning": False,
953
- "organization": "*",
954
- "group": None,
955
- "is_blocking": False
956
- }
957
- ],
958
- "root": model,
959
- "parent": None
960
- })
961
-
962
- for model in free_image_models:
963
- detailed_models.append({
964
- "id": model,
965
- "object": "model",
966
- "created": 1678888888,
967
- "owned_by": "openai",
968
- "permission": [
969
- {
970
- "id": f"modelperm-{uuid.uuid4().hex}",
971
- "object": "model_permission",
972
- "created": 1678888888,
973
- "allow_create_engine": False,
974
- "allow_sampling": True,
975
- "allow_logprobs": False,
976
- "allow_search_indices": False,
977
- "allow_view": True,
978
- "allow_fine_tuning": False,
979
- "organization": "*",
980
- "group": None,
981
- "is_blocking": False
982
- }
983
- ],
984
- "root": model,
985
- "parent": None
986
- })
987
-
988
  return jsonify({
989
  "success": True,
990
  "data": detailed_models
@@ -1154,6 +1003,10 @@ def handsome_embeddings():
1154
  except requests.exceptions.RequestException as e:
1155
  return jsonify({"error": str(e)}), 500
1156
 
 
 
 
 
1157
  @app.route('/handsome/v1/images/generations', methods=['POST'])
1158
  def handsome_images_generations():
1159
  if not check_authorization(request):
 
5
  import json
6
  import random
7
  import uuid
8
+ import concurrent.futures
 
 
9
  import threading
 
10
  from datetime import datetime, timedelta
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
  from flask import Flask, request, jsonify, Response, stream_with_context
 
556
  return jsonify({"error": "Invalid request data"}), 400
557
 
558
  model_name = data['model']
 
559
  request_type = determine_request_type(
560
  model_name,
561
  text_models,
562
+ free_text_models
 
 
563
  )
 
564
  api_key = select_key(request_type, model_name)
565
 
566
  if not api_key:
 
573
  )
574
  }
575
  ), 429
576
+
577
  headers = {
578
  "Authorization": f"Bearer {api_key}",
579
  "Content-Type": "application/json"
580
  }
581
 
582
+ try:
583
+ start_time = time.time()
584
+ response = requests.post(
585
+ TEST_MODEL_ENDPOINT,
586
+ headers=headers,
587
+ json=data,
588
+ stream=data.get("stream", False),
589
+ timeout=60
590
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
+ if response.status_code == 429:
593
+ return jsonify(response.json()), 429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
 
595
+ if data.get("stream", False):
596
+ def generate():
597
+ first_chunk_time = None
598
+ full_response_content = ""
599
+ for chunk in response.iter_content(chunk_size=1024):
600
+ if chunk:
601
+ if first_chunk_time is None:
602
+ first_chunk_time = time.time()
603
+ full_response_content += chunk.decode("utf-8")
604
+ yield chunk
605
 
 
 
 
 
 
 
606
  end_time = time.time()
607
+ first_token_time = (
608
+ first_chunk_time - start_time
609
+ if first_chunk_time else 0
610
+ )
611
  total_time = end_time - start_time
612
 
613
+ prompt_tokens = 0
614
+ completion_tokens = 0
615
+ response_content = ""
616
+ for line in full_response_content.splitlines():
617
+ if line.startswith("data:"):
618
+ line = line[5:].strip()
619
+ if line == "[DONE]":
620
+ continue
621
+ try:
622
+ response_json = json.loads(line)
623
+
624
+ if (
625
+ "usage" in response_json and
626
+ "completion_tokens" in response_json["usage"]
627
+ ):
628
+ completion_tokens = response_json[
629
+ "usage"
630
+ ]["completion_tokens"]
631
+
632
+ if (
633
+ "choices" in response_json and
634
+ len(response_json["choices"]) > 0 and
635
+ "delta" in response_json["choices"][0] and
636
+ "content" in response_json[
637
+ "choices"
638
+ ][0]["delta"]
639
+ ):
640
+ response_content += response_json[
641
+ "choices"
642
+ ][0]["delta"]["content"]
643
+
644
+ if (
645
+ "usage" in response_json and
646
+ "prompt_tokens" in response_json["usage"]
647
+ ):
648
+ prompt_tokens = response_json[
649
+ "usage"
650
+ ]["prompt_tokens"]
651
+
652
+ except (
653
+ KeyError,
654
+ ValueError,
655
+ IndexError
656
+ ) as e:
657
+ logging.error(
658
+ f"解析流式响应单行 JSON 失败: {e}, "
659
+ f"行内容: {line}"
660
+ )
661
 
662
  user_content = ""
663
  messages = data.get("messages", [])
 
672
  item.get("type") == "text"
673
  ):
674
  user_content += (
675
+ item.get("text", "") +
676
+ " "
677
  )
678
 
679
  user_content = user_content.strip()
 
689
  f"使用的key: {api_key}, "
690
  f"提示token: {prompt_tokens}, "
691
  f"输出token: {completion_tokens}, "
692
+ f"首字用时: {first_token_time:.4f}秒, "
693
  f"总共用时: {total_time:.4f}秒, "
694
  f"使用的模型: {model_name}, "
695
  f"用户的内容: {user_content_replaced}, "
696
  f"输出的内容: {response_content_replaced}"
697
  )
698
+
699
  with data_lock:
700
  request_timestamps.append(time.time())
701
+ token_counts.append(prompt_tokens+completion_tokens)
 
 
 
702
 
703
+ return Response(
704
+ stream_with_context(generate()),
705
+ content_type=response.headers['Content-Type']
706
+ )
707
+ else:
708
+ response.raise_for_status()
709
+ end_time = time.time()
710
+ response_json = response.json()
711
+ total_time = end_time - start_time
712
 
713
+ try:
714
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
715
+ completion_tokens = response_json[
716
+ "usage"
717
+ ]["completion_tokens"]
718
+ response_content = response_json[
719
+ "choices"
720
+ ][0]["message"]["content"]
721
+ except (KeyError, ValueError, IndexError) as e:
722
+ logging.error(
723
+ f"解析非流式响应 JSON 失败: {e}, "
724
+ f"完整内容: {response_json}"
725
+ )
726
+ prompt_tokens = 0
727
+ completion_tokens = 0
728
+ response_content = ""
729
+
730
+ user_content = ""
731
+ messages = data.get("messages", [])
732
+ for message in messages:
733
+ if message["role"] == "user":
734
+ if isinstance(message["content"], str):
735
+ user_content += message["content"] + " "
736
+ elif isinstance(message["content"], list):
737
+ for item in message["content"]:
738
+ if (
739
+ isinstance(item, dict) and
740
+ item.get("type") == "text"
741
+ ):
742
+ user_content += (
743
+ item.get("text", "") + " "
744
+ )
745
+
746
+ user_content = user_content.strip()
747
+
748
+ user_content_replaced = user_content.replace(
749
+ '\n', '\\n'
750
+ ).replace('\r', '\\n')
751
+ response_content_replaced = response_content.replace(
752
+ '\n', '\\n'
753
+ ).replace('\r', '\\n')
754
+
755
+ logging.info(
756
+ f"使用的key: {api_key}, "
757
+ f"提示token: {prompt_tokens}, "
758
+ f"输出token: {completion_tokens}, "
759
+ f"首字用时: 0, "
760
+ f"总共用时: {total_time:.4f}秒, "
761
+ f"使用的模型: {model_name}, "
762
+ f"用户的内容: {user_content_replaced}, "
763
+ f"输出的内容: {response_content_replaced}"
764
+ )
765
+ with data_lock:
766
+ request_timestamps.append(time.time())
767
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
768
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
769
+ else:
770
+ token_counts.append(0)
771
+
772
+ return jsonify(response_json)
773
+
774
+ except requests.exceptions.RequestException as e:
775
+ logging.error(f"请求转发异常: {e}")
776
+ return jsonify({"error": str(e)}), 500
777
 
778
  @app.route('/handsome/v1/models', methods=['GET'])
779
  def list_models():
 
782
 
783
  detailed_models = []
784
 
 
785
  for model in text_models:
786
  detailed_models.append({
787
  "id": model,
 
808
  "parent": None
809
  })
810
 
 
811
  for model in embedding_models:
812
  detailed_models.append({
813
  "id": model,
 
834
  "parent": None
835
  })
836
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
  return jsonify({
838
  "success": True,
839
  "data": detailed_models
 
1003
  except requests.exceptions.RequestException as e:
1004
  return jsonify({"error": str(e)}), 500
1005
 
1006
+ import base64
1007
+ import io
1008
+ from PIL import Image
1009
+
1010
  @app.route('/handsome/v1/images/generations', methods=['POST'])
1011
  def handsome_images_generations():
1012
  if not check_authorization(request):