yangtb24 committed on
Commit
fccd514
1 Parent(s): 56b8faf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +208 -74
app.py CHANGED
@@ -547,8 +547,8 @@ def check_tokens():
547
  return jsonify(results)
548
 
549
  @app.route('/handsome/v1/chat/completions', methods=['POST'])
550
- def handsome_chat_completions(request, authorization_key):
551
- if not check_authorization(request, authorization_key):
552
  return jsonify({"error": "Unauthorized"}), 401
553
 
554
  data = request.get_json()
@@ -556,13 +556,13 @@ def handsome_chat_completions(request, authorization_key):
556
  return jsonify({"error": "Invalid request data"}), 400
557
 
558
  model_name = data['model']
559
-
560
  request_type = determine_request_type(
561
  model_name,
562
  text_models + image_models,
563
  free_text_models + free_image_models
564
  )
565
-
566
  api_key = select_key(request_type, model_name)
567
 
568
  if not api_key:
@@ -580,7 +580,7 @@ def handsome_chat_completions(request, authorization_key):
580
  "Authorization": f"Bearer {api_key}",
581
  "Content-Type": "application/json"
582
  }
583
-
584
  if model_name in image_models:
585
  # Handle image generation
586
  # Map OpenAI-style parameters to SiliconFlow's parameters
@@ -595,7 +595,7 @@ def handsome_chat_completions(request, authorization_key):
595
  "seed": data.get("seed"),
596
  "prompt_enhancement": False,
597
  }
598
-
599
  # Parameter validation and adjustments
600
  if siliconflow_data["batch_size"] < 1:
601
  siliconflow_data["batch_size"] = 1
@@ -606,12 +606,12 @@ def handsome_chat_completions(request, authorization_key):
606
  siliconflow_data["num_inference_steps"] = 1
607
  if siliconflow_data["num_inference_steps"] > 50:
608
  siliconflow_data["num_inference_steps"] = 50
609
-
610
  if siliconflow_data["guidance_scale"] < 0:
611
  siliconflow_data["guidance_scale"] = 0
612
  if siliconflow_data["guidance_scale"] > 100:
613
  siliconflow_data["guidance_scale"] = 100
614
-
615
  if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
616
  siliconflow_data["image_size"] = "1024x1024"
617
 
@@ -623,7 +623,7 @@ def handsome_chat_completions(request, authorization_key):
623
  json=siliconflow_data,
624
  timeout=120
625
  )
626
-
627
  if response.status_code == 429:
628
  return jsonify(response.json()), 429
629
 
@@ -631,10 +631,10 @@ def handsome_chat_completions(request, authorization_key):
631
  end_time = time.time()
632
  response_json = response.json()
633
  total_time = end_time - start_time
634
-
635
  try:
636
  images = response_json.get("images", [])
637
-
638
  # Extract the first URL if available
639
  image_url = ""
640
  if images and isinstance(images[0], dict) and "url" in images[0]:
@@ -643,7 +643,7 @@ def handsome_chat_completions(request, authorization_key):
643
  elif images and isinstance(images[0], str):
644
  image_url = images[0]
645
  logging.info(f"Extracted image URL: {image_url}")
646
-
647
  # Construct the expected JSON output - Mimicking OpenAI
648
  response_data = {
649
  "id": f"chatcmpl-{uuid.uuid4()}",
@@ -656,10 +656,21 @@ def handsome_chat_completions(request, authorization_key):
656
  "index": 0,
657
  "message": {
658
  "role": "assistant",
659
- "content": None,
660
- "image_url": image_url if image_url else None, # Return URL in image_url
 
 
 
 
 
 
 
 
 
 
 
661
  },
662
- "finish_reason": "length",
663
  }
664
  ],
665
  "usage": { # Added usage
@@ -668,9 +679,7 @@ def handsome_chat_completions(request, authorization_key):
668
  "total_tokens": 0
669
  }
670
  }
671
- if not image_url:
672
- response_data["choices"][0]["message"]["content"] = "Failed to generate image"
673
-
674
  except (KeyError, ValueError, IndexError) as e:
675
  logging.error(
676
  f"解析响应 JSON 失败: {e}, "
@@ -688,7 +697,6 @@ def handsome_chat_completions(request, authorization_key):
688
  "message": {
689
  "role": "assistant",
690
  "content": "Failed to process image data",
691
- "image_url": None,
692
  },
693
  "finish_reason": "stop",
694
  }
@@ -706,6 +714,10 @@ def handsome_chat_completions(request, authorization_key):
706
  f"使用的模型: {model_name}"
707
  )
708
 
 
 
 
 
709
  return jsonify(response_data)
710
  except requests.exceptions.RequestException as e:
711
  logging.error(f"请求转发异常: {e}")
@@ -718,69 +730,191 @@ def handsome_chat_completions(request, authorization_key):
718
  TEST_MODEL_ENDPOINT,
719
  headers=headers,
720
  json=data,
721
- stream=False,
722
  timeout=60
723
  )
724
  if response.status_code == 429:
725
  return jsonify(response.json()), 429
726
-
727
- response.raise_for_status()
728
- end_time = time.time()
729
- response_json = response.json()
730
- total_time = end_time - start_time
731
- try:
732
- prompt_tokens = response_json["usage"]["prompt_tokens"]
733
- completion_tokens = response_json[
734
- "usage"
735
- ]["completion_tokens"]
736
- response_content = response_json[
737
- "choices"
738
- ][0]["message"]["content"]
739
- except (KeyError, ValueError, IndexError) as e:
740
- logging.error(
741
- f"解析非流式响应 JSON 失败: {e}, "
742
- f"完整内容: {response_json}"
743
- )
744
- prompt_tokens = 0
745
- completion_tokens = 0
746
- response_content = ""
747
- user_content = ""
748
- messages = data.get("messages", [])
749
- for message in messages:
750
- if message["role"] == "user":
751
- if isinstance(message["content"], str):
752
- user_content += message["content"] + " "
753
- elif isinstance(message["content"], list):
754
- for item in message["content"]:
755
- if (
756
- isinstance(item, dict) and
757
- item.get("type") == "text"
758
- ):
759
- user_content += (
760
- item.get("text", "") +
761
- " "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
  )
763
 
764
- user_content = user_content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
 
766
- user_content_replaced = user_content.replace(
767
- '\n', '\\n'
768
- ).replace('\r', '\\n')
769
- response_content_replaced = response_content.replace(
770
- '\n', '\\n'
771
- ).replace('\r', '\\n')
772
 
773
- logging.info(
774
- f"使用的key: {api_key}, "
775
- f"提示token: {prompt_tokens}, "
776
- f"输出token: {completion_tokens}, "
777
- f"首字用时: 0, "
778
- f"总共用时: {total_time:.4f}秒, "
779
- f"使用的模型: {model_name}, "
780
- f"用户的内容: {user_content_replaced}, "
781
- f"输出的内容: {response_content_replaced}"
782
- )
783
- return jsonify(response_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
 
785
  except requests.exceptions.RequestException as e:
786
  logging.error(f"请求转发异常: {e}")
 
547
  return jsonify(results)
548
 
549
  @app.route('/handsome/v1/chat/completions', methods=['POST'])
550
+ def handsome_chat_completions():
551
+ if not check_authorization(request):
552
  return jsonify({"error": "Unauthorized"}), 401
553
 
554
  data = request.get_json()
 
556
  return jsonify({"error": "Invalid request data"}), 400
557
 
558
  model_name = data['model']
559
+
560
  request_type = determine_request_type(
561
  model_name,
562
  text_models + image_models,
563
  free_text_models + free_image_models
564
  )
565
+
566
  api_key = select_key(request_type, model_name)
567
 
568
  if not api_key:
 
580
  "Authorization": f"Bearer {api_key}",
581
  "Content-Type": "application/json"
582
  }
583
+
584
  if model_name in image_models:
585
  # Handle image generation
586
  # Map OpenAI-style parameters to SiliconFlow's parameters
 
595
  "seed": data.get("seed"),
596
  "prompt_enhancement": False,
597
  }
598
+
599
  # Parameter validation and adjustments
600
  if siliconflow_data["batch_size"] < 1:
601
  siliconflow_data["batch_size"] = 1
 
606
  siliconflow_data["num_inference_steps"] = 1
607
  if siliconflow_data["num_inference_steps"] > 50:
608
  siliconflow_data["num_inference_steps"] = 50
609
+
610
  if siliconflow_data["guidance_scale"] < 0:
611
  siliconflow_data["guidance_scale"] = 0
612
  if siliconflow_data["guidance_scale"] > 100:
613
  siliconflow_data["guidance_scale"] = 100
614
+
615
  if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
616
  siliconflow_data["image_size"] = "1024x1024"
617
 
 
623
  json=siliconflow_data,
624
  timeout=120
625
  )
626
+
627
  if response.status_code == 429:
628
  return jsonify(response.json()), 429
629
 
 
631
  end_time = time.time()
632
  response_json = response.json()
633
  total_time = end_time - start_time
634
+
635
  try:
636
  images = response_json.get("images", [])
637
+
638
  # Extract the first URL if available
639
  image_url = ""
640
  if images and isinstance(images[0], dict) and "url" in images[0]:
 
643
  elif images and isinstance(images[0], str):
644
  image_url = images[0]
645
  logging.info(f"Extracted image URL: {image_url}")
646
+
647
  # Construct the expected JSON output - Mimicking OpenAI
648
  response_data = {
649
  "id": f"chatcmpl-{uuid.uuid4()}",
 
656
  "index": 0,
657
  "message": {
658
  "role": "assistant",
659
+ "content": None, # set to None as image is in tool_calls
660
+ "tool_calls": [
661
+ {
662
+ "id": f"call_{uuid.uuid4()}",
663
+ "type": "function",
664
+ "function": {
665
+ "name": "image_generation",
666
+ "arguments": json.dumps({
667
+ "image_url": image_url
668
+ })
669
+ }
670
+ }
671
+ ]
672
  },
673
+ "finish_reason": "tool_calls",
674
  }
675
  ],
676
  "usage": { # Added usage
 
679
  "total_tokens": 0
680
  }
681
  }
682
+
 
 
683
  except (KeyError, ValueError, IndexError) as e:
684
  logging.error(
685
  f"解析响应 JSON 失败: {e}, "
 
697
  "message": {
698
  "role": "assistant",
699
  "content": "Failed to process image data",
 
700
  },
701
  "finish_reason": "stop",
702
  }
 
714
  f"使用的模型: {model_name}"
715
  )
716
 
717
+ with data_lock:
718
+ request_timestamps.append(time.time())
719
+ token_counts.append(0) # Image generation doesn't use tokens
720
+
721
  return jsonify(response_data)
722
  except requests.exceptions.RequestException as e:
723
  logging.error(f"请求转发异常: {e}")
 
730
  TEST_MODEL_ENDPOINT,
731
  headers=headers,
732
  json=data,
733
+ stream=data.get("stream", False),
734
  timeout=60
735
  )
736
  if response.status_code == 429:
737
  return jsonify(response.json()), 429
738
+
739
+ if data.get("stream", False):
740
+ def generate():
741
+ first_chunk_time = None
742
+ full_response_content = ""
743
+ for chunk in response.iter_content(chunk_size=1024):
744
+ if chunk:
745
+ if first_chunk_time is None:
746
+ first_chunk_time = time.time()
747
+ full_response_content += chunk.decode("utf-8")
748
+ yield chunk
749
+
750
+ end_time = time.time()
751
+ first_token_time = (
752
+ first_chunk_time - start_time
753
+ if first_chunk_time else 0
754
+ )
755
+ total_time = end_time - start_time
756
+
757
+ prompt_tokens = 0
758
+ completion_tokens = 0
759
+ response_content = ""
760
+ for line in full_response_content.splitlines():
761
+ if line.startswith("data:"):
762
+ line = line[5:].strip()
763
+ if line == "[DONE]":
764
+ continue
765
+ try:
766
+ response_json = json.loads(line)
767
+
768
+ if (
769
+ "usage" in response_json and
770
+ "completion_tokens" in response_json["usage"]
771
+ ):
772
+ completion_tokens = response_json[
773
+ "usage"
774
+ ]["completion_tokens"]
775
+
776
+ if (
777
+ "choices" in response_json and
778
+ len(response_json["choices"]) > 0 and
779
+ "delta" in response_json["choices"][0] and
780
+ "content" in response_json[
781
+ "choices"
782
+ ][0]["delta"]
783
+ ):
784
+ response_content += response_json[
785
+ "choices"
786
+ ][0]["delta"]["content"]
787
+
788
+ if (
789
+ "usage" in response_json and
790
+ "prompt_tokens" in response_json["usage"]
791
+ ):
792
+ prompt_tokens = response_json[
793
+ "usage"
794
+ ]["prompt_tokens"]
795
+
796
+ except (
797
+ KeyError,
798
+ ValueError,
799
+ IndexError
800
+ ) as e:
801
+ logging.error(
802
+ f"解析流式响应单行 JSON 失败: {e}, "
803
+ f"行内容: {line}"
804
  )
805
 
806
+ user_content = ""
807
+ messages = data.get("messages", [])
808
+ for message in messages:
809
+ if message["role"] == "user":
810
+ if isinstance(message["content"], str):
811
+ user_content += message["content"] + " "
812
+ elif isinstance(message["content"], list):
813
+ for item in message["content"]:
814
+ if (
815
+ isinstance(item, dict) and
816
+ item.get("type") == "text"
817
+ ):
818
+ user_content += (
819
+ item.get("text", "") +
820
+ " "
821
+ )
822
+
823
+ user_content = user_content.strip()
824
+
825
+ user_content_replaced = user_content.replace(
826
+ '\n', '\\n'
827
+ ).replace('\r', '\\n')
828
+ response_content_replaced = response_content.replace(
829
+ '\n', '\\n'
830
+ ).replace('\r', '\\n')
831
+
832
+ logging.info(
833
+ f"使用的key: {api_key}, "
834
+ f"提示token: {prompt_tokens}, "
835
+ f"输出token: {completion_tokens}, "
836
+ f"首字用时: {first_token_time:.4f}秒, "
837
+ f"总共用时: {total_time:.4f}秒, "
838
+ f"使用的模型: {model_name}, "
839
+ f"用户的内容: {user_content_replaced}, "
840
+ f"输出的内容: {response_content_replaced}"
841
+ )
842
 
843
+ with data_lock:
844
+ request_timestamps.append(time.time())
845
+ token_counts.append(prompt_tokens+completion_tokens)
 
 
 
846
 
847
+ return Response(
848
+ stream_with_context(generate()),
849
+ content_type=response.headers['Content-Type']
850
+ )
851
+ else:
852
+ response.raise_for_status()
853
+ end_time = time.time()
854
+ response_json = response.json()
855
+ total_time = end_time - start_time
856
+
857
+ try:
858
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
859
+ completion_tokens = response_json[
860
+ "usage"
861
+ ]["completion_tokens"]
862
+ response_content = response_json[
863
+ "choices"
864
+ ][0]["message"]["content"]
865
+ except (KeyError, ValueError, IndexError) as e:
866
+ logging.error(
867
+ f"解析非流式响应 JSON 失败: {e}, "
868
+ f"完整内容: {response_json}"
869
+ )
870
+ prompt_tokens = 0
871
+ completion_tokens = 0
872
+ response_content = ""
873
+
874
+ user_content = ""
875
+ messages = data.get("messages", [])
876
+ for message in messages:
877
+ if message["role"] == "user":
878
+ if isinstance(message["content"], str):
879
+ user_content += message["content"] + " "
880
+ elif isinstance(message["content"], list):
881
+ for item in message["content"]:
882
+ if (
883
+ isinstance(item, dict) and
884
+ item.get("type") == "text"
885
+ ):
886
+ user_content += (
887
+ item.get("text", "") +
888
+ " "
889
+ )
890
+
891
+ user_content = user_content.strip()
892
+
893
+ user_content_replaced = user_content.replace(
894
+ '\n', '\\n'
895
+ ).replace('\r', '\\n')
896
+ response_content_replaced = response_content.replace(
897
+ '\n', '\\n'
898
+ ).replace('\r', '\\n')
899
+
900
+ logging.info(
901
+ f"使用的key: {api_key}, "
902
+ f"提示token: {prompt_tokens}, "
903
+ f"输出token: {completion_tokens}, "
904
+ f"首字用时: 0, "
905
+ f"总共用时: {total_time:.4f}秒, "
906
+ f"使用的模型: {model_name}, "
907
+ f"用户的内容: {user_content_replaced}, "
908
+ f"输出的内容: {response_content_replaced}"
909
+ )
910
+ with data_lock:
911
+ request_timestamps.append(time.time())
912
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
913
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
914
+ else:
915
+ token_counts.append(0)
916
+
917
+ return jsonify(response_json)
918
 
919
  except requests.exceptions.RequestException as e:
920
  logging.error(f"请求转发异常: {e}")