sone-latest

Running

App Files Files Community

yangtb24 committed on Dec 11, 2024

Commit

c31239d

verified ·

1 Parent(s): 756c521

Update app.py

Browse files

Files changed (1) hide show

app.py +147 -30

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import logging
 import requests
 import json
 import random
-import uuid
 import concurrent.futures
 from datetime import datetime, timedelta
 from apscheduler.schedulers.background import BackgroundScheduler
@@ -16,11 +16,14 @@ logging.basicConfig(level=logging.INFO,
 API_ENDPOINT = "https://api.siliconflow.cn/v1/user/info"
 TEST_MODEL_ENDPOINT = "https://api.siliconflow.cn/v1/chat/completions"
 MODELS_ENDPOINT = "https://api.siliconflow.cn/v1/models"
 app = Flask(__name__)
 all_models = []
 free_models = []
 invalid_keys_global = []
 free_keys_global = []
@@ -74,7 +77,7 @@ def test_model_availability(api_key, model_name):
                                      "stream": False
                                  },
                                  timeout=10)
-        if response.status_code == 429 or response.status_code == 200 :
             return True
         else:
             return False
@@ -86,13 +89,16 @@ def refresh_models():
     """
     刷新模型列表和免费模型列表。
     """
-    global all_models, free_models
-    all_models = get_all_models(FREE_MODEL_TEST_KEY)
     free_models = []
     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-        future_to_model = {executor.submit(test_model_availability, FREE_MODEL_TEST_KEY, model): model for model in all_models}
         for future in concurrent.futures.as_completed(future_to_model):
             model = future_to_model[future]
             try:
@@ -102,8 +108,47 @@ def refresh_models():
             except Exception as exc:
                 logging.error(f"模型 {model} 测试生成异常: {exc}")
-    logging.info(f"所有模型列表：{all_models}")
-    logging.info(f"免费模型列表：{free_models}")
 def load_keys():
     """
@@ -171,7 +216,7 @@ def process_key(key, test_model):
             else:
                 return "unverified"
-def get_all_models(api_key):
     """
     获取所有模型列表。
     """
@@ -180,11 +225,12 @@ def get_all_models(api_key):
         "Content-Type": "application/json"
     }
     try:
-        response = requests.get(MODELS_ENDPOINT, headers=headers, params={"sub_type": "chat"})
         response.raise_for_status()
         data = response.json()
         if isinstance(data, dict) and 'data' in data and isinstance(data['data'], list):
-            return [model.get("id") for model in data["data"] if isinstance(model, dict) and "id" in model]
         else:
             logging.error("获取模型列表失败：响应数据格式不正确")
             return []
@@ -195,13 +241,13 @@ def get_all_models(api_key):
         logging.error(f"解析模型列表失败，API Key：{api_key}，错误信息：{e}")
         return []
-def determine_request_type(model_name):
     """
     根据用户请求的模型判断请求类型。
     """
-    if model_name in free_models:
         return "free"
-    elif model_name in all_models:
         return "paid"
     else:
         return "unknown"
@@ -225,7 +271,7 @@ def select_key(request_type, model_name):
     for _ in range(len(available_keys)):
         key = available_keys[current_index % len(available_keys)]
         current_index += 1
         if key_is_valid(key, request_type):
             model_key_indices[model_name] = current_index
             return key
@@ -241,13 +287,13 @@ def key_is_valid(key, request_type):
     """
     if request_type == "invalid":
         return False
     credit_summary = get_credit_summary(key)
     if credit_summary is None:
         return False
     total_balance = credit_summary.get("total_balance", 0)
     if request_type == "free":
         return True
     elif request_type == "paid" or request_type == "unverified":
@@ -299,13 +345,16 @@ def check_tokens():
                 credit_summary = get_credit_summary(token)
                 balance = credit_summary.get("total_balance", 0) if credit_summary else 0
                 if key_type == "invalid":
-                    results.append({"token": token, "type": "无效 KEY", "balance": balance, "message": "无法获取额度信息"})
                 elif key_type == "free":
                     results.append({"token": token, "type": "免费 KEY", "balance": balance, "message": "额度不足"})
                 elif key_type == "unverified":
-                    results.append({"token": token, "type": "未实名 KEY", "balance": balance, "message": "无法使用指定模型"})
                 elif key_type == "valid":
-                    results.append({"token": token, "type": "有效 KEY", "balance": balance, "message": "可以使用指定模型"})
             except Exception as exc:
                 logging.error(f"处理 Token {token} 生成异常: {exc}")
@@ -321,11 +370,12 @@ def handsome_chat_completions():
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
-    request_type = determine_request_type(model_name)
     api_key = select_key(request_type, model_name)
     if not api_key:
-        return jsonify({"error": "No available API key for this request type or all keys have reached their limits"}), 429
     headers = {
         "Authorization": f"Bearer {api_key}",
@@ -374,7 +424,9 @@ def handsome_chat_completions():
                             if "usage" in response_json and "completion_tokens" in response_json["usage"]:
                                 completion_tokens = response_json["usage"]["completion_tokens"]
-                            if "choices" in response_json and len(response_json["choices"]) > 0 and "delta" in response_json["choices"][0] and "content" in response_json["choices"][0]["delta"]:
                                 response_content += response_json["choices"][0]["delta"]["content"]
                             if "usage" in response_json and "prompt_tokens" in response_json["usage"]:
@@ -393,15 +445,16 @@ def handsome_chat_completions():
                             for item in message["content"]:
                                 if isinstance(item, dict) and item.get("type") == "text":
                                     user_content += item.get("text", "") + " "
                 user_content = user_content.strip()
                 user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
                 response_content_replaced = response_content.replace('\n', '\\n').replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, 提示token: {prompt_tokens}, 输出token: {completion_tokens}, 首字用时: {first_token_time:.4f}秒, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}, 用户的内容: {user_content_replaced}, 输出的内容: {response_content_replaced}"
                 )
             return Response(stream_with_context(generate()), content_type=response.headers['Content-Type'])
         else:
             response.raise_for_status()
@@ -429,12 +482,12 @@ def handsome_chat_completions():
                             for item in message["content"]:
                                 if isinstance(item, dict) and item.get("type") == "text":
                                     user_content += item.get("text", "") + " "
                 user_content = user_content.strip()
                 user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
                 response_content_replaced = response_content.replace('\n', '\\n').replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, 提示token: {prompt_tokens}, 输出token: {completion_tokens}, 首字用时: 0, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}, 用户的内容: {user_content_replaced}, 输出的内容: {response_content_replaced}"
                 )
@@ -552,6 +605,70 @@ def billing_subscription():
         "business_address": None
     })
 if __name__ == '__main__':
     import json
     logging.info(f"环境变量：{os.environ}")
@@ -569,4 +686,4 @@ if __name__ == '__main__':
     refresh_models()
     logging.info("首次刷新模型列表已手动触发执行")
-    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))

 import requests
 import json
 import random
+import uuid
 import concurrent.futures
 from datetime import datetime, timedelta
 from apscheduler.schedulers.background import BackgroundScheduler
 API_ENDPOINT = "https://api.siliconflow.cn/v1/user/info"
 TEST_MODEL_ENDPOINT = "https://api.siliconflow.cn/v1/chat/completions"
 MODELS_ENDPOINT = "https://api.siliconflow.cn/v1/models"
+EMBEDDINGS_ENDPOINT = "https://api.siliconflow.cn/v1/embeddings"
 app = Flask(__name__)
 all_models = []
 free_models = []
+embedding_models = []
+free_embedding_models = []
 invalid_keys_global = []
 free_keys_global = []
                                      "stream": False
                                  },
                                  timeout=10)
+        if response.status_code == 429 or response.status_code == 200:
             return True
         else:
             return False
     """
     刷新模型列表和免费模型列表。
     """
+    global all_models, free_models, embedding_models, free_embedding_models
+    all_models = get_all_models(FREE_MODEL_TEST_KEY, "chat")
+    embedding_models = get_all_models(FREE_MODEL_TEST_KEY, "embedding")
     free_models = []
+    free_embedding_models = []
     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        future_to_model = {executor.submit(test_model_availability, FREE_MODEL_TEST_KEY, model): model for model in
+                           all_models}
         for future in concurrent.futures.as_completed(future_to_model):
             model = future_to_model[future]
             try:
             except Exception as exc:
                 logging.error(f"模型 {model} 测试生成异常: {exc}")
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        future_to_model = {
+            executor.submit(test_embedding_model_availability, FREE_MODEL_TEST_KEY, model): model for model in
+            embedding_models}
+        for future in concurrent.futures.as_completed(future_to_model):
+            model = future_to_model[future]
+            try:
+                is_free = future.result()
+                if is_free:
+                    free_embedding_models.append(model)
+            except Exception as exc:
+                logging.error(f"模型 {model} 测试生成异常: {exc}")
+    logging.info(f"所有文本模型列表：{all_models}")
+    logging.info(f"免费文本模型列表：{free_models}")
+    logging.info(f"所有向量模型列表：{embedding_models}")
+    logging.info(f"免费向量模型列表：{free_embedding_models}")
+def test_embedding_model_availability(api_key, model_name):
+    """
+    Test whether the given embedding model is usable with the given API key.
+
+    Sends a minimal embeddings request (input ["hi"]) for model_name to
+    EMBEDDINGS_ENDPOINT with a 10-second timeout.
+
+    Returns True when the response status code is 200 or 429, and False for
+    any other status code or when the request raises a
+    requests.exceptions.RequestException.
+    """
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    try:
+        response = requests.post(EMBEDDINGS_ENDPOINT,
+                                 headers=headers,
+                                 json={
+                                     "model": model_name,
+                                     "input": ["hi"],
+                                 },
+                                 timeout=10)
+        # 429 is counted as "available" alongside 200.
+        # NOTE(review): presumably a rate-limited reply still means the key may use the model - confirm.
+        if response.status_code == 429 or response.status_code == 200:
+            return True
+        else:
+            return False
+    except requests.exceptions.RequestException as e:
+        # NOTE(review): the message below writes the full API key to the log.
+        logging.error(f"测试向量模型 {model_name} 可用性失败，API Key：{api_key}，错误信息：{e}")
+        return False
 def load_keys():
     """
             else:
                 return "unverified"
+def get_all_models(api_key, sub_type):
     """
     获取所有模型列表。
     """
         "Content-Type": "application/json"
     }
     try:
+        response = requests.get(MODELS_ENDPOINT, headers=headers, params={"sub_type": sub_type})
         response.raise_for_status()
         data = response.json()
         if isinstance(data, dict) and 'data' in data and isinstance(data['data'], list):
+            return [model.get("id") for model in data["data"] if
+                    isinstance(model, dict) and "id" in model]
         else:
             logging.error("获取模型列表失败：响应数据格式不正确")
             return []
         logging.error(f"解析模型列表失败，API Key：{api_key}，错误信息：{e}")
         return []
+def determine_request_type(model_name, model_list, free_model_list):
     """
     根据用户请求的模型判断请求类型。
     """
+    if model_name in free_model_list:
         return "free"
+    elif model_name in model_list:
         return "paid"
     else:
         return "unknown"
     for _ in range(len(available_keys)):
         key = available_keys[current_index % len(available_keys)]
         current_index += 1
         if key_is_valid(key, request_type):
             model_key_indices[model_name] = current_index
             return key
     """
     if request_type == "invalid":
         return False
     credit_summary = get_credit_summary(key)
     if credit_summary is None:
         return False
     total_balance = credit_summary.get("total_balance", 0)
     if request_type == "free":
         return True
     elif request_type == "paid" or request_type == "unverified":
                 credit_summary = get_credit_summary(token)
                 balance = credit_summary.get("total_balance", 0) if credit_summary else 0
                 if key_type == "invalid":
+                    results.append(
+                        {"token": token, "type": "无效 KEY", "balance": balance, "message": "无法获取额度信息"})
                 elif key_type == "free":
                     results.append({"token": token, "type": "免费 KEY", "balance": balance, "message": "额度不足"})
                 elif key_type == "unverified":
+                    results.append(
+                        {"token": token, "type": "未实名 KEY", "balance": balance, "message": "无法使用指定模型"})
                 elif key_type == "valid":
+                    results.append(
+                        {"token": token, "type": "有效 KEY", "balance": balance, "message": "可以使用指定模型"})
             except Exception as exc:
                 logging.error(f"处理 Token {token} 生成异常: {exc}")
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
+    request_type = determine_request_type(model_name, all_models, free_models)
     api_key = select_key(request_type, model_name)
     if not api_key:
+        return jsonify(
+            {"error": "No available API key for this request type or all keys have reached their limits"}), 429
     headers = {
         "Authorization": f"Bearer {api_key}",
                             if "usage" in response_json and "completion_tokens" in response_json["usage"]:
                                 completion_tokens = response_json["usage"]["completion_tokens"]
+                            if "choices" in response_json and len(response_json["choices"]) > 0 and "delta" in \
+                                    response_json["choices"][0] and "content" in response_json["choices"][0][
+                                "delta"]:
                                 response_content += response_json["choices"][0]["delta"]["content"]
                             if "usage" in response_json and "prompt_tokens" in response_json["usage"]:
                             for item in message["content"]:
                                 if isinstance(item, dict) and item.get("type") == "text":
                                     user_content += item.get("text", "") + " "
                 user_content = user_content.strip()
                 user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
                 response_content_replaced = response_content.replace('\n', '\\n').replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, 提示token: {prompt_tokens}, 输出token: {completion_tokens}, 首字用时: {first_token_time:.4f}秒, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}, 用户的内容: {user_content_replaced}, 输出的内容: {response_content_replaced}"
                 )
             return Response(stream_with_context(generate()), content_type=response.headers['Content-Type'])
         else:
             response.raise_for_status()
                             for item in message["content"]:
                                 if isinstance(item, dict) and item.get("type") == "text":
                                     user_content += item.get("text", "") + " "
                 user_content = user_content.strip()
                 user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
                 response_content_replaced = response_content.replace('\n', '\\n').replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, 提示token: {prompt_tokens}, 输出token: {completion_tokens}, 首字用时: 0, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}, 用户的内容: {user_content_replaced}, 输出的内容: {response_content_replaced}"
                 )
         "business_address": None
     })
+@app.route('/handsome/v1/embeddings', methods=['POST'])
+def handsome_embeddings():
+    """
+    Proxy a POST embeddings request to EMBEDDINGS_ENDPOINT.
+
+    The request's JSON body is forwarded unchanged, authenticated with an API
+    key chosen by select_key for the requested model.
+
+    Responses:
+        401 - check_authorization(request) is falsy.
+        400 - the JSON body is missing/empty or has no 'model' field.
+        429 - select_key returned no key, or the upstream replied 429 (the
+              upstream JSON body is passed through in that case).
+        500 - the upstream call raised requests.exceptions.RequestException.
+        200 - a rebuilt payload with "object", "data", "model" and "usage".
+    """
+    if not check_authorization(request):
+        return jsonify({"error": "Unauthorized"}), 401
+    data = request.get_json()
+    if not data or 'model' not in data:
+        return jsonify({"error": "Invalid request data"}), 400
+    model_name = data['model']
+    # Classify the model against the embedding model lists, then pick a key for that request type.
+    request_type = determine_request_type(model_name, embedding_models, free_embedding_models)
+    api_key = select_key(request_type, model_name)
+    if not api_key:
+        return jsonify(
+            {"error": "No available API key for this request type or all keys have reached their limits"}), 429
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    try:
+        # NOTE(review): `time` is used here; its import is not visible in this view - confirm it exists.
+        start_time = time.time()
+        response = requests.post(
+            EMBEDDINGS_ENDPOINT,
+            headers=headers,
+            json=data,
+            timeout=60
+        )
+        # Pass an upstream 429 straight through before raise_for_status() turns it into an exception.
+        if response.status_code == 429:
+            return jsonify(response.json()), 429
+        response.raise_for_status()
+        end_time = time.time()
+        response_json = response.json()
+        total_time = end_time - start_time
+        try:
+            prompt_tokens = response_json["usage"]["prompt_tokens"]
+            embedding_data = response_json["data"]
+        except (KeyError, ValueError, IndexError) as e:
+            # Missing fields are logged and replaced with empty defaults; the request still returns a payload.
+            logging.error(f"解析响应 JSON 失败: {e}, 完整内容: {response_json}")
+            prompt_tokens = 0
+            embedding_data = []
+        # NOTE(review): the message below writes the full API key to the log.
+        logging.info(
+            f"使用的key: {api_key}, 提示token: {prompt_tokens}, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}"
+        )
+        # The reply is rebuilt rather than forwarded; total_tokens is set equal to prompt_tokens.
+        return jsonify({
+            "object": "list",
+            "data": embedding_data,
+            "model": model_name,
+            "usage": {
+                "prompt_tokens": prompt_tokens,
+                "total_tokens": prompt_tokens
+            }
+        })
+    except requests.exceptions.RequestException as e:
+        return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     import json
     logging.info(f"环境变量：{os.environ}")
     refresh_models()
     logging.info("首次刷新模型列表已手动触发执行")
+    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))