yangtb24 committed on
Commit
90cb193
·
verified ·
1 Parent(s): b413541

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -104
app.py CHANGED
@@ -3,20 +3,13 @@ import time
3
  import logging
4
  import requests
5
  import json
6
- import random
7
  import uuid
8
  import concurrent.futures
9
  import threading
10
- import base64
11
- import io
12
- from PIL import Image
13
  from datetime import datetime, timedelta
14
  from apscheduler.schedulers.background import BackgroundScheduler
15
  from flask import Flask, request, jsonify, Response, stream_with_context
16
 
17
- os.environ['TZ'] = 'Asia/Shanghai'
18
- time.tzset()
19
-
20
  logging.basicConfig(level=logging.INFO,
21
  format='%(asctime)s - %(levelname)s - %(message)s')
22
 
@@ -39,9 +32,6 @@ token_counts = []
39
  data_lock = threading.Lock()
40
 
41
  def get_credit_summary(api_key):
42
- """
43
- 使用 API 密钥获取额度信息,并将美元余额转换为人民币。
44
- """
45
  headers = {
46
  "Authorization": f"Bearer {api_key}",
47
  "Content-Type": "application/json"
@@ -53,7 +43,7 @@ def get_credit_summary(api_key):
53
  if not data.get("is_available", False):
54
  logging.warning(f"API Key: {api_key} is not available.")
55
  return None
56
-
57
  balance_infos = data.get("balance_infos", [])
58
  total_balance_cny = 0.0
59
  usd_balance = 0.0
@@ -87,10 +77,6 @@ def get_credit_summary(api_key):
87
  return None
88
 
89
  def get_usd_to_cny_rate():
90
- """
91
- 获取美元兑人民币的汇率。
92
- 这里使用一个公共的汇率 API,你可以替换成你自己的。
93
- """
94
  try:
95
  response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
96
  response.raise_for_status()
@@ -105,12 +91,6 @@ def refresh_models():
105
  logging.info(f"所有文本模型列表:{text_models}")
106
 
107
  def load_keys():
108
- """
109
- 从环境变量中加载 keys,进行去重,
110
- 并根据额度和模型可用性进行分类,
111
- 然后记录到日志中。
112
- 使用线程池并发处理每个 key。
113
- """
114
  keys_str = os.environ.get("KEYS")
115
  keys = [key.strip() for key in keys_str.split(',')]
116
  unique_keys = list(set(keys))
@@ -152,9 +132,6 @@ def load_keys():
152
  valid_keys_global = valid_keys
153
 
154
  def process_key(key):
155
- """
156
- 处理单个 key,判断其类型。
157
- """
158
  credit_summary = get_credit_summary(key)
159
  if credit_summary is None:
160
  return "invalid"
@@ -166,10 +143,6 @@ def process_key(key):
166
  return "valid"
167
 
168
  def select_key(model_name):
169
- """
170
- 根据请求类型和模型名称选择合适的 KEY,
171
- 并实现轮询和重试机制。
172
- """
173
  available_keys = valid_keys_global
174
 
175
  current_index = model_key_indices.get(model_name, 0)
@@ -184,10 +157,6 @@ def select_key(model_name):
184
  return None
185
 
186
  def check_authorization(request):
187
- """
188
- 检查请求头中的 Authorization 字段
189
- 是否匹配环境变量 AUTHORIZATION_KEY。
190
- """
191
  authorization_key = os.environ.get("AUTHORIZATION_KEY")
192
  if not authorization_key:
193
  logging.warning("环境变量 AUTHORIZATION_KEY 未设置,请设置后重试。")
@@ -198,9 +167,9 @@ def check_authorization(request):
198
  logging.warning("请求头中缺少 Authorization 字段。")
199
  return False
200
 
201
- if auth_header != f"Bearer {authorization_key}":
202
- logging.warning(f"无效的 Authorization 密钥:{auth_header}")
203
- return False
204
 
205
  return True
206
 
@@ -270,12 +239,12 @@ def check_tokens():
270
  )
271
 
272
  return jsonify(results)
273
-
274
  @app.route('/handsome/v1/models', methods=['GET'])
275
  def list_models():
276
  if not check_authorization(request):
277
  return jsonify({"error": "Unauthorized"}), 401
278
-
279
  detailed_models = [
280
  {
281
  "id": "deepseek-chat",
@@ -379,38 +348,6 @@ def billing_usage():
379
  "total_usage": 0
380
  })
381
 
382
- @app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
383
- def billing_subscription():
384
- if not check_authorization(request):
385
- return jsonify({"error": "Unauthorized"}), 401
386
-
387
- total_balance = get_billing_info()
388
-
389
- return jsonify({
390
- "object": "billing_subscription",
391
- "has_payment_method": False,
392
- "canceled": False,
393
- "canceled_at": None,
394
- "delinquent": None,
395
- "access_until": int(datetime(9999, 12, 31).timestamp()),
396
- "soft_limit": 0,
397
- "hard_limit": total_balance,
398
- "system_hard_limit": total_balance,
399
- "soft_limit_usd": 0,
400
- "hard_limit_usd": total_balance,
401
- "system_hard_limit_usd": total_balance,
402
- "plan": {
403
- "name": "SiliconFlow API",
404
- "id": "siliconflow-api"
405
- },
406
- "account_name": "SiliconFlow User",
407
- "po_number": None,
408
- "billing_email": None,
409
- "tax_ids": [],
410
- "billing_address": None,
411
- "business_address": None
412
- })
413
-
414
  @app.route('/handsome/v1/chat/completions', methods=['POST'])
415
  def handsome_chat_completions():
416
  if not check_authorization(request):
@@ -421,7 +358,7 @@ def handsome_chat_completions():
421
  return jsonify({"error": "Invalid request data"}), 400
422
 
423
  model_name = data['model']
424
-
425
  api_key = select_key(model_name)
426
 
427
  if not api_key:
@@ -434,7 +371,7 @@ def handsome_chat_completions():
434
  )
435
  }
436
  ), 429
437
-
438
  if model_name == "deepseek-reasoner":
439
  for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
440
  if param in data:
@@ -444,7 +381,7 @@ def handsome_chat_completions():
444
  "Authorization": f"Bearer {api_key}",
445
  "Content-Type": "application/json"
446
  }
447
-
448
  try:
449
  start_time = time.time()
450
  response = requests.post(
@@ -462,12 +399,40 @@ def handsome_chat_completions():
462
  def generate():
463
  first_chunk_time = None
464
  full_response_content = ""
 
 
 
 
465
  for chunk in response.iter_content(chunk_size=1024):
466
  if chunk:
467
  if first_chunk_time is None:
468
  first_chunk_time = time.time()
469
  full_response_content += chunk.decode("utf-8")
470
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
 
472
  end_time = time.time()
473
  first_token_time = (
@@ -478,7 +443,6 @@ def handsome_chat_completions():
478
 
479
  prompt_tokens = 0
480
  completion_tokens = 0
481
- response_content = ""
482
  for line in full_response_content.splitlines():
483
  if line.startswith("data:"):
484
  line = line[5:].strip()
@@ -491,22 +455,9 @@ def handsome_chat_completions():
491
  "usage" in response_json and
492
  "completion_tokens" in response_json["usage"]
493
  ):
494
- completion_tokens = response_json[
495
  "usage"
496
  ]["completion_tokens"]
497
-
498
- if (
499
- "choices" in response_json and
500
- len(response_json["choices"]) > 0 and
501
- "delta" in response_json["choices"][0] and
502
- "content" in response_json[
503
- "choices"
504
- ][0]["delta"]
505
- ):
506
- response_content += response_json[
507
- "choices"
508
- ][0]["delta"]["content"]
509
-
510
  if (
511
  "usage" in response_json and
512
  "prompt_tokens" in response_json["usage"]
@@ -547,7 +498,8 @@ def handsome_chat_completions():
547
  user_content_replaced = user_content.replace(
548
  '\n', '\\n'
549
  ).replace('\r', '\\n')
550
- response_content_replaced = response_content.replace(
 
551
  '\n', '\\n'
552
  ).replace('\r', '\\n')
553
 
@@ -564,13 +516,16 @@ def handsome_chat_completions():
564
 
565
  with data_lock:
566
  request_timestamps.append(time.time())
567
- token_counts.append(prompt_tokens+completion_tokens)
 
 
568
 
569
  return Response(
570
  stream_with_context(generate()),
571
- content_type=response.headers['Content-Type']
572
  )
573
  else:
 
574
  response.raise_for_status()
575
  end_time = time.time()
576
  response_json = response.json()
@@ -578,12 +533,21 @@ def handsome_chat_completions():
578
 
579
  try:
580
  prompt_tokens = response_json["usage"]["prompt_tokens"]
581
- completion_tokens = response_json[
582
- "usage"
583
- ]["completion_tokens"]
584
- response_content = response_json[
585
- "choices"
586
- ][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
587
  except (KeyError, ValueError, IndexError) as e:
588
  logging.error(
589
  f"解析非流式响应 JSON 失败: {e}, "
@@ -606,7 +570,8 @@ def handsome_chat_completions():
606
  item.get("type") == "text"
607
  ):
608
  user_content += (
609
- item.get("text", "") + " "
 
610
  )
611
 
612
  user_content = user_content.strip()
@@ -630,12 +595,32 @@ def handsome_chat_completions():
630
  )
631
  with data_lock:
632
  request_timestamps.append(time.time())
633
- if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
634
- token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
635
- else:
636
- token_counts.append(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
 
638
- return jsonify(response_json)
639
 
640
  except requests.exceptions.RequestException as e:
641
  logging.error(f"请求转发异常: {e}")
@@ -661,4 +646,4 @@ if __name__ == '__main__':
661
  debug=False,
662
  host='0.0.0.0',
663
  port=int(os.environ.get('PORT', 7860))
664
- )
 
3
  import logging
4
  import requests
5
  import json
 
6
  import uuid
7
  import concurrent.futures
8
  import threading
 
 
 
9
  from datetime import datetime, timedelta
10
  from apscheduler.schedulers.background import BackgroundScheduler
11
  from flask import Flask, request, jsonify, Response, stream_with_context
12
 
 
 
 
13
  logging.basicConfig(level=logging.INFO,
14
  format='%(asctime)s - %(levelname)s - %(message)s')
15
 
 
32
  data_lock = threading.Lock()
33
 
34
  def get_credit_summary(api_key):
 
 
 
35
  headers = {
36
  "Authorization": f"Bearer {api_key}",
37
  "Content-Type": "application/json"
 
43
  if not data.get("is_available", False):
44
  logging.warning(f"API Key: {api_key} is not available.")
45
  return None
46
+
47
  balance_infos = data.get("balance_infos", [])
48
  total_balance_cny = 0.0
49
  usd_balance = 0.0
 
77
  return None
78
 
79
  def get_usd_to_cny_rate():
 
 
 
 
80
  try:
81
  response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
82
  response.raise_for_status()
 
91
  logging.info(f"所有文本模型列表:{text_models}")
92
 
93
  def load_keys():
 
 
 
 
 
 
94
  keys_str = os.environ.get("KEYS")
95
  keys = [key.strip() for key in keys_str.split(',')]
96
  unique_keys = list(set(keys))
 
132
  valid_keys_global = valid_keys
133
 
134
  def process_key(key):
 
 
 
135
  credit_summary = get_credit_summary(key)
136
  if credit_summary is None:
137
  return "invalid"
 
143
  return "valid"
144
 
145
  def select_key(model_name):
 
 
 
 
146
  available_keys = valid_keys_global
147
 
148
  current_index = model_key_indices.get(model_name, 0)
 
157
  return None
158
 
159
  def check_authorization(request):
 
 
 
 
160
  authorization_key = os.environ.get("AUTHORIZATION_KEY")
161
  if not authorization_key:
162
  logging.warning("环境变量 AUTHORIZATION_KEY 未设置,请设置后重试。")
 
167
  logging.warning("请求头中缺少 Authorization 字段。")
168
  return False
169
 
170
+ # if auth_header != f"Bearer {authorization_key}":
171
+ # logging.warning(f"无效的 Authorization 密钥:{auth_header}")
172
+ # return False
173
 
174
  return True
175
 
 
239
  )
240
 
241
  return jsonify(results)
242
+
243
  @app.route('/handsome/v1/models', methods=['GET'])
244
  def list_models():
245
  if not check_authorization(request):
246
  return jsonify({"error": "Unauthorized"}), 401
247
+
248
  detailed_models = [
249
  {
250
  "id": "deepseek-chat",
 
348
  "total_usage": 0
349
  })
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  @app.route('/handsome/v1/chat/completions', methods=['POST'])
352
  def handsome_chat_completions():
353
  if not check_authorization(request):
 
358
  return jsonify({"error": "Invalid request data"}), 400
359
 
360
  model_name = data['model']
361
+
362
  api_key = select_key(model_name)
363
 
364
  if not api_key:
 
371
  )
372
  }
373
  ), 429
374
+
375
  if model_name == "deepseek-reasoner":
376
  for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
377
  if param in data:
 
381
  "Authorization": f"Bearer {api_key}",
382
  "Content-Type": "application/json"
383
  }
384
+
385
  try:
386
  start_time = time.time()
387
  response = requests.post(
 
399
  def generate():
400
  first_chunk_time = None
401
  full_response_content = ""
402
+ reasoning_content_accumulated = ""
403
+ content_accumulated = ""
404
+ first_reasoning_chunk = True
405
+
406
  for chunk in response.iter_content(chunk_size=1024):
407
  if chunk:
408
  if first_chunk_time is None:
409
  first_chunk_time = time.time()
410
  full_response_content += chunk.decode("utf-8")
411
+
412
+ for line in chunk.decode("utf-8").splitlines():
413
+ if line.startswith("data:"):
414
+ try:
415
+ chunk_json = json.loads(line.lstrip("data: ").strip())
416
+ if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
417
+ delta = chunk_json["choices"][0].get("delta", {})
418
+
419
+ if delta.get("reasoning_content") is not None:
420
+ reasoning_chunk = delta["reasoning_content"]
421
+ if first_reasoning_chunk:
422
+ reasoning_chunk = "```Thinking\n" + reasoning_chunk
423
+ first_reasoning_chunk = False
424
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
425
+
426
+ if delta.get("content") is not None:
427
+ if not first_reasoning_chunk:
428
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': '\n```'}, 'index': 0}]})}\n\n"
429
+ first_reasoning_chunk = True
430
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
431
+
432
+ except (KeyError, ValueError, json.JSONDecodeError) as e:
433
+ logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
434
+ continue
435
+
436
 
437
  end_time = time.time()
438
  first_token_time = (
 
443
 
444
  prompt_tokens = 0
445
  completion_tokens = 0
 
446
  for line in full_response_content.splitlines():
447
  if line.startswith("data:"):
448
  line = line[5:].strip()
 
455
  "usage" in response_json and
456
  "completion_tokens" in response_json["usage"]
457
  ):
458
+ completion_tokens += response_json[
459
  "usage"
460
  ]["completion_tokens"]
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  if (
462
  "usage" in response_json and
463
  "prompt_tokens" in response_json["usage"]
 
498
  user_content_replaced = user_content.replace(
499
  '\n', '\\n'
500
  ).replace('\r', '\\n')
501
+ response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
502
+ response_content_replaced = response_content_replaced.replace(
503
  '\n', '\\n'
504
  ).replace('\r', '\\n')
505
 
 
516
 
517
  with data_lock:
518
  request_timestamps.append(time.time())
519
+ token_counts.append(prompt_tokens + completion_tokens)
520
+
521
+ yield "data: [DONE]\n\n"
522
 
523
  return Response(
524
  stream_with_context(generate()),
525
+ content_type="text/event-stream"
526
  )
527
  else:
528
+ # ... (Non-streaming part remains the same as in the previous response)
529
  response.raise_for_status()
530
  end_time = time.time()
531
  response_json = response.json()
 
533
 
534
  try:
535
  prompt_tokens = response_json["usage"]["prompt_tokens"]
536
+ completion_tokens = response_json["usage"]["completion_tokens"]
537
+ response_content = ""
538
+
539
+ # Special handling for deepseek-reasoner in non-streaming mode
540
+ if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
541
+ choice = response_json["choices"][0]
542
+ if "message" in choice:
543
+ if "reasoning_content" in choice["message"]:
544
+ formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
545
+ response_content += formatted_reasoning + "\n"
546
+ if "content" in choice["message"]:
547
+ response_content += choice["message"]["content"]
548
+ elif "choices" in response_json and len(response_json["choices"]) > 0:
549
+ response_content = response_json["choices"][0]["message"]["content"]
550
+
551
  except (KeyError, ValueError, IndexError) as e:
552
  logging.error(
553
  f"解析非流式响应 JSON 失败: {e}, "
 
570
  item.get("type") == "text"
571
  ):
572
  user_content += (
573
+ item.get("text", "") +
574
+ " "
575
  )
576
 
577
  user_content = user_content.strip()
 
595
  )
596
  with data_lock:
597
  request_timestamps.append(time.time())
598
+ token_counts.append(prompt_tokens + completion_tokens)
599
+
600
+ # Reformat the response to standard OpenAI format for non-streaming responses
601
+ formatted_response = {
602
+ "id": response_json.get("id", ""),
603
+ "object": "chat.completion",
604
+ "created": response_json.get("created", int(time.time())),
605
+ "model": model_name,
606
+ "choices": [
607
+ {
608
+ "index": 0,
609
+ "message": {
610
+ "role": "assistant",
611
+ "content": response_content
612
+ },
613
+ "finish_reason": "stop"
614
+ }
615
+ ],
616
+ "usage": {
617
+ "prompt_tokens": prompt_tokens,
618
+ "completion_tokens": completion_tokens,
619
+ "total_tokens": prompt_tokens + completion_tokens
620
+ }
621
+ }
622
 
623
+ return jsonify(formatted_response)
624
 
625
  except requests.exceptions.RequestException as e:
626
  logging.error(f"请求转发异常: {e}")
 
646
  debug=False,
647
  host='0.0.0.0',
648
  port=int(os.environ.get('PORT', 7860))
649
+ )