Upload 4 files
app.py
CHANGED
@@ -90,10 +90,14 @@ class ResponseWrapper:
         self._data = data
         self._text = self._extract_text()
         self._finish_reason = self._extract_finish_reason()
+        if self.finish_reason != "STOP":
+            # 抛出错误 (raise an error)
+            raise StopCandidateException(f"生成文本失败: {self.finish_reason}")
         self._prompt_token_count = self._extract_prompt_token_count()
         self._candidates_token_count = self._extract_candidates_token_count()
         self._total_token_count = self._extract_total_token_count()
         self._thoughts = self._extract_thoughts()
+        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)
 
     def _extract_thoughts(self) -> Optional[str]:
         try:
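The new guard turns any non-STOP finish reason into an exception at wrap time, before token counts are even extracted, and caches a pretty-printed copy of the raw payload for logging. The StopCandidateException class itself is defined outside this diff; a minimal runnable sketch of what the new lines assume (the exception body and payload shape are assumptions, the gating logic mirrors the diff):

import json

class StopCandidateException(Exception):
    """Assumed definition: raised when Gemini finishes for any reason other than STOP."""

def wrap(data: dict) -> str:
    # Mirror of the new __init__ logic: gate on finishReason first,
    # then cache a pretty-printed payload for later logging.
    finish_reason = data.get("candidates", [{}])[0].get("finishReason")
    if finish_reason != "STOP":
        raise StopCandidateException(f"生成文本失败: {finish_reason}")
    return json.dumps(data, indent=4, ensure_ascii=False)

try:
    wrap({"candidates": [{"finishReason": "SAFETY"}]})   # a blocked candidate
except StopCandidateException as e:
    print(f"blocked: {e}")                               # fails fast now

One consequence worth noting: because the raise happens inside __init__, no wrapper object ever exists for a blocked candidate, so callers must catch the exception rather than inspect the instance.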
@@ -157,6 +161,14 @@ class ResponseWrapper:
     def total_token_count(self) -> Optional[int]:
         return self._total_token_count
 
+    @property
+    def thoughts(self) -> Optional[str]:
+        return self._thoughts
+
+    @property
+    def json_dumps(self) -> str:
+        return self._json_dumps
+
 class APIKeyManager:
     def __init__(self):
         self.api_keys = re.findall(r"AIzaSy[a-zA-Z0-9_-]{33}", os.environ.get('KeyArray'))
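The two new properties expose the fields __init__ now populates. Just below them, unchanged in this commit, APIKeyManager harvests keys with a fixed-width regex, so KeyArray can be any blob of text that contains keys. A self-contained check of that pattern (both sample keys are fabricated):

import re

# Pattern from APIKeyManager: "AIzaSy" plus exactly 33 characters from
# [a-zA-Z0-9_-], i.e. the 39-character Google API key format.
KEY_RE = r"AIzaSy[a-zA-Z0-9_-]{33}"

key_array = "AIzaSy" + "A" * 33 + "; junk; " + "AIzaSy" + "b_-9" * 8 + "c"
print(re.findall(KEY_RE, key_array))   # finds both fake keys

Caveat: os.environ.get('KeyArray') returns None when the variable is unset, and re.findall would then raise a TypeError; os.environ.get('KeyArray', '') would degrade to an empty key list instead.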
@@ -392,7 +404,7 @@ def chat_completions():
         response_type = 'streamGenerateContent' if stream else 'generateContent'
         is_SSE = '&alt=sse' if stream else ''
 
-        …
+        contents, system_instruction, error_response = func.process_messages_for_gemini(messages)
 
         if error_response:
             logger.error(f"处理输入消息时出错↙\n {error_response}")
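response_type and is_SSE select between Gemini's one-shot and SSE streaming endpoints; the request line that uses them sits outside this hunk. Assuming the standard generativelanguage.googleapis.com REST layout (the base URL and query shape are assumptions, only the two variables come from the diff), the URL being built looks like:

# Hypothetical reconstruction of the request URL; base URL and query
# layout are assumptions, not shown in this commit.
stream = True
model = "gemini-1.5-pro"   # illustrative model name
api_key = "AIzaSy..."      # deliberately truncated

response_type = 'streamGenerateContent' if stream else 'generateContent'
is_SSE = '&alt=sse' if stream else ''
url = (f"https://generativelanguage.googleapis.com/v1beta/models/"
       f"{model}:{response_type}?key={api_key}{is_SSE}")
print(url)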
@@ -414,7 +426,7 @@ def chat_completions():
         }
 
         data = {
-            "contents": …
+            "contents": contents,
             "generationConfig": {
                 "temperature": temperature,
                 "maxOutputTokens": max_tokens,
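With contents wired in from process_messages_for_gemini, the body posted to Gemini takes roughly the shape below (a sketch: the hunk cuts off after maxOutputTokens, and all values are illustrative):

temperature, max_tokens = 0.7, 1024   # illustrative values

data = {
    "contents": [
        {"role": "user", "parts": [{"text": "Hello"}]},
        {"role": "model", "parts": [{"text": "Hi, how can I help?"}]},
        {"role": "user", "parts": [{"text": "Summarize this page."}]},
    ],
    "generationConfig": {
        "temperature": temperature,
        "maxOutputTokens": max_tokens,
    },
}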
@@ -429,15 +441,16 @@ def chat_completions():
             response.raise_for_status()
 
             if stream:
-                …
                 return 1, response
             else:
-                …
                 return 1, ResponseWrapper(response.json())
         except requests.exceptions.RequestException as e:
             return handle_api_error(e, attempt)
+        except StopCandidateException as e:
+            return handle_api_error(e, attempt)
 
     def generate_stream(response):
+        logger.info(f"流式开始 →")
         buffer = b""
         try:
             for line in response.iter_lines():
@@ -486,9 +499,10 @@ def chat_completions():
                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
 
             yield f"data: {json.dumps({'choices': [{'delta': {}, 'finish_reason': 'stop', 'index': 0}]})}\n\n"
-            …
+            logger.info(f"流式结束 ←")
+            logger.info(f"200!")
         except Exception as e:
-            logger.error(f"…
+            logger.error(f"流式处理错误↙\n{e}")
             yield f"data: {json.dumps({'error': str(e)})}\n\n"
 
         attempt = 0
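generate_stream re-emits Gemini's stream as OpenAI-style "data:" lines, and the new log lines bracket the stream's start and end. A minimal client for consuming the endpoint (host and port are placeholders; the /hf/v1 prefix is from this repo's routes):

import json
import requests

url = "http://localhost:7860/hf/v1/chat/completions"   # assumed host/port
payload = {"model": "gemini-1.5-pro", "stream": True,
           "messages": [{"role": "user", "content": "Hi"}]}

with requests.post(url, json=payload, stream=True) as r:
    for raw in r.iter_lines():
        if not raw.startswith(b"data: "):
            continue
        chunk = json.loads(raw[len(b"data: "):])
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)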
@@ -498,8 +512,8 @@ def chat_completions():
         logger.info(f"第 {attempt}/{MAX_RETRIES} 次尝试 ...")
         success, response = do_request(current_api_key, attempt)
 
-        if success == …
-            …
+        if success == 0:
+            continue
         elif success == 2:
 
             logger.error(f"{model} 很可能暂时不可用,请更换模型或未来一段时间再试")
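The loop now distinguishes three outcomes from do_request. Only codes 0 and 2 are visible at this call site and 1 at the return sites inside do_request; the convention below is inferred from those, not stated anywhere in the diff:

# Inferred tri-state convention for (success, response) from do_request.
def do_request_sketch(outcome: str):
    if outcome == "ok":
        return 1, {"note": "requests.Response or ResponseWrapper"}
    if outcome == "model_down":
        return 2, None    # caller aborts with the 503 "model unavailable" body
    return 0, None        # retryable failure: caller continues to the next attempt

for outcome in ("retry", "model_down", "ok"):
    print(outcome, "->", do_request_sketch(outcome))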
@@ -511,6 +525,48 @@ def chat_completions():
             }
             return jsonify(response), 503
 
+        if stream:
+            return Response(
+                stream_with_context(generate_stream(response)),
+                mimetype='text/event-stream'
+            )
+        else:
+            try:
+                text_content = response.text
+                prompt_tokens = response.prompt_token_count
+                completion_tokens = response.candidates_token_count
+                total_tokens = response.total_token_count
+                finish_reason = response.finish_reason
+                json_dumps = response.json_dumps
+                logger.info(f"AI响应处理成功↓\n{json_dumps}")
+                if is_thinking and show_thoughts:
+                    text_content = response.thoughts + '\n' + text_content
+            except StopCandidateException as e:
+                logger.error(f"生成内容失败↙\n{e}")
+                continue
+
+            response_data = {
+                'id': 'chatcmpl-xxxxxxxxxxxx',
+                'object': 'chat.completion',
+                'created': int(datetime.now().timestamp()),
+                'model': model,
+                'choices': [{
+                    'index': 0,
+                    'message': {
+                        'role': 'assistant',
+                        'content': text_content
+                    },
+                    'finish_reason': finish_reason
+                }],
+                'usage': {
+                    'prompt_tokens': prompt_tokens,
+                    'completion_tokens': completion_tokens,
+                    'total_tokens': total_tokens
+                }
+            }
+            logger.info(f"200!")
+            return jsonify(response_data)
+
     else:
         logger.error(f"{MAX_RETRIES} 次尝试均失败,请调整配置,或等待官方恢复,或向Moonfanz反馈")
         response = {
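Compared with the old placement (deleted in the next hunk), this block now lives inside the retry loop, and a non-STOP candidate no longer surfaces as an AttributeError with an immediate 500: it raises StopCandidateException and simply consumes one retry via continue. A compressed, runnable sketch of that control flow (names reused from the diff, the attempt body is stubbed):

import random

class StopCandidateException(Exception):
    pass

MAX_RETRIES = 3
attempt = 0
while attempt < MAX_RETRIES:
    attempt += 1
    try:
        # Stand-in for do_request + ResponseWrapper; construction raises
        # when the candidate's finish_reason is not "STOP".
        if random.random() < 0.5:
            raise StopCandidateException("SAFETY")
        print("got text")
        break
    except StopCandidateException as e:
        print(f"attempt {attempt} blocked ({e}), retrying")
        continue
else:
    print("all retries failed -> the 500 branch below")

The while ... else: shape matters here: the else: branch that follows in the file runs only when every attempt is exhausted without a break.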
@@ -521,52 +577,6 @@ def chat_completions():
         }
         return jsonify(response), 500 if response is not None else 503
 
-    if stream:
-        return Response(
-            stream_with_context(generate_stream(response)),
-            mimetype='text/event-stream'
-        )
-    else:
-        try:
-            text_content = response.text
-            prompt_tokens = response.prompt_token_count
-            completion_tokens = response.candidates_token_count
-            total_tokens = response.total_token_count
-            finish_reason = response.finish_reason
-            if is_thinking and show_thoughts:
-                # 把thoughts加到text_content的前面再加一个回车 (prepend thoughts to text_content, then a newline)
-                text_content = response.thoughts + '\n' + text_content
-            logger.info(f"finish_reason: {finish_reason}")
-        except AttributeError as e:
-            return jsonify({
-                'error': {
-                    'message': 'AI响应处理失败',
-                    'type': 'response_processing_error'
-                }
-            }), 500
-
-        response_data = {
-            'id': 'chatcmpl-xxxxxxxxxxxx',
-            'object': 'chat.completion',
-            'created': int(datetime.now().timestamp()),
-            'model': model,
-            'choices': [{
-                'index': 0,
-                'message': {
-                    'role': 'assistant',
-                    'content': text_content
-                },
-                'finish_reason': finish_reason
-            }],
-            'usage': {
-                'prompt_tokens': prompt_tokens,
-                'completion_tokens': completion_tokens,
-                'total_tokens': total_tokens
-            }
-        }
-        logger.info(f"200!")
-        return jsonify(response_data)
-
 @app.route('/hf/v1/models', methods=['GET'])
 def list_models():
     response = {"object": "list", "data": GEMINI_MODELS}
func.py
CHANGED
@@ -35,7 +35,7 @@ def authenticate_request(request):
     return True, None, None
 
 def process_messages_for_gemini(messages):
-    …
+    contents = []
     errors = []
     system_instruction_text = ""
     is_system_phase = True
@@ -53,11 +53,11 @@ def process_messages_for_gemini(messages):
             is_system_phase = False
 
             if role == 'user':
-                …
+                contents.append({"role": "user", "parts": [{"text": content}]})
             elif role == 'system':
-                …
+                contents.append({"role": "user", "parts": [{"text": content}]})
             elif role == 'assistant':
-                …
+                contents.append({"role": "model", "parts": [{"text": content}]})
             else:
                 errors.append(f"Invalid role: {role}")
         elif isinstance(content, list):
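These branches amount to a plain OpenAI-to-Gemini role translation for string contents, with system folded into user (Gemini's contents array has no system role; the dedicated system-instruction phase is handled earlier in the function). Worked by hand for a typical message list:

messages = [
    {"role": "system", "content": "Be brief."},   # a system turn after the system phase ends
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]

# What the new branches append: system/user -> "user", assistant -> "model".
contents = []
for m in messages:
    role = "model" if m["role"] == "assistant" else "user"
    contents.append({"role": role, "parts": [{"text": m["content"]}]})
print(contents)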
@@ -100,13 +100,13 @@ def process_messages_for_gemini(messages):
 
         if parts:
             if role in ['user', 'system']:
-                …
+                contents.append({"role": "user", "parts": parts})
             elif role in ['assistant']:
-                …
+                contents.append({"role": "model", "parts": parts})
             else:
                 errors.append(f"Invalid role: {role}")
 
     if errors:
-        return …
+        return contents, {"parts": [{"text": system_instruction_text}]}, (jsonify({'error': errors}), 400)
     else:
-        return …
+        return contents, {"parts": [{"text": system_instruction_text}]}, None
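The rebuilt returns give the function a fixed three-part contract matching how app.py unpacks it: the contents list, a system-instruction dict, and either a ready-made Flask (jsonify(...), 400) tuple or None. A caller-side sketch with the same shape (the stand-in function and the test context are for illustration only):

from flask import Flask, jsonify

app = Flask(__name__)

def fake_process(messages):
    # Same return shape as process_messages_for_gemini.
    errors = [f"Invalid role: {m['role']}" for m in messages
              if m["role"] not in ("user", "system", "assistant")]
    contents = [{"role": "user", "parts": [{"text": m["content"]}]} for m in messages]
    system_instruction = {"parts": [{"text": ""}]}
    if errors:
        return contents, system_instruction, (jsonify({'error': errors}), 400)
    return contents, system_instruction, None

with app.test_request_context():
    _, _, err = fake_process([{"role": "tool", "content": "x"}])
    print(err[1])   # 400: a route can return `err` as-is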