Update api/utils.py
Browse files- api/utils.py +28 -25
api/utils.py
CHANGED
@@ -120,9 +120,8 @@ async def process_streaming_response(request: ChatRequest):
|
|
120 |
|
121 |
json_data = build_json_data(request, h_value, model_prefix)
|
122 |
|
123 |
-
# Initialize buffer to handle BLOCKED_MESSAGE
|
124 |
-
|
125 |
-
buffer_size = len(BLOCKED_MESSAGE) - 1
|
126 |
|
127 |
async with httpx.AsyncClient() as client:
|
128 |
try:
|
@@ -136,32 +135,36 @@ async def process_streaming_response(request: ChatRequest):
|
|
136 |
response.raise_for_status()
|
137 |
async for chunk in response.aiter_text():
|
138 |
if chunk:
|
139 |
-
buffer
|
|
|
140 |
|
141 |
-
# Remove any occurrence of BLOCKED_MESSAGE in
|
142 |
-
if BLOCKED_MESSAGE in
|
143 |
logger.info("Blocked message detected in response.")
|
144 |
-
|
145 |
-
|
146 |
-
#
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
161 |
logger.info("Blocked message detected in remaining buffer.")
|
162 |
-
|
163 |
|
164 |
-
cleaned_content = strip_model_prefix(
|
165 |
timestamp = int(datetime.now().timestamp())
|
166 |
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
167 |
|
|
|
120 |
|
121 |
json_data = build_json_data(request, h_value, model_prefix)
|
122 |
|
123 |
+
# Initialize rolling buffer to handle BLOCKED_MESSAGE split across chunks
|
124 |
+
rolling_buffer = ""
|
|
|
125 |
|
126 |
async with httpx.AsyncClient() as client:
|
127 |
try:
|
|
|
135 |
response.raise_for_status()
|
136 |
async for chunk in response.aiter_text():
|
137 |
if chunk:
|
138 |
+
# Combine rolling buffer with current chunk
|
139 |
+
combined_chunk = rolling_buffer + chunk
|
140 |
|
141 |
+
# Remove any occurrence of BLOCKED_MESSAGE in combined_chunk
|
142 |
+
if BLOCKED_MESSAGE in combined_chunk:
|
143 |
logger.info("Blocked message detected in response.")
|
144 |
+
combined_chunk = combined_chunk.replace(BLOCKED_MESSAGE, '')
|
145 |
+
|
146 |
+
# Remove model prefix if present
|
147 |
+
cleaned_content = strip_model_prefix(combined_chunk, model_prefix)
|
148 |
+
|
149 |
+
# Yield the cleaned content
|
150 |
+
timestamp = int(datetime.now().timestamp())
|
151 |
+
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
152 |
+
|
153 |
+
# Update rolling buffer with the end of the combined_chunk
|
154 |
+
# Keep only the last len(BLOCKED_MESSAGE) - 1 characters
|
155 |
+
rolling_buffer = combined_chunk[-(len(BLOCKED_MESSAGE) - 1):]
|
156 |
+
else:
|
157 |
+
# If chunk is empty, reset rolling buffer
|
158 |
+
rolling_buffer = ""
|
159 |
+
|
160 |
+
# After streaming is done, check if any remaining content is in the rolling buffer
|
161 |
+
if rolling_buffer:
|
162 |
+
# Remove any occurrence of BLOCKED_MESSAGE in rolling buffer
|
163 |
+
if BLOCKED_MESSAGE in rolling_buffer:
|
164 |
logger.info("Blocked message detected in remaining buffer.")
|
165 |
+
rolling_buffer = rolling_buffer.replace(BLOCKED_MESSAGE, '')
|
166 |
|
167 |
+
cleaned_content = strip_model_prefix(rolling_buffer, model_prefix)
|
168 |
timestamp = int(datetime.now().timestamp())
|
169 |
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
170 |
|