test24

Sleeping

App Files Files Community

Niansuh committed on Nov 18, 2024

Commit

7fb95d2

verified ·

1 Parent(s): e710865

Update api/utils.py

Browse files

Files changed (1) hide show

api/utils.py +28 -25

api/utils.py CHANGED Viewed

@@ -120,9 +120,8 @@ async def process_streaming_response(request: ChatRequest):
     json_data = build_json_data(request, h_value, model_prefix)
-    # Initialize buffer to handle BLOCKED_MESSAGE that may be split across chunks
-    buffer = ""
-    buffer_size = len(BLOCKED_MESSAGE) - 1
     async with httpx.AsyncClient() as client:
         try:
@@ -136,32 +135,36 @@ async def process_streaming_response(request: ChatRequest):
                 response.raise_for_status()
                 async for chunk in response.aiter_text():
                     if chunk:
-                        buffer += chunk
-                        # Remove any occurrence of BLOCKED_MESSAGE in buffer
-                        if BLOCKED_MESSAGE in buffer:
                             logger.info("Blocked message detected in response.")
-                            buffer = buffer.replace(BLOCKED_MESSAGE, '')
-                        # Process the buffer
-                        # Keep last buffer_size characters in buffer to handle partial BLOCKED_MESSAGE
-                        while len(buffer) >= buffer_size:
-                            content_to_yield = buffer[:-buffer_size+1]
-                            buffer = buffer[-buffer_size+1:]
-                            # Remove model prefix if present
-                            cleaned_content = strip_model_prefix(content_to_yield, model_prefix)
-                            timestamp = int(datetime.now().timestamp())
-                            yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
-                # Process any remaining content in buffer
-                if buffer:
-                    # Remove any occurrence of BLOCKED_MESSAGE in buffer
-                    if BLOCKED_MESSAGE in buffer:
                         logger.info("Blocked message detected in remaining buffer.")
-                        buffer = buffer.replace(BLOCKED_MESSAGE, '')
-                    cleaned_content = strip_model_prefix(buffer, model_prefix)
                     timestamp = int(datetime.now().timestamp())
                     yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"

     json_data = build_json_data(request, h_value, model_prefix)
+    # Initialize rolling buffer to handle BLOCKED_MESSAGE split across chunks
+    rolling_buffer = ""
     async with httpx.AsyncClient() as client:
         try:
                 response.raise_for_status()
                 async for chunk in response.aiter_text():
                     if chunk:
+                        # Combine rolling buffer with current chunk
+                        combined_chunk = rolling_buffer + chunk
+                        # Remove any occurrence of BLOCKED_MESSAGE in combined_chunk
+                        if BLOCKED_MESSAGE in combined_chunk:
                             logger.info("Blocked message detected in response.")
+                            combined_chunk = combined_chunk.replace(BLOCKED_MESSAGE, '')
+                        # Remove model prefix if present
+                        cleaned_content = strip_model_prefix(combined_chunk, model_prefix)
+                        # Yield the cleaned content
+                        timestamp = int(datetime.now().timestamp())
+                        yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
+                        # Update rolling buffer with the end of the combined_chunk
+                        # Keep only the last len(BLOCKED_MESSAGE) - 1 characters
+                        rolling_buffer = combined_chunk[-(len(BLOCKED_MESSAGE) - 1):]
+                    else:
+                        # If chunk is empty, reset rolling buffer
+                        rolling_buffer = ""
+                # After streaming is done, check if any remaining content is in the rolling buffer
+                if rolling_buffer:
+                    # Remove any occurrence of BLOCKED_MESSAGE in rolling buffer
+                    if BLOCKED_MESSAGE in rolling_buffer:
                         logger.info("Blocked message detected in remaining buffer.")
+                        rolling_buffer = rolling_buffer.replace(BLOCKED_MESSAGE, '')
+                    cleaned_content = strip_model_prefix(rolling_buffer, model_prefix)
                     timestamp = int(datetime.now().timestamp())
                     yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"