Update app.py
Browse files
app.py
CHANGED
@@ -63,7 +63,7 @@ def st_capture(output_func: Callable[[str], None]) -> Generator:
|
|
63 |
stdout.write = new_write # type: ignore
|
64 |
yield
|
65 |
|
66 |
-
async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=
|
67 |
# First construct messages
|
68 |
messages = []
|
69 |
if system_prompt is not None:
|
@@ -94,10 +94,10 @@ async def run_samba_api_inference(query, system_prompt = None, ignore_context=Fa
|
|
94 |
post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
|
95 |
post_response.raise_for_status()
|
96 |
except requests.exceptions.HTTPError as e:
|
97 |
-
if post_response.status_code in {401, 503
|
98 |
st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
|
99 |
return ""
|
100 |
-
if post_response.status_code in {429}:
|
101 |
st.info("Rate limit hit because of all the pipelined queries, wait one second...")
|
102 |
await asyncio.sleep(num_seconds_to_sleep)
|
103 |
return await run_samba_api_inference(query) # Retry the request
|
|
|
63 |
stdout.write = new_write # type: ignore
|
64 |
yield
|
65 |
|
66 |
+
async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=5):
|
67 |
# First construct messages
|
68 |
messages = []
|
69 |
if system_prompt is not None:
|
|
|
94 |
post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
|
95 |
post_response.raise_for_status()
|
96 |
except requests.exceptions.HTTPError as e:
|
97 |
+
if post_response.status_code in {401, 503}:
|
98 |
st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
|
99 |
return ""
|
100 |
+
if post_response.status_code in {429, 504}:
|
101 |
st.info("Rate limit hit because of all the pipelined queries, wait one second...")
|
102 |
await asyncio.sleep(num_seconds_to_sleep)
|
103 |
return await run_samba_api_inference(query) # Retry the request
|