zolicsaki committed on
Commit
96c79d5
·
verified ·
1 Parent(s): c18ceb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -63,7 +63,7 @@ def st_capture(output_func: Callable[[str], None]) -> Generator:
63
  stdout.write = new_write # type: ignore
64
  yield
65
 
66
- async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=2):
67
  # First construct messages
68
  messages = []
69
  if system_prompt is not None:
@@ -94,10 +94,10 @@ async def run_samba_api_inference(query, system_prompt = None, ignore_context=Fa
94
  post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
95
  post_response.raise_for_status()
96
  except requests.exceptions.HTTPError as e:
97
- if post_response.status_code in {401, 503, 504}:
98
  st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
99
  return ""
100
- if post_response.status_code in {429}:
101
  st.info("Rate limit hit because of all the pipelined queries, wait one second...")
102
  await asyncio.sleep(num_seconds_to_sleep)
103
  return await run_samba_api_inference(query) # Retry the request
 
63
  stdout.write = new_write # type: ignore
64
  yield
65
 
66
+ async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=5):
67
  # First construct messages
68
  messages = []
69
  if system_prompt is not None:
 
94
  post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
95
  post_response.raise_for_status()
96
  except requests.exceptions.HTTPError as e:
97
+ if post_response.status_code in {401, 503}:
98
  st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
99
  return ""
100
+ if post_response.status_code in {429, 504}:
101
  st.info("Rate limit hit because of all the pipelined queries, wait one second...")
102
  await asyncio.sleep(num_seconds_to_sleep)
103
  return await run_samba_api_inference(query) # Retry the request