ffreemt committed
Commit 1681f8a
1 Parent(s): bd2d2e2
Files changed (1)
  1. app.py +18 -14
app.py CHANGED
@@ -47,7 +47,8 @@ _ = (
     "golay" in platform.node()
     or "okteto" in platform.node()
     or Path("/kaggle").exists()
-    or psutil.cpu_count(logical=False) < 4
+    # or psutil.cpu_count(logical=False) < 4
+    or 1  # run 7b in hf
 )
 
 if _:
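
This hunk short-circuits the host check: the new `or 1  # run 7b in hf` arm makes the expression always truthy, so the branch that previously fired only on known small hosts now also runs on Hugging Face hardware. A standalone sketch of the detection pattern: the hostnames and the 4-core threshold come from the source, and the `or 0` guard is added because `psutil.cpu_count(logical=False)` can return `None`:

```python
import platform
from pathlib import Path

import psutil

# Truthy on a known low-resource host; mirrors the source expression
# before this commit forced it to True.
is_low_resource = (
    "golay" in platform.node()
    or "okteto" in platform.node()
    or Path("/kaggle").exists()
    or (psutil.cpu_count(logical=False) or 0) < 4  # None-safe core count
)
print(f"{is_low_resource=}")
```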
@@ -116,7 +117,7 @@ except Exception as exc_:
 LLM = AutoModelForCausalLM.from_pretrained(
     model_loc,
     model_type="llama",
-    threads=cpu_count,
+    # threads=cpu_count,
 )
 
 logger.info(f"done load llm {model_loc=} {file_size=}G")
@@ -145,7 +146,7 @@ class GenerationConfig:
     seed: int = 42
     reset: bool = False
     stream: bool = True
-    threads: int = cpu_count
+    # threads: int = cpu_count
     # stop: list[str] = field(default_factory=lambda: [stop_string])
 
 
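The matching `threads` field also leaves `GenerationConfig`, so per-request settings no longer pin a thread count either (the same edit appears in `predict_api` below). A sketch of how such a dataclass is typically forwarded to the model call; only `seed`, `reset`, and `stream` appear in this hunk, the other fields and defaults are assumptions:

```python
from dataclasses import asdict, dataclass

@dataclass
class GenerationConfig:
    max_new_tokens: int = 512  # assumed default, not shown in the hunk
    temperature: float = 0.8   # assumed default, not shown in the hunk
    seed: int = 42
    reset: bool = False
    stream: bool = True
    # threads: int = cpu_count  # removed by this commit; library default applies

config = GenerationConfig(stream=False)
# llm(prompt, **asdict(config))  # forward settings as keyword arguments,
# which ctransformers' generate API accepts for seed/reset/stream
```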
@@ -237,7 +238,7 @@ def predict_api(prompt):
         seed=42,
         reset=True,  # reset history (cache)
         stream=False,
-        threads=cpu_count,
+        # threads=cpu_count,
         # stop=prompt_prefix[1:2],
     )
 
@@ -392,18 +393,18 @@ with gr.Blocks(
         fn=user,
         inputs=[msg, chatbot],
         outputs=[msg, chatbot],
-        queue=False,
+        queue=True,
         show_progress="full",
-        api_name=False,
+        api_name=None,
     ).then(bot, chatbot, chatbot, queue=False)
     submit.click(
         fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
         inputs=[msg, chatbot],
         outputs=[msg, chatbot],
-        # queue=True,
-        queue=False,
+        queue=True,
+        # queue=False,
         show_progress="full",
-        api_name=False,
+        api_name=None,
     ).then(bot, chatbot, chatbot, queue=False)
 
     clear.click(lambda: None, None, chatbot, queue=False)
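
Both handlers flip `queue=False` to `queue=True` and `api_name=False` to `api_name=None`. In Gradio 3.x, `queue=True` routes the event through the queue enabled by `block.queue(...)` at the bottom of the file, and `api_name=None` restores the default auto-generated endpoint name that `api_name=False` had hidden. A self-contained sketch of the same submit-then-bot wiring, assuming Gradio 3.x; the echo bot stands in for the LLM call:

```python
import gradio as gr

def user(message, history):
    # Append the user turn and clear the textbox.
    return "", history + [[message, None]]

def bot(history):
    # Fill in the assistant turn; stand-in for the LLM call in app.py.
    history[-1][1] = f"echo: {history[-1][0]}"
    return history

with gr.Blocks() as block:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=True,     # route through the shared queue
        api_name=None,  # keep the default endpoint name
    ).then(bot, chatbot, chatbot, queue=False)

block.queue(concurrency_count=1, max_size=5).launch()
```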
@@ -429,13 +430,16 @@ with gr.Blocks(
     # CPU UPGRADE cpu_count=8 32G, model 7G
 
     # does not work
+    _ = """
     # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
     # concurrency_count = max(_, 1)
-
-    if psutil.cpu_count(logical=False) > 8:
-        concurrency_count = max(int(32 / file_size) - 1, 1)
+    if psutil.cpu_count(logical=False) >= 8:
+        # concurrency_count = max(int(32 / file_size) - 1, 1)
     else:
-        concurrency_count = max(int(16 / file_size) - 1, 1)
+        # concurrency_count = max(int(16 / file_size) - 1, 1)
+    # """
+
+    concurrency_count = 1
     logger.info(f"{concurrency_count=}")
 
-    block.queue(concurrency_count=1, max_size=5).launch(debug=True)
+    block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
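
The RAM-based sizing heuristic is fenced off inside a throwaway `_ = """..."""` string (a quick way to disable a block without reindenting), and `concurrency_count` is pinned to 1, which the final `block.queue(...)` now reads from a variable instead of a literal. For reference, on the box described by the "CPU UPGRADE cpu_count=8 32G, model 7G" comment, the disabled arithmetic would have allowed `max(int(32 / 7) - 1, 1) == 3` concurrent workers. An illustrative reconstruction of that heuristic; the 32/16 GB budgets and the 8-core split come from the disabled code:

```python
import psutil

file_size = 7  # model size in GB, per the "model 7G" comment
cores = psutil.cpu_count(logical=False) or 1  # None-safe core count

# Disabled heuristic: a bigger RAM budget on beefier hosts.
if cores >= 8:
    concurrency_count = max(int(32 / file_size) - 1, 1)  # int(32/7)-1 == 3
else:
    concurrency_count = max(int(16 / file_size) - 1, 1)  # int(16/7)-1 == 1
print(f"{concurrency_count=}")
```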
 