ffreemt committed on
Commit
e92ff33
1 Parent(s): c885b09

Adjust queue setup

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -378,4 +378,9 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) a
378
  # demo.queue().launch(share=False, inbrowser=True)
379
  # demo.queue().launch(share=True, inbrowser=True, debug=True)
380
 
381
- demo.queue().launch(debug=True)
 
 
 
 
 
 
378
  # demo.queue().launch(share=False, inbrowser=True)
379
  # demo.queue().launch(share=True, inbrowser=True, debug=True)
380
 
381
+ # concurrency_count > 1 requires more memory, max_size: queue size
382
+ # T4 medium: 30GB, model size: ~6GB; concurrency_count = 3
383
+ # leave one for api access
384
+ # reduce to 3 if OOM
385
+
386
+ block.queue(concurrency_count=4, max_size=20).launch(debug=True)