chansung committed
Commit 560ca38 · 1 Parent(s): 1df3c74

Update app.py

Files changed (1)
  1. app.py +20 -20
app.py CHANGED
@@ -190,16 +190,16 @@ with gr.Blocks(css=STYLE) as hf_endpoint:
  with gr.Column(elem_classes=["group-border"]):
  with gr.Row():
  with gr.Column():
- gr.Markdown("""## Hugging Face account ID (name)""")
+ gr.Markdown("""### Hugging Face account ID (name)""")
  hf_account_input = gr.Textbox(show_label=False, elem_classes=["no-label", "small-big"])

  with gr.Column():
- gr.Markdown("## Hugging Face access token")
+ gr.Markdown("### Hugging Face access token")
  hf_token_input = gr.Textbox(show_label=False, type="password", elem_classes=["no-label", "small-big"])

  with gr.Row():
  with gr.Column():
- gr.Markdown("""## Target model
+ gr.Markdown("""### Target model

Model from the Hugging Face hub""")
  repository_selector = gr.Textbox(
@@ -210,7 +210,7 @@ Model from the Hugging Face hub""")
  )

  with gr.Column():
- gr.Markdown("""## Target model version(branch)
+ gr.Markdown("""### Target model version(branch)

Branch name of the Model""")
  revision_selector = gr.Textbox(
@@ -222,14 +222,14 @@ Branch name of the Model""")

  with gr.Column(elem_classes=["group-border"]):
  with gr.Column():
- gr.Markdown("""## Endpoint name
+ gr.Markdown("""### Endpoint name

Name for your new endpoint""")
  endpoint_name_input = gr.Textbox(show_label=False, elem_classes=["no-label", "small-big"])

  with gr.Row():
  with gr.Column():
- gr.Markdown("""## Cloud Provider""")
+ gr.Markdown("""### Cloud Provider""")
  provider_selector = gr.Dropdown(
  choices=providers.keys(),
  interactive=True,
@@ -238,7 +238,7 @@ Name for your new endpoint""")
  )

  with gr.Column():
- gr.Markdown("""## Cloud Region""")
+ gr.Markdown("""### Cloud Region""")
  region_selector = gr.Dropdown(
  [],
  value="",
@@ -249,7 +249,7 @@ Name for your new endpoint""")

  with gr.Row(visible=False):
  with gr.Column():
- gr.Markdown("## Task")
+ gr.Markdown("### Task")
  task_selector = gr.Textbox(
  value="Text Generation",
  interactive=False,
@@ -258,7 +258,7 @@ Name for your new endpoint""")
  )

  with gr.Column():
- gr.Markdown("## Framework")
+ gr.Markdown("### Framework")
  framework_selector = gr.Textbox(
  value="PyTorch",
  interactive=False,
@@ -267,7 +267,7 @@ Name for your new endpoint""")
  )

  with gr.Column():
- gr.Markdown("""## Select Compute Instance Type""")
+ gr.Markdown("""### Compute Instance Type""")
  compute_selector = gr.Dropdown(
  [],
  value="",
@@ -279,7 +279,7 @@ Name for your new endpoint""")
  with gr.Row():
  with gr.Row(scale=1):
  with gr.Column():
- gr.Markdown("""## Min Number of Nodes""")
+ gr.Markdown("""### Min Number of Nodes""")
  min_node_selector = gr.Number(
  value=1,
  interactive=True,
@@ -288,7 +288,7 @@ Name for your new endpoint""")
  )

  with gr.Column():
- gr.Markdown("""## Max Number of Nodes""")
+ gr.Markdown("""### Max Number of Nodes""")
  max_node_selector = gr.Number(
  value=1,
  interactive=True,
@@ -297,7 +297,7 @@ Name for your new endpoint""")
  )

  with gr.Column(scale=2):
- gr.Markdown("""## Security Level""")
+ gr.Markdown("""### Security Level""")
  security_selector = gr.Radio(
  choices=["Protected", "Public", "Private"],
  value="Public",
@@ -308,14 +308,14 @@ Name for your new endpoint""")

  with gr.Column(elem_classes=["group-border"]):
  with gr.Column():
- gr.Markdown("""## Container Type
+ gr.Markdown("""### Container Type

Text Generation Inference is an optimized container for text generation task""")
  _ = gr.Textbox("Text Generation Inference", show_label=False, elem_classes=["no-label", "small-big"])

  with gr.Row():
  with gr.Column():
- gr.Markdown("""## Custom Cuda Kernels
+ gr.Markdown("""### Custom Cuda Kernels

TGI uses custom kernels to speed up inference for some models. You can try disabling them if you encounter issues.""")
  _ = gr.Dropdown(
@@ -327,7 +327,7 @@ TGI uses custom kernels to speed up inference for some models. You can try disab
  )

  with gr.Column():
- gr.Markdown("""## Quantization
+ gr.Markdown("""### Quantization

Quantization can reduce the model size and improve latency, with little degradation in model accuracy.""")
  _ = gr.Dropdown(
@@ -340,7 +340,7 @@ Quantization can reduce the model size and improve latency, with little degradat

  with gr.Row():
  with gr.Column():
- gr.Markdown("""## Max Input Length (per Query)
+ gr.Markdown("""### Max Input Length (per Query)

Increasing this value can impact the amount of RAM required. Some models can only handle a finite range of sequences.""")
  _ = gr.Number(
@@ -351,7 +351,7 @@ Increasing this value can impact the amount of RAM required. Some models can onl
  )

  with gr.Column():
- gr.Markdown("""## Max Number of Tokens (per Query)
+ gr.Markdown("""### Max Number of Tokens (per Query)

The larger this value, the more memory each request will consume and the less effective batching can be.""")
  _ = gr.Number(
@@ -363,7 +363,7 @@ The larger this value, the more memory each request will consume and the less ef

  with gr.Row():
  with gr.Column():
- gr.Markdown("""## Max Batch Prefill Tokens
+ gr.Markdown("""### Max Batch Prefill Tokens

Number of prefill tokens used during continuous batching. It can be useful to adjust this number since the prefill operation is memory-intensive and compute-bound.""")
  _ = gr.Number(
@@ -374,7 +374,7 @@ Number of prefill tokens used during continuous batching. It can be useful to ad
  )

  with gr.Column():
- gr.Markdown("""## Max Batch Total Tokens
+ gr.Markdown("""### Max Batch Total Tokens

Number of tokens that can be passed before forcing waiting queries to be put on the batch. A value of 1000 can fit 10 queries of 100 tokens or a single query of 1000 tokens.""")
  _ = gr.Number(
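
Net effect of the commit, for quick reference: every form-section label rendered with gr.Markdown is demoted from a level-2 ("##") to a level-3 ("###") heading, and the compute label drops the leading "Select". The following minimal sketch only illustrates the resulting pattern; the variable names and dropdown choices are placeholders, not taken from app.py:

import gradio as gr

# Sketch of the post-commit labeling style: section titles are level-3 headings.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("### Hugging Face account ID (name)")  # previously "## ..."
        account_box = gr.Textbox(show_label=False)

        gr.Markdown("### Compute Instance Type")  # previously "## Select Compute Instance Type"
        compute_dd = gr.Dropdown(choices=["cpu-small", "gpu-a10g"], value="cpu-small")

if __name__ == "__main__":
    demo.launch()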