Commit a743d61 (1 parent: 8a0053d)
update submit

Files changed:
- app.py (+42, -2)
- src/about.py (+62, -43)
app.py CHANGED

@@ -437,8 +437,48 @@ with demo:
             gr.HTML(TABLE_TEXT)
             gr.Markdown(LLM_BENCHMARKS_TEXT2, elem_classes="markdown-text")
         with gr.TabItem("📤 Submit here!", elem_id="submit-model-tab", id=2):
-
-
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                with gr.Column():
+                    with gr.Accordion(
+                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                with gr.Row():
+                    # 1. Submit your modelinfos here!
+                    gr.Markdown("✨ Submit your modelinfos here!")
             with gr.Row():
                 model_name = gr.Textbox(label="Model Name")
                 revision_commit = gr.Textbox(label="Revision commit")
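Note on the hunk above: the three queue accordions read from `finished_eval_queue_df`, `running_eval_queue_df` and `pending_eval_queue_df`, which are built elsewhere in the Space, and each accordion uses `open=False` so the Submit tab stays compact even with long queues. Below is a minimal standalone sketch of the same Accordion + Dataframe pattern; the column names, datatypes and queue rows are placeholders, since the real `EVAL_COLS`, `EVAL_TYPES` and queue contents are not part of this hunk.

```python
# Illustrative sketch only; columns, datatypes and rows are placeholders.
import gradio as gr
import pandas as pd

EVAL_COLS = ["model", "revision", "status"]   # assumed column set
EVAL_TYPES = ["str", "str", "str"]            # assumed Gradio datatypes

pending_eval_queue_df = pd.DataFrame(
    [["org/some-vlm", "main", "PENDING"]], columns=EVAL_COLS
)

with gr.Blocks() as demo:
    with gr.Accordion(
        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
        open=False,
    ):
        with gr.Row():
            pending_eval_table = gr.components.Dataframe(
                value=pending_eval_queue_df,
                headers=EVAL_COLS,
                datatype=EVAL_TYPES,
                row_count=5,
            )

if __name__ == "__main__":
    demo.launch()
```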
src/about.py CHANGED

@@ -360,38 +360,47 @@ Planning
 """
 
 EVALUATION_QUEUE_TEXT = """
-
-Models added here will be automatically evaluated on the FlagEval cluster.
+Submit here! feature deployment content
 
-
-1. If you choose to evaluate via API call, you need to provide the Model interface, name and corresponding API key.
-2. If you choose to do open source model evaluation directly through huggingface, you don't need to fill in the Model online api url and Model online api key.
+Evaluation Queue for the FlagEval VLM Leaderboard
 
-
+Models added here will be automatically evaluated on the FlagEval cluster.
 
-
-1. model_name: Name of the model to use
-2. api_key: API access key
-3. api_base: Base URL for the API service
+Currently, we offer two methods for model evaluation, including API calls and private deployments:
 
-
+1. If you choose to evaluate via API call, you need to provide the Model interface, name and corresponding API key.
+2. If you choose to do open source model evaluation directly through huggingface, you don't need to fill in the Model online api url and Model online api key.
 
-
+## Open API model Integration Documentation
 
-
+For models accessed via API calls (such as OpenAI API, Anthropic API, etc.), the integration process is straightforward and only requires providing necessary configuration information.
 
-
-
-
-3. Set up the initialization and inference pipeline
+1. `model_name`: Name of the model to use
+2. `api_key`: API access key
+3. `api_base`: Base URL for the API service
 
-
+---
+
+## Adding a Custom Model to the Platform
+
+This guide explains how to integrate your custom model into the platform by implementing a model adapter and `run.sh` script. We'll use the Qwen-VL implementation as a reference example.
+
+### Overview
+
+To add your custom model, you need to:
+
+1. Create a custom dataset class
+2. Implement a model adapter class
+3. Set up the initialization and inference pipeline
+
+### Step-by-Step Implementation
 
-Here is an example:[model_adapter.py](https://github.com/flageval-baai/FlagEvalMM/blob/main/model_zoo/vlm/qwen_vl/model_adapter.py)
+Here is an example: [Qwen-VL model_adapter.py](https://github.com/flageval-baai/FlagEvalMM/blob/main/model_zoo/vlm/qwen_vl/model_adapter.py)
 
-#### 1. Create Preprocess Custom Dataset Class
+#### 1. Create Preprocess Custom Dataset Class
+
+Inherit from `ServerDataset` to handle data loading:
 
-Inherit from `ServerDataset` to handle data loading:
 ```python
 # model_adapter.py
 class CustomDataset(ServerDataset):
@@ -411,8 +420,9 @@ class CustomDataset(ServerDataset):
         return question_id, img_path_idx, qs
 ```
 
-The function `get_data` returns a structure like this:
-
+The function `get_data` returns a structure like this:
+
+```json
 {
     "img_path": "A list where each element is an absolute path to an image that can be read directly using PIL, cv2, etc.",
     "question": "A string containing the question, where image positions are marked with <image1> <image2>",
@@ -421,11 +431,15 @@ The function get_data returns a structure like this:
 }
 ```
 
-
-
-
-
-
+---
+
+#### 2. Implement Model Adapter
+
+Inherit from `BaseModelAdapter` and implement the required methods:
+
+- `model_init`: is responsible for model initialization and serves as the entry point for model loading and setup.
+- `run_one_task`: implements the inference pipeline, handling data processing and result generation for a single evaluation task.
+
 ```python
 # model_adapter.py
 class ModelAdapter(BaseModelAdapter):
@@ -461,30 +475,35 @@ class ModelAdapter(BaseModelAdapter):
 Use the provided meta_info and rank parameters to manage result storage as needed.
 '''
 ```
-
-
-
-
-
-
+
+**Note:**
+
+`results` is a list of dictionaries.
+Each dictionary must contain two keys:
+
+```json
+{
+    "question_id": "identifies the specific question",
+    "answer": "contains the model's prediction/output"
+}
 ```
-After collecting all results, they are saved using `save_result()`
 
-
-run.sh is the entry script for launching model evaluation, used to set environment variables and start the evaluation program.
+After collecting all results, they are saved using `save_result()`.
 
-
+---
+
+#### 3. Launch Script (`run.sh`)
+
+`run.sh` is the entry script for launching model evaluation, used to set environment variables and start the evaluation program.
+
+```bash
 #!/bin/bash
 current_file="$0"
 current_dir="$(dirname "$current_file")"
 SERVER_IP=$1
 SERVER_PORT=$2
-PYTHONPATH=$current_dir:$PYTHONPATH python $current_dir/model_adapter.py \
-    --server_ip $SERVER_IP \
-    --server_port $SERVER_PORT \
-    "${@:3}"
+PYTHONPATH=$current_dir:$PYTHONPATH python $current_dir/model_adapter.py --server_ip $SERVER_IP --server_port $SERVER_PORT "${@:3}"
 ```
-
 """
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
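The new "Open API model Integration Documentation" section asks submitters for only three values: `model_name`, `api_key` and `api_base`. As a rough sketch of how those values typically map onto an OpenAI-compatible endpoint (illustrative only; the evaluation cluster's actual client code is not shown in this commit, and the values below are placeholders):

```python
# Sketch: how model_name / api_key / api_base map onto an OpenAI-compatible client.
from openai import OpenAI

model_name = "my-vlm"                       # placeholder for the submitted model name
api_key = "sk-..."                          # placeholder API access key
api_base = "https://api.example.com/v1"     # placeholder base URL for the API service

client = OpenAI(api_key=api_key, base_url=api_base)
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Describe the image <image1>."}],
)
print(response.choices[0].message.content)
```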
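The "Adding a Custom Model to the Platform" section names the pieces a private deployment must provide: a `CustomDataset` built on `ServerDataset`, a `ModelAdapter` with `model_init` and `run_one_task`, a `results` list of `{"question_id", "answer"}` entries, and a final `save_result()` call. A skeleton tying those names together (import paths, constructor arguments and the `save_result` signature are assumptions; the linked Qwen-VL `model_adapter.py` is the authoritative reference):

```python
# Illustrative skeleton only; it reuses the names from the submission guide.
# Import paths and the calls marked "assumed" are not confirmed by this commit.
from flagevalmm.server import ServerDataset       # assumed import path
from flagevalmm.models import BaseModelAdapter    # assumed import path


class CustomDataset(ServerDataset):
    def __getitem__(self, index):
        # get_data returns a dict with "img_path", "question", "question_id", ...
        data = self.get_data(index)
        question_id = data["question_id"]
        img_path_idx = data["img_path"]
        qs = data["question"]
        return question_id, img_path_idx, qs


class ModelAdapter(BaseModelAdapter):
    def model_init(self, task_info):
        # Entry point for model loading and setup (argument name assumed).
        self.model = None  # replace with real model/processor loading

    def run_one_task(self, task_name, meta_info, rank=0):
        # Inference pipeline for a single evaluation task.
        results = []
        dataset = CustomDataset(task_name)  # constructor arguments assumed
        for question_id, img_path_idx, qs in dataset:
            answer = ""  # run inference on (img_path_idx, qs) here
            # Each entry must contain exactly the two keys required by the guide.
            results.append({"question_id": question_id, "answer": answer})
        # Persist everything once the task is done; signature assumed.
        self.save_result(results, meta_info, rank=rank)
```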