File size: 1,074 Bytes
2a26d3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Evaluation driver: runs every benchmark script against each model checkpoint
# listed in MODEL_PATHS, one checkpoint at a time.
#
# Requires bash (uses arrays). The benchmark scripts are referenced by relative
# path, so run this from the directory that contains table_related_benchmarks/
# and general_benchmarks/.

# Expose only GPU 0 to the benchmark processes.
export CUDA_VISIBLE_DEVICES=0

# Checkpoints (model weights) to evaluate; add entries to evaluate several in one run.
MODEL_PATHS=("/data4/sft_output/qwen2.5-7b-ins-1012/checkpoint-3200")

#######################################
# Runs all benchmark scripts, in order, for a single checkpoint.
# Each benchmark runs in the foreground, so the steps are already sequential
# and no `wait` is needed between them. A failing benchmark does not stop the
# remaining ones.
# Arguments: $1 - path to the model checkpoint
# Returns:   exit status of the last benchmark command
#######################################
run_benchmarks() {
    local model_path=$1

    # table_instruct
    # NOTE(review): this step spells the flag --model-path (hyphen) while every
    # other step uses --model_path; kept as-is, confirm against the script's CLI.
    python table_related_benchmarks/run_table_instruct_eval.py --model-path "${model_path}"

    # table_bench
    python table_related_benchmarks/run_table_bench_eval.py --model_path "${model_path}"

    # nl2sql
    python table_related_benchmarks/run_text2sql_eval.py --model_path "${model_path}" --is_use_knowledge

    # MBPP
    python general_benchmarks/MBPP/eval_instruct_vllm.py --model_path "${model_path}"

    # human-eval
    python general_benchmarks/HumanEval/eval_instruct_vllm.py --model_path "${model_path}"

    # cmmlu
    python general_benchmarks/MMLU/evaluator.py --task cmmlu --lang zh --model_path "${model_path}"

    # mmlu
    python general_benchmarks/MMLU/evaluator.py --task mmlu --lang en --model_path "${model_path}"
}

for MODEL_PATH in "${MODEL_PATHS[@]}"
do
    echo "Running scripts for model at ${MODEL_PATH}"

    # Quoted so a path with spaces or glob characters stays a single argument.
    run_benchmarks "${MODEL_PATH}"
done

echo "All models processed."