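"""Run the TableBench evaluation pipeline end to end: model inference,
execution of the generated answers, and metric computation, saving the
intermediate and final results under --inference_output_dir."""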
import os

from inference import load_model, load_tokenizer_and_template
from table_bench_eval.run_eval import model_infer_and_save, run_eval, execute_samples_and_save


def main(args):
    llm_model = load_model(args.model_path, args.max_model_len, args.gpus_num)
    tokenizer = load_tokenizer_and_template(args.model_path, args.template)
    inference_output_dir = args.inference_output_dir
    base_model_name = args.model_path
    generate_args = {
        "temperature": args.temperature,
        "max_tokens": args.max_new_tokens,
        "model_type": args.model_type,
        "top_p": 0.95,
        "n": 1,
    }
    # Stage 1: run inference on the eval set and save the raw model outputs
    all_samples = model_infer_and_save(
        args.eval_dataset_path, llm_model, tokenizer, generate_args, inference_output_dir, base_model_name
    )
    # Stage 2: execute the generated answers and attach the execution results
    all_samples = execute_samples_and_save(all_samples, inference_output_dir, base_model_name)
    # Stage 3: score the executed samples and save the evaluation metrics
    run_eval(all_samples, inference_output_dir, base_model_name)
if __name__ == "__main__":
    from argparse import ArgumentParser

    parser = ArgumentParser(description="TableBench evaluation")
    parser.add_argument(
        "--gpus_num", type=int, default=1, help="Number of GPUs to use"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.01, help="Sampling temperature"
    )
    parser.add_argument(
        "--model_path",
        type=str,
        default="/data4/sft_output/qwen2.5-7b-ins-1012/checkpoint-3200",
        help="Path to the model",
    )
    parser.add_argument(
        "--eval_dataset_path",
        type=str,
        default="table_related_benchmarks/evalset/TableBench",
        help="Path to the test set",
    )
    parser.add_argument(
        "--inference_output_dir",
        type=str,
        default="table_related_benchmarks/evalset/TableBench/eval_results",
        help="Directory in which to save inference and evaluation results",
    )
    parser.add_argument(
        "--model_type",
        choices=["base_model", "chat_model"],
        default="chat_model",
        help="Base model or chat model",
    )
    parser.add_argument(
        "--max_model_len", type=int, default=16384, help="Max model context length"
    )
    parser.add_argument(
        "--max_new_tokens",
        type=int,
        default=2048,
        help="Maximum number of output tokens",
    )
    parser.add_argument(
        "--template",
        type=str,
        choices=[None, "llama3", "baichuan", "chatglm"],
        default=None,
        help="Chat template; must be specified if not present in the model config",
    )
    args = parser.parse_args()
    # Disable tokenizer parallelism to avoid fork-related warnings and deadlocks
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    main(args)
# Example
"""
python table_related_benchmarks/run_table_bench_eval.py
"""