File size: 3,082 Bytes
2a26d3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import argparse
from inference import generate_outputs, load_model, load_tokenizer_and_template
from reject_eval.run_eval import (eval_outputs,
format_inputs,
load_json)
def main(args):
    """Run the reject evaluation for a single model.

    Loads the model and tokenizer from ``args.model_path``, reads the test
    set at ``args.test_path``, passes it through ``format_inputs``, generates
    outputs with ``generate_outputs`` and hands them to ``eval_outputs``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``temperature``, ``max_new_tokens``, ``model_type``,
            ``model_path``, ``max_model_len``, ``gpus_num``, ``template``
            and ``test_path``.
    """
    # Generation options forwarded to generate_outputs. All attributes are
    # read from ``args`` before any model loading starts.
    sampling_options = dict(
        temperature=args.temperature,
        max_tokens=args.max_new_tokens,
        model_type=args.model_type,
    )
    checkpoint = args.model_path
    context_len = args.max_model_len
    gpu_count = args.gpus_num
    chat_template = args.template
    eval_file = args.test_path

    # Load the model and the tokenizer.
    llm_model = load_model(checkpoint, context_len, gpu_count)
    tokenizer = load_tokenizer_and_template(checkpoint, chat_template)

    # Inference and evaluation.
    prompts = format_inputs(load_json(eval_file))

    # NOTE(review): the original comment called this the "first round" and
    # mentioned a follow-up correction round for models that do not follow
    # the requested output format; only this single round is run here.
    generations = generate_outputs(prompts, llm_model, tokenizer, sampling_options)

    # NOTE(review): the generations are not written to disk here and
    # ``args.save_path`` is not read by this function.
    eval_outputs(generations, eval_file)
def _build_arg_parser():
    """Create the command-line parser for the reject evaluation script."""
    cli = argparse.ArgumentParser(description="eval reject")

    # (flag, add_argument keyword arguments), registered in this order so
    # that ``--help`` lists the options in the same sequence.
    option_specs = [
        (
            "--gpus_num",
            dict(type=int, default=1, help="the number of GPUs you want to use."),
        ),
        (
            "--temperature",
            dict(type=float, default=0.01, help="Temperature setting"),
        ),
        (
            "--model_path",
            dict(type=str, required=True, help="Path to the model"),
        ),
        (
            "--model_type",
            dict(
                choices=["base_model", "chat_model"],
                default="chat_model",
                help="Base model or Chat model",
            ),
        ),
        (
            "--max_new_tokens",
            dict(
                type=int,
                default=1024,
                help="Maximum number of output new tokens",
            ),
        ),
        (
            "--max_model_len",
            dict(type=int, default=8192, help="Max model length"),
        ),
        (
            "--template",
            dict(
                type=str,
                choices=[None, "llama3", "baichuan", "chatglm"],
                default=None,
                help="The template must be specified if not present in the config file",
            ),
        ),
        (
            "--test_path",
            dict(
                type=str,
                default="table_related_benchmarks/evalset/reject_test/test_query.json",
                help="Test File Path",
            ),
        ),
        (
            "--save_path",
            dict(
                type=str,
                default="output/result_reject.json",
                help="LLM output samples save path",
            ),
        ),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli


if __name__ == "__main__":
    # Parse sys.argv and run the evaluation.
    main(_build_arg_parser().parse_args())
# Example paths (presumably values for --model_path): /home/dev/weights/CodeQwen1.5-7B-Chat, /data0/pretrained-models/checkpoints/qwen2/checkpoint-1200
"""
python table_related_benchmarks/run_reject_eval.py --model_path /data4/sft_output/qwen2-base-0817
"""
|