Crystalcareai committed on
Commit
16797b7
1 Parent(s): 44add4f

Create eval.sh

Browse files
Files changed (1) hide show
  1. eval.sh +38 -0
eval.sh ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# eval.sh - run a fixed set of lm_eval benchmark tasks for each model listed
# in MODEL_PATHS.
#
# Every benchmark is started through `accelerate launch -m lm_eval` and its
# output is written to ./results/<basename of the model path>/<benchmark>.
#
# Usage: bash eval.sh
#
# Exit status: 0 when every benchmark run succeeded, 1 when at least one
# run returned a non-zero status.

# -u turns a misspelled variable name into an error. -e is deliberately NOT
# set: a failing benchmark must not prevent the remaining ones from running.
set -u

# Install required packages. A failure is only reported, because the packages
# may already be present in the current environment.
if ! pip install antlr4-python3-runtime==4.11 immutabledict langdetect; then
  printf 'warning: pip install failed; continuing with the current environment\n' >&2
fi

# Download the NLTK "punkt" resource.
if ! python -c "import nltk; nltk.download('punkt')"; then
  printf 'warning: could not download the NLTK punkt resource\n' >&2
fi

MODEL_PATHS=(
  meta-llama/Meta-Llama-3.1-8B-Instruct
)

# "<model name>/<benchmark>" entries for runs that returned non-zero.
failed_runs=()

#######################################
# Run one benchmark for the model currently being processed.
# Globals:   BASE_COMMAND, MODEL_DIR, MODEL_NAME (read), failed_runs (written)
# Arguments: $1   - name of the output sub-directory below MODEL_DIR
#            $2.. - extra lm_eval arguments (e.g. --tasks, --num_fewshot)
# Returns:   0 always; failures are recorded in failed_runs instead
#######################################
run_eval() {
  local label=$1
  shift

  if ! "${BASE_COMMAND[@]}" "$@" --output_path "$MODEL_DIR/$label"; then
    printf 'error: benchmark "%s" failed for model "%s"\n' \
      "$label" "$MODEL_NAME" >&2
    failed_runs+=("$MODEL_NAME/$label")
  fi
  return 0
}

for MODEL_PATH in "${MODEL_PATHS[@]}"; do
  MODEL_NAME=$(basename "$MODEL_PATH")
  MODEL_DIR="./results/$MODEL_NAME"
  mkdir -p "$MODEL_DIR" \
    || printf 'warning: could not create %s\n' "$MODEL_DIR" >&2

  MODEL_ARGS="trust_remote_code=True,pretrained=$MODEL_PATH,dtype=bfloat16"

  # Kept as an array so that every argument (in particular the value of
  # --model_args) reaches lm_eval as exactly one word, without word
  # splitting or glob expansion. --fewshot_as_multiturn is part of the
  # shared options and is therefore not repeated per task.
  BASE_COMMAND=(
    accelerate launch -m lm_eval
    --model hf
    --model_args "$MODEL_ARGS"
    --batch_size 4
    --fewshot_as_multiturn
    --apply_chat_template
  )

  # IFEval
  run_eval ifeval --tasks leaderboard_ifeval

  # BBH (Big-Bench Hard)
  run_eval bbh --tasks leaderboard_bbh --num_fewshot 3

  # GPQA
  run_eval gpqa --tasks leaderboard_gpqa

  # MMLU-Pro
  run_eval mmlu_pro --tasks leaderboard_mmlu_pro --num_fewshot 5

  # Math Level-5
  run_eval math_hard --tasks leaderboard_math_hard --num_fewshot 4

  # TruthfulQA
  run_eval truthfulqa --tasks truthfulqa_mc2
done

# Summarise failures so that callers (CI, job schedulers) see a non-zero
# status even when the last benchmark happened to succeed.
if (( ${#failed_runs[@]} > 0 )); then
  printf 'failed runs:\n' >&2
  printf '  %s\n' "${failed_runs[@]}" >&2
  exit 1
fi