# Train

## Environment

```bash
cd scripts
python -m venv venv
source venv/bin/activate
pip install -U -r requirements.in
```

## Tokenizer

```bash
python -B train_tokenizer.py
```

## Dataset

```bash
python -B prepare_pretrain_dataset.py
```

```python
from litdata import StreamingDataset, StreamingDataLoader, TokensLoader

dataset = StreamingDataset(
    input_dir='../pretrain-data/',
    item_loader=TokensLoader(block_size=2048 + 1),
)

print(len(dataset))
```

## Model

### Pretrain

```bash
litgpt pretrain --config ./pretrain-model.yaml
```

```bash
litgpt convert_from_litgpt out/pretrain/final/ out/converted_model
cp config.json out/pretrain/final/
cp config.json out/converted_model/
```

```python
import torch
from safetensors.torch import save_file

state_dict = torch.load('out/converted_model/model.pth', map_location='cpu')
save_file(state_dict, 'out/converted_model/model.safetensors')
```
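Before running the evaluations, the converted checkpoint can be smoke-tested with `transformers`. This is a sketch rather than part of the pipeline above: it assumes the copied `config.json` describes an architecture that `transformers` can load, and that the tokenizer files produced by `train_tokenizer.py` have also been placed in `out/converted_model/`.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = 'out/converted_model/'

# Load the converted checkpoint; assumes config.json and the tokenizer files
# are present in model_dir (see the conversion steps above).
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.bfloat16)
model.eval()

# Generate a few tokens as a quick sanity check.
inputs = tokenizer('The capital of France is', return_tensors='pt')
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```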
## Evaluate

```bash
litgpt evaluate --tasks 'hellaswag,gsm8k,truthfulqa_mc2,mmlu,winogrande,arc_challenge' --out_dir 'evaluate-quick/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

| Tasks |Version| Filter |n-shot| Metric | |Value | |Stderr|
|---------------------------------------|------:|----------------|-----:|-----------|---|-----:|---|-----:|
|arc_challenge | 1|none | 0|acc |↑ |0.1962|± |0.0116|
| | |none | 0|acc_norm |↑ |0.2304|± |0.0123|
|gsm8k | 3|flexible-extract| 5|exact_match|↑ |0.0144|± |0.0033|
| | |strict-match | 5|exact_match|↑ |0.0015|± |0.0011|
|hellaswag | 1|none | 0|acc |↑ |0.2631|± |0.0044|
| | |none | 0|acc_norm |↑ |0.2758|± |0.0045|
|mmlu | 2|none | |acc |↑ |0.2473|± |0.0036|
| - humanities | 2|none | |acc |↑ |0.2351|± |0.0062|
| - formal_logic | 1|none | 0|acc |↑ |0.2857|± |0.0404|
| - high_school_european_history | 1|none | 0|acc |↑ |0.2667|± |0.0345|
| - high_school_us_history | 1|none | 0|acc |↑ |0.2696|± |0.0311|
| - high_school_world_history | 1|none | 0|acc |↑ |0.2110|± |0.0266|
| - international_law | 1|none | 0|acc |↑ |0.1653|± |0.0339|
| - jurisprudence | 1|none | 0|acc |↑ |0.2870|± |0.0437|
| - logical_fallacies | 1|none | 0|acc |↑ |0.2331|± |0.0332|
| - moral_disputes | 1|none | 0|acc |↑ |0.2283|± |0.0226|
| - moral_scenarios | 1|none | 0|acc |↑ |0.2425|± |0.0143|
| - philosophy | 1|none | 0|acc |↑ |0.2186|± |0.0235|
| - prehistory | 1|none | 0|acc |↑ |0.2099|± |0.0227|
| - professional_law | 1|none | 0|acc |↑ |0.2314|± |0.0108|
| - world_religions | 1|none | 0|acc |↑ |0.2632|± |0.0338|
| - other | 2|none | |acc |↑ |0.2485|± |0.0078|
| - business_ethics | 1|none | 0|acc |↑ |0.2600|± |0.0441|
| - clinical_knowledge | 1|none | 0|acc |↑ |0.2528|± |0.0267|
| - college_medicine | 1|none | 0|acc |↑ |0.2254|± |0.0319|
| - global_facts | 1|none | 0|acc |↑ |0.2700|± |0.0446|
| - human_aging | 1|none | 0|acc |↑ |0.2377|± |0.0286|
| - management | 1|none | 0|acc |↑ |0.2816|± |0.0445|
| - marketing | 1|none | 0|acc |↑ |0.2692|± |0.0291|
| - medical_genetics | 1|none | 0|acc |↑ |0.2600|± |0.0441|
| - miscellaneous | 1|none | 0|acc |↑ |0.2350|± |0.0152|
| - nutrition | 1|none | 0|acc |↑ |0.2549|± |0.0250|
| - professional_accounting | 1|none | 0|acc |↑ |0.2801|± |0.0268|
| - professional_medicine | 1|none | 0|acc |↑ |0.2610|± |0.0267|
| - virology | 1|none | 0|acc |↑ |0.1807|± |0.0300|
| - social sciences | 2|none | |acc |↑ |0.2658|± |0.0080|
| - econometrics | 1|none | 0|acc |↑ |0.1930|± |0.0371|
| - high_school_geography | 1|none | 0|acc |↑ |0.2172|± |0.0294|
| - high_school_government_and_politics| 1|none | 0|acc |↑ |0.3212|± |0.0337|
| - high_school_macroeconomics | 1|none | 0|acc |↑ |0.2923|± |0.0231|
| - high_school_microeconomics | 1|none | 0|acc |↑ |0.3025|± |0.0298|
| - high_school_psychology | 1|none | 0|acc |↑ |0.2752|± |0.0191|
| - human_sexuality | 1|none | 0|acc |↑ |0.2290|± |0.0369|
| - professional_psychology | 1|none | 0|acc |↑ |0.2386|± |0.0172|
| - public_relations | 1|none | 0|acc |↑ |0.2636|± |0.0422|
| - security_studies | 1|none | 0|acc |↑ |0.3143|± |0.0297|
| - sociology | 1|none | 0|acc |↑ |0.2338|± |0.0299|
| - us_foreign_policy | 1|none | 0|acc |↑ |0.2600|± |0.0441|
| - stem | 2|none | |acc |↑ |0.2464|± |0.0077|
| - abstract_algebra | 1|none | 0|acc |↑ |0.2500|± |0.0435|
| - anatomy | 1|none | 0|acc |↑ |0.2148|± |0.0355|
| - astronomy | 1|none | 0|acc |↑ |0.1908|± |0.0320|
| - college_biology | 1|none | 0|acc |↑ |0.2569|± |0.0365|
| - college_chemistry | 1|none | 0|acc |↑ |0.2700|± |0.0446|
| - college_computer_science | 1|none | 0|acc |↑ |0.3500|± |0.0479|
| - college_mathematics | 1|none | 0|acc |↑ |0.2700|± |0.0446|
| - college_physics | 1|none | 0|acc |↑ |0.2745|± |0.0444|
| - computer_security | 1|none | 0|acc |↑ |0.3000|± |0.0461|
| - conceptual_physics | 1|none | 0|acc |↑ |0.2766|± |0.0292|
| - electrical_engineering | 1|none | 0|acc |↑ |0.2345|± |0.0353|
| - elementary_mathematics | 1|none | 0|acc |↑ |0.2566|± |0.0225|
| - high_school_biology | 1|none | 0|acc |↑ |0.2226|± |0.0237|
| - high_school_chemistry | 1|none | 0|acc |↑ |0.2217|± |0.0292|
| - high_school_computer_science | 1|none | 0|acc |↑ |0.2000|± |0.0402|
| - high_school_mathematics | 1|none | 0|acc |↑ |0.2370|± |0.0259|
| - high_school_physics | 1|none | 0|acc |↑ |0.2517|± |0.0354|
| - high_school_statistics | 1|none | 0|acc |↑ |0.2685|± |0.0302|
| - machine_learning | 1|none | 0|acc |↑ |0.1786|± |0.0364|
|truthfulqa_mc2 | 2|none | 0|acc |↑ |0.4668|± |0.0161|
|winogrande | 1|none | 0|acc |↑ |0.5012|± |0.0141|

| Groups |Version|Filter|n-shot|Metric| |Value | |Stderr|
|------------------|------:|------|------|------|---|-----:|---|-----:|
|mmlu | 2|none | |acc |↑ |0.2473|± |0.0036|
| - humanities | 2|none | |acc |↑ |0.2351|± |0.0062|
| - other | 2|none | |acc |↑ |0.2485|± |0.0078|
| - social sciences| 2|none | |acc |↑ |0.2658|± |0.0080|
| - stem | 2|none | |acc |↑ |0.2464|± |0.0077|

```bash
litgpt evaluate --tasks 'leaderboard' --out_dir 'evaluate-leaderboard/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

```bash
litgpt evaluate --tasks 'bbh_zeroshot,bbh_fewshot,bbh_cot_fewshot,bbh_cot_zeroshot' --out_dir 'evaluate-bigbenchhard/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

```bash
litgpt evaluate --tasks 'mmlu,mmlu_pro' --out_dir 'evaluate-mmlu/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

```bash
litgpt evaluate --tasks 'arc_challenge,boolq,gpqa,hellaswag,openbookqa,piqa,truthfulqa_mc2,winogrande' --out_dir 'evaluate-reasoning/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

```bash
litgpt evaluate --tasks 'mmlu_multilingual,mgsm' --out_dir 'evaluate-multilinguals/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

```bash
litgpt evaluate --tasks 'gsm8k,mathqa' --out_dir 'evaluate-math/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```

```bash
litgpt evaluate --tasks 'qasper' --out_dir 'evaluate-long/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
```
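Each `litgpt evaluate` run above writes its raw lm-eval output under its `--out_dir`. The helper below is a sketch for collecting those numbers in one place; it assumes the JSON files follow lm-eval's usual layout with a top-level `results` dict and silently skips anything else.

```python
import json
from pathlib import Path

# Walk every evaluate-*/ output directory and print the numeric metrics found
# in any lm-eval style results file (i.e. JSON with a top-level "results" dict).
for path in sorted(Path('.').glob('evaluate-*/**/*.json')):
    try:
        data = json.loads(path.read_text())
    except (json.JSONDecodeError, UnicodeDecodeError):
        continue  # not a readable JSON file
    if not isinstance(data, dict) or not isinstance(data.get('results'), dict):
        continue  # some other JSON artifact, not a results file
    print(f'== {path} ==')
    for task, metrics in data['results'].items():
        for name, value in metrics.items():
            if isinstance(value, (int, float)):
                print(f'{task:<40} {name:<30} {value:.4f}')
```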