tomekkorbak/test94858444
This model was trained from scratch on 39 shards of the tomekkorbak/detoxify-pile-chunk3 dataset series, spanning tomekkorbak/detoxify-pile-chunk3-0-50000 through tomekkorbak/detoxify-pile-chunk3-1900000-1950000 in contiguous 50,000-document increments (the complete list appears in the full config below).
Model description
Per the full config below, this is a GPT-2 architecture initialized from scratch (not fine-tuned from pretrained weights) and trained with a maximum-likelihood (MLE) objective.
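A minimal sketch of that from-scratch initialization, assuming standard transformers APIs. Note that `reorder_and_upcast_attn` is a real GPT2Config flag, while the config's `scale_attn_by` key looks project-specific and is omitted here:

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Randomly initialized GPT-2; no pretrained weights are loaded.
config = GPT2Config(reorder_and_upcast_attn=True)
model = GPT2LMHeadModel(config)
```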
Intended uses & limitations
More information needed
Training and evaluation data
The model was trained on the 39 tomekkorbak/detoxify-pile-chunk3 shards listed above. The training config sets evaluation_strategy to "no", so no separate evaluation dataset is specified.
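A minimal sketch of assembling that training set with the datasets library, assuming each shard exposes a standard "train" split:

```python
from datasets import load_dataset, concatenate_datasets

# The 39 shard names follow a fixed pattern: 0-50000, 50000-100000, ..., 1900000-1950000.
shard_names = [
    f"tomekkorbak/detoxify-pile-chunk3-{start}-{start + 50_000}"
    for start in range(0, 1_950_000, 50_000)
]

# Load every shard and concatenate into a single training dataset.
shards = [load_dataset(name, split="train") for name in shard_names]
train_dataset = concatenate_datasets(shards)
```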
Training procedure
Training hyperparameters
The following hyperparameters were used during training:
- learning_rate: 0.1
- train_batch_size: 8
- eval_batch_size: 8
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_ratio: 0.01
- training_steps: 16
- mixed_precision_training: Native AMP
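As a minimal sketch, these hyperparameters map onto transformers.TrainingArguments roughly as follows (output_dir, weight_decay, and the logging/saving settings are taken from the full config below; "Native AMP" corresponds to fp16=True):

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="training_output10434",  # from the full config below
    learning_rate=0.1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    seed=42,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_ratio=0.01,
    max_steps=16,
    weight_decay=0.1,
    fp16=True,  # Native AMP mixed-precision training
    logging_first_step=True,
    logging_steps=1,
    evaluation_strategy="no",
    save_strategy="steps",
    save_steps=25354,
)
```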
Framework versions
- Transformers 4.20.1
- Pytorch 1.11.0+cu113
- Datasets 2.5.1
- Tokenizers 0.11.6
Full config
{ "kl_gpt3_callback": { "num_samples": 4096, "max_tokens": 64, "force_call_on": [ 25354 ] }, "dataset": { "is_split_by_sentences": true, "datasets": [ "tomekkorbak/detoxify-pile-chunk3-0-50000", "tomekkorbak/detoxify-pile-chunk3-50000-100000", "tomekkorbak/detoxify-pile-chunk3-100000-150000", "tomekkorbak/detoxify-pile-chunk3-150000-200000", "tomekkorbak/detoxify-pile-chunk3-200000-250000", "tomekkorbak/detoxify-pile-chunk3-250000-300000", "tomekkorbak/detoxify-pile-chunk3-300000-350000", "tomekkorbak/detoxify-pile-chunk3-350000-400000", "tomekkorbak/detoxify-pile-chunk3-400000-450000", "tomekkorbak/detoxify-pile-chunk3-450000-500000", "tomekkorbak/detoxify-pile-chunk3-500000-550000", "tomekkorbak/detoxify-pile-chunk3-550000-600000", "tomekkorbak/detoxify-pile-chunk3-600000-650000", "tomekkorbak/detoxify-pile-chunk3-650000-700000", "tomekkorbak/detoxify-pile-chunk3-700000-750000", "tomekkorbak/detoxify-pile-chunk3-750000-800000", "tomekkorbak/detoxify-pile-chunk3-800000-850000", "tomekkorbak/detoxify-pile-chunk3-850000-900000", "tomekkorbak/detoxify-pile-chunk3-900000-950000", "tomekkorbak/detoxify-pile-chunk3-950000-1000000", "tomekkorbak/detoxify-pile-chunk3-1000000-1050000", "tomekkorbak/detoxify-pile-chunk3-1050000-1100000", "tomekkorbak/detoxify-pile-chunk3-1100000-1150000", "tomekkorbak/detoxify-pile-chunk3-1150000-1200000", "tomekkorbak/detoxify-pile-chunk3-1200000-1250000", "tomekkorbak/detoxify-pile-chunk3-1250000-1300000", "tomekkorbak/detoxify-pile-chunk3-1300000-1350000", "tomekkorbak/detoxify-pile-chunk3-1350000-1400000", "tomekkorbak/detoxify-pile-chunk3-1400000-1450000", "tomekkorbak/detoxify-pile-chunk3-1450000-1500000", "tomekkorbak/detoxify-pile-chunk3-1500000-1550000", "tomekkorbak/detoxify-pile-chunk3-1550000-1600000", "tomekkorbak/detoxify-pile-chunk3-1600000-1650000", "tomekkorbak/detoxify-pile-chunk3-1650000-1700000", "tomekkorbak/detoxify-pile-chunk3-1700000-1750000", "tomekkorbak/detoxify-pile-chunk3-1750000-1800000", "tomekkorbak/detoxify-pile-chunk3-1800000-1850000", "tomekkorbak/detoxify-pile-chunk3-1850000-1900000", "tomekkorbak/detoxify-pile-chunk3-1900000-1950000" ] }, "training": { "fp16": true, "per_device_train_batch_size": 8, "save_strategy": "steps", "logging_first_step": true, "evaluation_strategy": "no", "push_to_hub": true, "hub_model_id": "tomekkorbak/test94858444", "dataloader_num_workers": 0, "seed": 42, "effective_batch_size": 8, "logging_steps": 1, "warmup_ratio": 0.01, "remove_unused_columns": false, "hub_strategy": "all_checkpoints", "save_steps": 25354, "weight_decay": 0.1, "learning_rate": 0.1, "output_dir": "training_output10434", "num_tokens": 132000 }, "model": { "path_or_name": "gpt2", "from_scratch": true, "gpt2_config_kwargs": { "reorder_and_upcast_attn": true, "scale_attn_by": true } }, "objective": { "name": "MLE" }, "tokenizer": { "path_or_name": "gpt2" }, "generation": { "force_call_on": [ 25354 ], "scorer_config": { "device": "cuda:0" }, "metrics_configs": [ {}, { "n": 1 }, { "n": 2 }, { "n": 5 } ], "scenario_configs": [ { "name": "unconditional", "num_samples": 2048, "generate_kwargs": { "do_sample": true, "max_length": 128, "min_length": 10, "temperature": 0.7, "top_p": 0.9, "top_k": 0 } }, { "name": "challenging_rtp", "prompts_path": "resources/challenging_rtp.jsonl", "num_samples": 2048, "generate_kwargs": { "do_sample": true, "max_length": 128, "min_length": 10, "temperature": 0.7, "top_p": 0.9, "top_k": 0 } } ] } }