bf16: true
cutoff_len: 1024
dataset: XeTute/Keywords-to-Short-Story,MatanP/emotion_mapped_story_dataset,Chamoda/atlas-storyteller-1000,jaydenccc/AI_Storyteller_Dataset,webnovel
dataset_dir: data
ddp_timeout: 180000000
do_train: true
finetuning_type: full
flash_attn: auto
gradient_accumulation_steps: 1
include_num_input_tokens_seen: true
learning_rate: 0.1
logging_steps: 1000
lr_scheduler_type: cosine
max_grad_norm: 1.0
max_samples: 1000000000
model_name_or_path: XeTute/Phantasor_V0.1-137M
num_train_epochs: 4.0
optim: sgd
output_dir: saves\GPT-2-Small\full\09-02-2025
packing: false
per_device_train_batch_size: 1
plot_loss: true
preprocessing_num_workers: 16
report_to: none
save_steps: 5000
stage: sft
template: default
trust_remote_code: true
warmup_steps: 0
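
The key names above match the LLaMA-Factory training-argument schema (stage, finetuning_type, template, dataset_dir, and so on), so the run can presumably be reproduced by saving the block as a YAML file, e.g. phantasor_sft.yaml (hypothetical file name), and launching it with the LLaMA-Factory CLI:

llamafactory-cli train phantasor_sft.yaml

The Windows-style output_dir (saves\GPT-2-Small\full\09-02-2025) suggests the original run was performed on Windows; adjust the path separators for other platforms.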