XeTute committed 5 days ago

Commit

a520327

verified ·

1 Parent(s): 9ca0e49

Upload 17 files

Browse files

Files changed (18) hide show

.gitattributes +3 -0
all_results.json +9 -0
config.json +39 -0
generation_config.json +6 -0
llamaboard_config.yaml +81 -0
merges.txt +0 -0
model.safetensors +3 -0
running_log.txt +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer_config.json +24 -0
train_results.json +9 -0
trainer_log.jsonl +3 -0
trainer_state.json +3 -0
training_args.bin +3 -0
training_args.yaml +29 -0
training_loss.png +0 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+running_log.txt filter=lfs diff=lfs merge=lfs -text
+trainer_log.jsonl filter=lfs diff=lfs merge=lfs -text
+trainer_state.json filter=lfs diff=lfs merge=lfs -text

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 4.0,
+    "num_input_tokens_seen": 209502272,
+    "total_flos": 1.06916551483392e+17,
+    "train_loss": 1.2849165250584973,
+    "train_runtime": 16129.2775,
+    "train_samples_per_second": 12.783,
+    "train_steps_per_second": 12.783
+}

config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "XeTute/Phantasor_V0.1-137M",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.2",
+  "use_cache": false,
+  "vocab_size": 50257
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.48.2"
+}

llamaboard_config.yaml ADDED Viewed

	@@ -0,0 +1,81 @@

+top.booster: auto
+top.checkpoint_path: null
+top.finetuning_type: full
+top.model_name: GPT-2-Small
+top.quantization_bit: none
+top.quantization_method: bitsandbytes
+top.rope_scaling: none
+top.template: default
+train.additional_target: ''
+train.apollo_rank: 16
+train.apollo_scale: 32
+train.apollo_target: all
+train.apollo_update_interval: 200
+train.badam_mode: layer
+train.badam_switch_interval: 50
+train.badam_switch_mode: ascending
+train.badam_update_ratio: 0.05
+train.batch_size: 1
+train.compute_type: bf16
+train.create_new_adapter: false
+train.cutoff_len: 1024
+train.dataset:
+- XeTute/Keywords-to-Short-Story
+- MatanP/emotion_mapped_story_dataset
+- Chamoda/atlas-storyteller-1000
+- jaydenccc/AI_Storyteller_Dataset
+- webnovel
+train.dataset_dir: data
+train.ds_offload: false
+train.ds_stage: none
+train.extra_args: '{"optim": "sgd"}'
+train.freeze_extra_modules: ''
+train.freeze_trainable_layers: 2
+train.freeze_trainable_modules: all
+train.galore_rank: 16
+train.galore_scale: 2
+train.galore_target: all
+train.galore_update_interval: 200
+train.gradient_accumulation_steps: 1
+train.learning_rate: 1e-1
+train.logging_steps: 1000
+train.lora_alpha: 16
+train.lora_dropout: 0
+train.lora_rank: 8
+train.lora_target: ''
+train.loraplus_lr_ratio: 0
+train.lr_scheduler_type: cosine
+train.mask_history: false
+train.max_grad_norm: '1.0'
+train.max_samples: '1000000000'
+train.neat_packing: false
+train.neftune_alpha: 0
+train.num_train_epochs: '4.0'
+train.packing: false
+train.ppo_score_norm: false
+train.ppo_whiten_rewards: false
+train.pref_beta: 0.1
+train.pref_ftx: 0
+train.pref_loss: sigmoid
+train.report_to:
+- none
+train.resize_vocab: false
+train.reward_model: []
+train.save_steps: 5000
+train.swanlab_api_key: ''
+train.swanlab_mode: cloud
+train.swanlab_project: llamafactory
+train.swanlab_run_name: ''
+train.swanlab_workspace: ''
+train.train_on_prompt: false
+train.training_stage: Supervised Fine-Tuning
+train.use_apollo: false
+train.use_badam: false
+train.use_dora: false
+train.use_galore: false
+train.use_llama_pro: false
+train.use_pissa: false
+train.use_rslora: false
+train.use_swanlab: false
+train.val_size: 0
+train.warmup_steps: 0

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0474ea40168c95f0a4bde54ec3615cd2804e088e71538326fa229c4812f95e5
+size 497774208

running_log.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81a8658e983f0f6c16c1072c04604643fadd254db6420c01833ea82e0edd2a22
+size 17492598

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ 'System: ' + system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|endoftext|>' + '\n' }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 4.0,
+    "num_input_tokens_seen": 209502272,
+    "total_flos": 1.06916551483392e+17,
+    "train_loss": 1.2849165250584973,
+    "train_runtime": 16129.2775,
+    "train_samples_per_second": 12.783,
+    "train_steps_per_second": 12.783
+}

trainer_log.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01e097f7cd5233a1583547fcde28e47563ed650955bb0f24313682d6872edfdd
+size 51286130

trainer_state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:add5f08ebf549b03d6874cfaf11df1b84bb016d6b7e41c1229b7ca11bf2adb2e
+size 45908579

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3dfd290ec5ee4ceb52bcae88132d88792a3b931ec2981c35224981e441be6d2c
+size 5688

training_args.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+bf16: true
+cutoff_len: 1024
+dataset: XeTute/Keywords-to-Short-Story,MatanP/emotion_mapped_story_dataset,Chamoda/atlas-storyteller-1000,jaydenccc/AI_Storyteller_Dataset,webnovel
+dataset_dir: data
+ddp_timeout: 180000000
+do_train: true
+finetuning_type: full
+flash_attn: auto
+gradient_accumulation_steps: 1
+include_num_input_tokens_seen: true
+learning_rate: 0.1
+logging_steps: 1000
+lr_scheduler_type: cosine
+max_grad_norm: 1.0
+max_samples: 1000000000
+model_name_or_path: XeTute/Phantasor_V0.1-137M
+num_train_epochs: 4.0
+optim: sgd
+output_dir: saves\GPT-2-Small\full\09-02-2025
+packing: false
+per_device_train_batch_size: 1
+plot_loss: true
+preprocessing_num_workers: 16
+report_to: none
+save_steps: 5000
+stage: sft
+template: default
+trust_remote_code: true
+warmup_steps: 0

training_loss.png ADDED Viewed

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff