XeTute committed
Commit a520327 · verified · 1 Parent(s): 9ca0e49

Upload 17 files

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+running_log.txt filter=lfs diff=lfs merge=lfs -text
+trainer_log.jsonl filter=lfs diff=lfs merge=lfs -text
+trainer_state.json filter=lfs diff=lfs merge=lfs -text
all_results.json ADDED
@@ -0,0 +1,9 @@
+{
+  "epoch": 4.0,
+  "num_input_tokens_seen": 209502272,
+  "total_flos": 1.06916551483392e+17,
+  "train_loss": 1.2849165250584973,
+  "train_runtime": 16129.2775,
+  "train_samples_per_second": 12.783,
+  "train_steps_per_second": 12.783
+}
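
A quick consistency check on the metrics above (a minimal sketch; the numbers are copied from all_results.json, and the batch-size explanation assumes the training_args.yaml later in this commit):

# Sanity-check the reported throughput against the runtime.
runtime_s = 16129.2775                    # train_runtime, seconds
samples_per_s = 12.783                    # train_samples_per_second
print(round(runtime_s * samples_per_s))   # ~206181 samples over 4 epochs
# train_steps_per_second equals train_samples_per_second here because
# per_device_train_batch_size = 1 and gradient_accumulation_steps = 1.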
config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "XeTute/Phantasor_V0.1-137M",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.2",
+  "use_cache": false,
+  "vocab_size": 50257
+}
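
For reference, a minimal loading sketch (the repo id is taken from "_name_or_path" above and assumed public; transformers 4.48.x matches the config's transformers_version):

# Load the checkpoint; n_layer=12, n_embd=768, n_head=12 is the standard
# GPT-2 Small geometry, so this instantiates a GPT2LMHeadModel.
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("XeTute/Phantasor_V0.1-137M")
model = AutoModelForCausalLM.from_pretrained("XeTute/Phantasor_V0.1-137M")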
generation_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.48.2"
+}
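
These defaults only pin the BOS/EOS ids; a hedged sampling sketch that combines them with the do_sample/max_length hints from config.json's task_specific_params (the prompt is illustrative):

from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("XeTute/Phantasor_V0.1-137M")
model = AutoModelForCausalLM.from_pretrained("XeTute/Phantasor_V0.1-137M")
inputs = tok("Once upon a time", return_tensors="pt")
# pad_token_id is set explicitly since GPT-2 has no dedicated pad token.
out = model.generate(**inputs, do_sample=True, max_length=50,
                     pad_token_id=tok.eos_token_id)
print(tok.decode(out[0], skip_special_tokens=True))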
llamaboard_config.yaml ADDED
@@ -0,0 +1,81 @@
+top.booster: auto
+top.checkpoint_path: null
+top.finetuning_type: full
+top.model_name: GPT-2-Small
+top.quantization_bit: none
+top.quantization_method: bitsandbytes
+top.rope_scaling: none
+top.template: default
+train.additional_target: ''
+train.apollo_rank: 16
+train.apollo_scale: 32
+train.apollo_target: all
+train.apollo_update_interval: 200
+train.badam_mode: layer
+train.badam_switch_interval: 50
+train.badam_switch_mode: ascending
+train.badam_update_ratio: 0.05
+train.batch_size: 1
+train.compute_type: bf16
+train.create_new_adapter: false
+train.cutoff_len: 1024
+train.dataset:
+- XeTute/Keywords-to-Short-Story
+- MatanP/emotion_mapped_story_dataset
+- Chamoda/atlas-storyteller-1000
+- jaydenccc/AI_Storyteller_Dataset
+- webnovel
+train.dataset_dir: data
+train.ds_offload: false
+train.ds_stage: none
+train.extra_args: '{"optim": "sgd"}'
+train.freeze_extra_modules: ''
+train.freeze_trainable_layers: 2
+train.freeze_trainable_modules: all
+train.galore_rank: 16
+train.galore_scale: 2
+train.galore_target: all
+train.galore_update_interval: 200
+train.gradient_accumulation_steps: 1
+train.learning_rate: 1e-1
+train.logging_steps: 1000
+train.lora_alpha: 16
+train.lora_dropout: 0
+train.lora_rank: 8
+train.lora_target: ''
+train.loraplus_lr_ratio: 0
+train.lr_scheduler_type: cosine
+train.mask_history: false
+train.max_grad_norm: '1.0'
+train.max_samples: '1000000000'
+train.neat_packing: false
+train.neftune_alpha: 0
+train.num_train_epochs: '4.0'
+train.packing: false
+train.ppo_score_norm: false
+train.ppo_whiten_rewards: false
+train.pref_beta: 0.1
+train.pref_ftx: 0
+train.pref_loss: sigmoid
+train.report_to:
+- none
+train.resize_vocab: false
+train.reward_model: []
+train.save_steps: 5000
+train.swanlab_api_key: ''
+train.swanlab_mode: cloud
+train.swanlab_project: llamafactory
+train.swanlab_run_name: ''
+train.swanlab_workspace: ''
+train.train_on_prompt: false
+train.training_stage: Supervised Fine-Tuning
+train.use_apollo: false
+train.use_badam: false
+train.use_dora: false
+train.use_galore: false
+train.use_llama_pro: false
+train.use_pissa: false
+train.use_rslora: false
+train.use_swanlab: false
+train.val_size: 0
+train.warmup_steps: 0
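
This file records the LLaMA Board (LLaMA-Factory's web UI) form state; the arguments that actually reached the trainer are in training_args.yaml below. A small sketch for inspecting the run from this file (requires PyYAML; note the keys are flat strings such as "train.dataset"):

import yaml

with open("llamaboard_config.yaml") as f:
    cfg = yaml.safe_load(f)
print(cfg["train.dataset"])      # the five story datasets listed above
print(cfg["train.extra_args"])   # '{"optim": "sgd"}' -> plain SGD at lr 1e-1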
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0474ea40168c95f0a4bde54ec3615cd2804e088e71538326fa229c4812f95e5
+size 497774208
running_log.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81a8658e983f0f6c16c1072c04604643fadd254db6420c01833ea82e0edd2a22
+size 17492598
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
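
All four special tokens map to the single GPT-2 token <|endoftext|> (id 50256), the usual choice for GPT-2 checkpoints, which have no dedicated pad token. A quick check (assuming the public repo id):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("XeTute/Phantasor_V0.1-137M")
assert tok.bos_token == tok.eos_token == tok.pad_token == tok.unk_token == "<|endoftext|>"
print(tok.eos_token_id)  # 50256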
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ 'System: ' + system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|endoftext|>' + '\n' }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
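
The chat_template above renders a conversation as "System:", "Human:", and "Assistant:" turns, appending "Assistant:" directly after each user message. A minimal sketch of applying it (the messages are illustrative):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("XeTute/Phantasor_V0.1-137M")
messages = [
    {"role": "system", "content": "You are a storyteller."},
    {"role": "user", "content": "Tell me about a lighthouse."},
]
print(tok.apply_chat_template(messages, tokenize=False))
# System: You are a storyteller.
# Human: Tell me about a lighthouse.
# Assistant: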
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+  "epoch": 4.0,
+  "num_input_tokens_seen": 209502272,
+  "total_flos": 1.06916551483392e+17,
+  "train_loss": 1.2849165250584973,
+  "train_runtime": 16129.2775,
+  "train_samples_per_second": 12.783,
+  "train_steps_per_second": 12.783
+}
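
train_results.json repeats the metrics from all_results.json, which permits one more cross-check against the training configuration:

# With batch size 1, steps == samples; tokens/step should approach cutoff_len.
steps = 16129.2775 * 12.783          # runtime * steps/sec ~= 206181 steps
print(round(209502272 / steps))      # ~= 1016 tokens per step, near cutoff_len = 1024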
trainer_log.jsonl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01e097f7cd5233a1583547fcde28e47563ed650955bb0f24313682d6872edfdd
+size 51286130
trainer_state.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add5f08ebf549b03d6874cfaf11df1b84bb016d6b7e41c1229b7ca11bf2adb2e
+size 45908579
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dfd290ec5ee4ceb52bcae88132d88792a3b931ec2981c35224981e441be6d2c
+size 5688
training_args.yaml ADDED
@@ -0,0 +1,29 @@
+bf16: true
+cutoff_len: 1024
+dataset: XeTute/Keywords-to-Short-Story,MatanP/emotion_mapped_story_dataset,Chamoda/atlas-storyteller-1000,jaydenccc/AI_Storyteller_Dataset,webnovel
+dataset_dir: data
+ddp_timeout: 180000000
+do_train: true
+finetuning_type: full
+flash_attn: auto
+gradient_accumulation_steps: 1
+include_num_input_tokens_seen: true
+learning_rate: 0.1
+logging_steps: 1000
+lr_scheduler_type: cosine
+max_grad_norm: 1.0
+max_samples: 1000000000
+model_name_or_path: XeTute/Phantasor_V0.1-137M
+num_train_epochs: 4.0
+optim: sgd
+output_dir: saves\GPT-2-Small\full\09-02-2025
+packing: false
+per_device_train_batch_size: 1
+plot_loss: true
+preprocessing_num_workers: 16
+report_to: none
+save_steps: 5000
+stage: sft
+template: default
+trust_remote_code: true
+warmup_steps: 0
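
Assuming a standard LLaMA-Factory installation, a run with these arguments is normally launched by handing this YAML to the CLI; a hedged sketch (the command name follows the LLaMA-Factory README, and the Windows-style output_dir above suggests the original run was on Windows):

import subprocess

# Start supervised fine-tuning (stage: sft) with the arguments in this file.
subprocess.run(["llamafactory-cli", "train", "training_args.yaml"], check=True)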
training_loss.png ADDED
vocab.json ADDED
The diff for this file is too large to render. See raw diff