Model save

Browse files

Files changed (5) hide show

README.md +1 -1
all_results.json +5 -5
generation_config.json +6 -0
train_results.json +5 -5
trainer_state.json +14 -70

README.md CHANGED Viewed

@@ -26,7 +26,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/eLLM-han2024/Qwen2.5-7B-Open-R1-Distill-Debug/runs/zgzq94od)
 This model was trained with SFT.

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/eLLM-han2024/Qwen2.5-7B-Open-R1-Distill-Debug/runs/kzcifeec)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "total_flos": 4.184202049014989e+16,
-    "train_loss": 1.4971793293952942,
-    "train_runtime": 181.2294,
-    "train_samples_per_second": 3.531,
-    "train_steps_per_second": 0.055
 }

 {
+    "total_flos": 8665747207225344.0,
+    "train_loss": 1.8449461460113525,
+    "train_runtime": 310.7619,
+    "train_samples_per_second": 0.412,
+    "train_steps_per_second": 0.006
 }

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048,
+  "transformers_version": "4.49.0"
+}

train_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "total_flos": 4.184202049014989e+16,
-    "train_loss": 1.4971793293952942,
-    "train_runtime": 181.2294,
-    "train_samples_per_second": 3.531,
-    "train_steps_per_second": 0.055
 }

 {
+    "total_flos": 8665747207225344.0,
+    "train_loss": 1.8449461460113525,
+    "train_runtime": 310.7619,
+    "train_samples_per_second": 0.412,
+    "train_steps_per_second": 0.006
 }

trainer_state.json CHANGED Viewed

@@ -1,95 +1,39 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.006825938566552901,
   "eval_steps": 500,
-  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0006825938566552901,
-      "grad_norm": 28.913602828979492,
       "learning_rate": 1e-05,
       "loss": 1.9212,
       "step": 1
     },
     {
       "epoch": 0.0013651877133105802,
-      "grad_norm": 26.408185958862305,
-      "learning_rate": 9.698463103929542e-06,
       "loss": 1.7687,
       "step": 2
     },
     {
-      "epoch": 0.0020477815699658703,
-      "grad_norm": 12.941144943237305,
-      "learning_rate": 8.83022221559489e-06,
-      "loss": 1.8589,
-      "step": 3
-    },
-    {
-      "epoch": 0.0027303754266211604,
-      "grad_norm": 11.555084228515625,
-      "learning_rate": 7.500000000000001e-06,
-      "loss": 1.7133,
-      "step": 4
-    },
-    {
-      "epoch": 0.0034129692832764505,
-      "grad_norm": 6.055721759796143,
-      "learning_rate": 5.8682408883346535e-06,
-      "loss": 1.3889,
-      "step": 5
-    },
-    {
-      "epoch": 0.004095563139931741,
-      "grad_norm": 4.867063522338867,
-      "learning_rate": 4.131759111665349e-06,
-      "loss": 1.3631,
-      "step": 6
-    },
-    {
-      "epoch": 0.00477815699658703,
-      "grad_norm": 6.896249771118164,
-      "learning_rate": 2.5000000000000015e-06,
-      "loss": 1.1494,
-      "step": 7
-    },
-    {
-      "epoch": 0.005460750853242321,
-      "grad_norm": 3.4688620567321777,
-      "learning_rate": 1.1697777844051105e-06,
-      "loss": 1.1455,
-      "step": 8
-    },
-    {
-      "epoch": 0.0061433447098976105,
-      "grad_norm": 4.858311653137207,
-      "learning_rate": 3.015368960704584e-07,
-      "loss": 1.2792,
-      "step": 9
-    },
-    {
-      "epoch": 0.006825938566552901,
-      "grad_norm": 4.7386555671691895,
-      "learning_rate": 0.0,
-      "loss": 1.3836,
-      "step": 10
-    },
-    {
-      "epoch": 0.006825938566552901,
-      "step": 10,
-      "total_flos": 4.184202049014989e+16,
-      "train_loss": 1.4971793293952942,
-      "train_runtime": 181.2294,
-      "train_samples_per_second": 3.531,
-      "train_steps_per_second": 0.055
     }
   ],
   "logging_steps": 1.0,
-  "max_steps": 10,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
@@ -105,7 +49,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.184202049014989e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0013651877133105802,
   "eval_steps": 500,
+  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0006825938566552901,
+      "grad_norm": 29.01302146911621,
       "learning_rate": 1e-05,
       "loss": 1.9212,
       "step": 1
     },
     {
       "epoch": 0.0013651877133105802,
+      "grad_norm": 26.415725708007812,
+      "learning_rate": 0.0,
       "loss": 1.7687,
       "step": 2
     },
     {
+      "epoch": 0.0013651877133105802,
+      "step": 2,
+      "total_flos": 8665747207225344.0,
+      "train_loss": 1.8449461460113525,
+      "train_runtime": 310.7619,
+      "train_samples_per_second": 0.412,
+      "train_steps_per_second": 0.006
     }
   ],
   "logging_steps": 1.0,
+  "max_steps": 2,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 8665747207225344.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null