barc0
/

engineer-barc-llama3.2-3b-instruct-fft-transduction-lr1e-5_epoch3

@@ -3,9 +3,18 @@ library_name: transformers
 license: llama3.2
 base_model: meta-llama/Llama-3.2-3B-Instruct
 tags:
 - trl
 - sft
 - generated_from_trainer
 model-index:
 - name: engineer-barc-llama3.2-3b-instruct-fft-transduction-lr1e-5_epoch3
  results: []
@@ -16,7 +25,7 @@ should probably proofread and complete it, then remove this comment. -->
 # engineer-barc-llama3.2-3b-instruct-fft-transduction-lr1e-5_epoch3
-This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.0255

 license: llama3.2
 base_model: meta-llama/Llama-3.2-3B-Instruct
 tags:
+- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
+- trl
+- sft
+- generated_from_trainer
+datasets:
+- barc0/transduction_concept_library
+- barc0/transduction_angmented_100k-gpt4-description-gpt4omini-code_generated_problems
+- barc0/transduction_angmented_100k_gpt4o-mini_generated_problems
+- barc0/transduction_rearc_dataset_400k
 model-index:
 - name: engineer-barc-llama3.2-3b-instruct-fft-transduction-lr1e-5_epoch3
  results: []
 # engineer-barc-llama3.2-3b-instruct-fft-transduction-lr1e-5_epoch3
+This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the barc0/transduction_concept_library, the barc0/transduction_angmented_100k-gpt4-description-gpt4omini-code_generated_problems, the barc0/transduction_angmented_100k_gpt4o-mini_generated_problems and the barc0/transduction_rearc_dataset_400k datasets.
 It achieves the following results on the evaluation set:
 - Loss: 0.0255

all_results.json CHANGED Viewed

@@ -1,5 +1,10 @@
 {
  "epoch": 3.0,
  "total_flos": 2332096332890112.0,
  "train_loss": 0.03801635660235435,
  "train_runtime": 128961.4313,

 {
  "epoch": 3.0,
+ "eval_loss": 0.025541018694639206,
+ "eval_runtime": 787.6358,
+ "eval_samples": 19674,
+ "eval_samples_per_second": 24.979,
+ "eval_steps_per_second": 0.781,
  "total_flos": 2332096332890112.0,
  "train_loss": 0.03801635660235435,
  "train_runtime": 128961.4313,

config.json CHANGED Viewed

@@ -35,6 +35,6 @@
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.0.dev0",
- "use_cache": false,
  "vocab_size": 128256
 }

  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.0.dev0",
+ "use_cache": true,
  "vocab_size": 128256
 }

eval_results.json ADDED Viewed

+{
+ "epoch": 3.0,
+ "eval_loss": 0.025541018694639206,
+ "eval_runtime": 787.6358,
+ "eval_samples": 19674,
+ "eval_samples_per_second": 24.979,
+ "eval_steps_per_second": 0.781
+}

runs/Oct04_23-40-49_56deac14d784/events.out.tfevents.1728216697.56deac14d784.1950.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:aaabaffc69ed79453ef0c1b1b0670963771e3ed05690d61ccbdd17e420843b6b
+size 359