zhangduo4610 committed
Commit ef03add
1 Parent(s): 479543f

Model save

README.md CHANGED
@@ -1,13 +1,13 @@
  ---
  base_model: facebook/opt-350m
  datasets:
- - HuggingFaceH4/ultrachat_200k
+ - generator
  library_name: peft
  license: other
  tags:
- - alignment-handbook
  - trl
  - sft
+ - alignment-handbook
  - generated_from_trainer
  model-index:
  - name: opt350
@@ -19,9 +19,7 @@ should probably proofread and complete it, then remove this comment. -->

  # opt350

- This model is a fine-tuned version of [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on the HuggingFaceH4/ultrachat_200k dataset.
- It achieves the following results on the evaluation set:
- - Loss: 1.7869
+ This model is a fine-tuned version of [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on the generator dataset.

  ## Model description

@@ -56,9 +54,6 @@ The following hyperparameters were used during training:

  ### Training results

- | Training Loss | Epoch | Step | Validation Loss |
- |:-------------:|:------:|:----:|:---------------:|
- | 1.8289 | 0.9999 | 8068 | 1.7869 |


  ### Framework versions
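Note: the card's dataset entry now reads `generator`, which is typically the name recorded when the trainer sees a generator-backed in-memory dataset rather than a named hub dataset, so the explicit `HuggingFaceH4/ultrachat_200k` reference and the evaluation results drop out of the README. A minimal sketch of using the published adapter, assuming the repo id is `zhangduo4610/opt350` (inferred from the author and model name, not stated in this commit):

```python
# Minimal sketch: attach the LoRA adapter from this repo to the base model
# named in the card. The repo id "zhangduo4610/opt350" is an assumption.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
model = PeftModel.from_pretrained(base, "zhangduo4610/opt350")  # hypothetical repo id

inputs = tokenizer("Hello, how are you?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```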
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "v_proj",
  "down_proj",
- "gate_proj",
  "k_proj",
- "up_proj",
+ "gate_proj",
  "o_proj",
- "q_proj",
- "v_proj"
+ "up_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
all_results.json CHANGED
@@ -6,9 +6,9 @@
  "eval_samples_per_second": 98.221,
  "eval_steps_per_second": 6.143,
  "total_flos": 4248917998829568.0,
- "train_loss": 1.8640377591255577,
- "train_runtime": 7899.8856,
+ "train_loss": 0.0,
+ "train_runtime": 0.013,
  "train_samples": 207864,
- "train_samples_per_second": 16.341,
- "train_steps_per_second": 1.021
+ "train_samples_per_second": 9898425.558,
+ "train_steps_per_second": 618618.052
  }
runs/Nov06_12-35-29_gnode001.cluster/events.out.tfevents.1730925336.gnode001.cluster.319001.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83ace51b3dce42ebf39b6f376bc63f4eec7936b16f30fbec200f44e9e0eb13b3
+ size 5968
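The added file is a Git LFS pointer to a TensorBoard event log from this run. A minimal sketch of reading its scalars once the real binary has been fetched (e.g. via `git lfs pull`):

```python
# Minimal sketch: iterate over the scalar summaries in the added event file.
# Assumes the actual 5968-byte binary has been pulled, not just the LFS pointer.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

path = ("runs/Nov06_12-35-29_gnode001.cluster/"
        "events.out.tfevents.1730925336.gnode001.cluster.319001.0")
acc = EventAccumulator(path)
acc.Reload()  # parse the event file

for tag in acc.Tags()["scalars"]:
    for event in acc.Scalars(tag):
        print(tag, event.step, event.value)
```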
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 0.9999380306128772,
  "total_flos": 4248917998829568.0,
- "train_loss": 1.8640377591255577,
- "train_runtime": 7899.8856,
+ "train_loss": 0.0,
+ "train_runtime": 0.013,
  "train_samples": 207864,
- "train_samples_per_second": 16.341,
- "train_steps_per_second": 1.021
+ "train_samples_per_second": 9898425.558,
+ "train_steps_per_second": 618618.052
  }
trainer_state.json CHANGED
@@ -11306,22 +11306,14 @@
  "loss": 1.8289,
  "step": 8065
  },
- {
- "epoch": 0.9999380306128772,
- "eval_loss": 1.7869144678115845,
- "eval_runtime": 146.7975,
- "eval_samples_per_second": 97.27,
- "eval_steps_per_second": 6.083,
- "step": 8068
- },
  {
  "epoch": 0.9999380306128772,
  "step": 8068,
  "total_flos": 4248917998829568.0,
- "train_loss": 1.8640377591255577,
- "train_runtime": 7899.8856,
- "train_samples_per_second": 16.341,
- "train_steps_per_second": 1.021
+ "train_loss": 0.0,
+ "train_runtime": 0.013,
+ "train_samples_per_second": 9898425.558,
+ "train_steps_per_second": 618618.052
  }
  ],
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bf597ff109e8eb432c6a2edc1b25470f5aa9cb37ee0e041326691c23bae90273
+ oid sha256:00d22c401c4097001dc3bef189c4e94267b1e886a53abe7580d57bab88f54a8b
  size 7032
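`training_args.bin` keeps the same size but gets a new content hash, i.e. the serialized `TrainingArguments` changed between runs. A minimal sketch of fetching and inspecting it, again assuming the hypothetical repo id `zhangduo4610/opt350`:

```python
# Minimal sketch: download training_args.bin and inspect the saved arguments.
# The repo id is an assumption; training_args.bin is a pickled TrainingArguments,
# so torch.load needs weights_only=False on recent PyTorch versions.
import torch
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="zhangduo4610/opt350", filename="training_args.bin")
args = torch.load(path, weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```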