ybelkada/gpt-neo-125m-tagged
Browse files
- README.md +51 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- emissions.csv +6 -0
- merges.txt +0 -0
- runs/Dec06_18-05-52_younes-multi-gpu-3/events.out.tfevents.1701885959.younes-multi-gpu-3.30433.0 +3 -0
- runs/Dec06_18-07-30_younes-multi-gpu-3/events.out.tfevents.1701886056.younes-multi-gpu-3.2911.0 +3 -0
- runs/Dec06_18-08-31_younes-multi-gpu-3/events.out.tfevents.1701886117.younes-multi-gpu-3.6138.0 +3 -0
- runs/Dec06_18-09-42_younes-multi-gpu-3/events.out.tfevents.1701886189.younes-multi-gpu-3.10296.0 +3 -0
- runs/Dec06_18-12-57_younes-multi-gpu-3/events.out.tfevents.1701886385.younes-multi-gpu-3.19612.0 +3 -0
- runs/Dec06_18-14-46_younes-multi-gpu-3/events.out.tfevents.1701886492.younes-multi-gpu-3.25842.0 +3 -0
- runs/Dec06_18-18-34_younes-multi-gpu-3/events.out.tfevents.1701886720.younes-multi-gpu-3.4702.0 +3 -0
- runs/Dec06_18-24-34_younes-multi-gpu-3/events.out.tfevents.1701887080.younes-multi-gpu-3.22335.0 +3 -0
- runs/Dec06_18-25-42_younes-multi-gpu-3/events.out.tfevents.1701887149.younes-multi-gpu-3.26545.0 +3 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer_config.json +22 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
library_name: peft
|
4 |
+
tags:
|
5 |
+
- sft
|
6 |
+
- generated_from_trainer
|
7 |
+
base_model: EleutherAI/gpt-neo-125m
|
8 |
+
model-index:
|
9 |
+
- name: out-test
|
10 |
+
results: []
|
11 |
+
---
|
12 |
+
|
13 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
14 |
+
should probably proofread and complete it, then remove this comment. -->
|
15 |
+
|
16 |
+
# out-test
|
17 |
+
|
18 |
+
This model is a fine-tuned version of [EleutherAI/gpt-neo-125m](https://huggingface.co/EleutherAI/gpt-neo-125m) on an unknown dataset.
|
19 |
+
|
20 |
+
## Model description
|
21 |
+
|
22 |
+
More information needed
|
23 |
+
|
24 |
+
## Intended uses & limitations
|
25 |
+
|
26 |
+
More information needed
|
27 |
+
|
28 |
+
## Training and evaluation data
|
29 |
+
|
30 |
+
More information needed
|
31 |
+
|
32 |
+
## Training procedure
|
33 |
+
|
34 |
+
### Training hyperparameters
|
35 |
+
|
36 |
+
The following hyperparameters were used during training:
|
37 |
+
- learning_rate: 5e-05
|
38 |
+
- train_batch_size: 2
|
39 |
+
- eval_batch_size: 2
|
40 |
+
- seed: 42
|
41 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
42 |
+
- lr_scheduler_type: linear
|
43 |
+
- training_steps: 2
|
44 |
+
|
45 |
+
### Framework versions
|
46 |
+
|
47 |
+
- PEFT 0.7.1
|
48 |
+
- Transformers 4.37.0.dev0
|
49 |
+
- PyTorch 2.1.2+cu118
|
50 |
+
- Datasets 2.14.6
|
51 |
+
- Tokenizers 0.15.0
|
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "EleutherAI/gpt-neo-125m",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 32,
|
13 |
+
"lora_dropout": 0.05,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": null,
|
17 |
+
"peft_type": "LORA",
|
18 |
+
"r": 16,
|
19 |
+
"rank_pattern": {},
|
20 |
+
"revision": null,
|
21 |
+
"target_modules": [
|
22 |
+
"v_proj",
|
23 |
+
"q_proj"
|
24 |
+
],
|
25 |
+
"task_type": "CAUSAL_LM"
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7d51e8db70541b811d5437c903c690b799b8dd82b1fc277f82f34a89fd99327
|
3 |
+
size 2365872
|
emissions.csv
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
|
2 |
+
2023-12-06T18:10:28,860cbc76-94dc-4a72-ad15-2dfa8295e351,codecarbon,15.394656896591187,0.00031497472560473524,0.0005561976436601365,USA,USA,Iowa,Y,gcp,us-central1
|
3 |
+
2023-12-06T18:15:33,03e655e3-6437-4445-8ad8-185b7cbcf3ba,codecarbon,13.940103769302368,0.00029711615603801583,0.0005246621155536215,USA,USA,Iowa,Y,gcp,us-central1
|
4 |
+
2023-12-06T18:19:17,bfb90db7-cede-4fd8-9ca2-a276d2a94c28,codecarbon,16.225735425949097,0.0004134000036186234,0.000730001772238431,USA,USA,Iowa,Y,gcp,us-central1
|
5 |
+
2023-12-06T18:25:13,5cb1e488-1124-4778-a06f-61ac5ba27774,codecarbon,14.836968660354614,0.0002921380764671886,0.0005158715812593831,USA,USA,Iowa,Y,gcp,us-central1
|
6 |
+
2023-12-06T18:26:28,b65c77cc-a9e4-4909-835d-6cd59b5a53bb,codecarbon,15.781362295150757,0.0003607516027599338,0.0006370326730706937,USA,USA,Iowa,Y,gcp,us-central1
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
runs/Dec06_18-05-52_younes-multi-gpu-3/events.out.tfevents.1701885959.younes-multi-gpu-3.30433.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab7fca65e689f9cbca41283151bee1067ec03624c2b790a02fd22f4989466ca4
|
3 |
+
size 4136
|
runs/Dec06_18-07-30_younes-multi-gpu-3/events.out.tfevents.1701886056.younes-multi-gpu-3.2911.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25e51ec74b252bec3a39a2d0212fbefb14636d3ae7ac827fa341dc82704aa7e2
|
3 |
+
size 5145
|
runs/Dec06_18-08-31_younes-multi-gpu-3/events.out.tfevents.1701886117.younes-multi-gpu-3.6138.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bde57e37146319dfa93f80dfc86ec80eb02e893efe720e7eb639167b59f75f9
|
3 |
+
size 4136
|
runs/Dec06_18-09-42_younes-multi-gpu-3/events.out.tfevents.1701886189.younes-multi-gpu-3.10296.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:112f35857ab60745b924362db0bfe0ad749987874ecc94a00f1fdf49e705ee8a
|
3 |
+
size 6025
|
runs/Dec06_18-12-57_younes-multi-gpu-3/events.out.tfevents.1701886385.younes-multi-gpu-3.19612.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a250e1d2730417012d31b791de637764b23192ccbd09241718995a5787043f2
|
3 |
+
size 5677
|
runs/Dec06_18-14-46_younes-multi-gpu-3/events.out.tfevents.1701886492.younes-multi-gpu-3.25842.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46aecae219a8f6086b6089b7990af89a1726810137e14af620dbe82af80bd141
|
3 |
+
size 6025
|
runs/Dec06_18-18-34_younes-multi-gpu-3/events.out.tfevents.1701886720.younes-multi-gpu-3.4702.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d55c950d785202c15f1684fbaf7a920fe486a4074251aaa4596447d12e2b96a3
|
3 |
+
size 6025
|
runs/Dec06_18-24-34_younes-multi-gpu-3/events.out.tfevents.1701887080.younes-multi-gpu-3.22335.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00530a1d8a43cda914a857c39b7266d4c14cdf982bbb2e05b73af55661526606
|
3 |
+
size 6025
|
runs/Dec06_18-25-42_younes-multi-gpu-3/events.out.tfevents.1701887149.younes-multi-gpu-3.26545.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de59f91230ef1c1bab9660cf70ae0000eca5b8006f730af815a55da98f95ea52
|
3 |
+
size 6025
|
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": true,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"50256": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
}
|
13 |
+
},
|
14 |
+
"bos_token": "<|endoftext|>",
|
15 |
+
"clean_up_tokenization_spaces": true,
|
16 |
+
"eos_token": "<|endoftext|>",
|
17 |
+
"errors": "replace",
|
18 |
+
"model_max_length": 2048,
|
19 |
+
"pad_token": "<|endoftext|>",
|
20 |
+
"tokenizer_class": "GPT2Tokenizer",
|
21 |
+
"unk_token": "<|endoftext|>"
|
22 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bbeecbe1ed90f57edb342c5fc76b14902d890367e06e8e9b2cb7e49e078780e
|
3 |
+
size 4728
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|