ybelkada committed on Dec 20, 2023

Commit

df9e26e

•

1 Parent(s): 58bab1d

ybelkada/gpt-neo-125m-tagged

Browse files

Files changed (19) hide show

README.md +51 -0
adapter_config.json +26 -0
adapter_model.safetensors +3 -0
emissions.csv +6 -0
merges.txt +0 -0
runs/Dec06_18-05-52_younes-multi-gpu-3/events.out.tfevents.1701885959.younes-multi-gpu-3.30433.0 +3 -0
runs/Dec06_18-07-30_younes-multi-gpu-3/events.out.tfevents.1701886056.younes-multi-gpu-3.2911.0 +3 -0
runs/Dec06_18-08-31_younes-multi-gpu-3/events.out.tfevents.1701886117.younes-multi-gpu-3.6138.0 +3 -0
runs/Dec06_18-09-42_younes-multi-gpu-3/events.out.tfevents.1701886189.younes-multi-gpu-3.10296.0 +3 -0
runs/Dec06_18-12-57_younes-multi-gpu-3/events.out.tfevents.1701886385.younes-multi-gpu-3.19612.0 +3 -0
runs/Dec06_18-14-46_younes-multi-gpu-3/events.out.tfevents.1701886492.younes-multi-gpu-3.25842.0 +3 -0
runs/Dec06_18-18-34_younes-multi-gpu-3/events.out.tfevents.1701886720.younes-multi-gpu-3.4702.0 +3 -0
runs/Dec06_18-24-34_younes-multi-gpu-3/events.out.tfevents.1701887080.younes-multi-gpu-3.22335.0 +3 -0
runs/Dec06_18-25-42_younes-multi-gpu-3/events.out.tfevents.1701887149.younes-multi-gpu-3.26545.0 +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer_config.json +22 -0
training_args.bin +3 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,51 @@

+---
+license: mit
+library_name: peft
+tags:
+- sft
+- generated_from_trainer
+base_model: EleutherAI/gpt-neo-125m
+model-index:
+- name: out-test
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# out-test
+This model is a fine-tuned version of [EleutherAI/gpt-neo-125m](https://huggingface.co/EleutherAI/gpt-neo-125m) on an unknown dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- training_steps: 2
+### Framework versions
+- PEFT 0.7.1
+- Transformers 4.37.0.dev0
+- PyTorch 2.1.2+cu118
+- Datasets 2.14.6
+- Tokenizers 0.15.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "EleutherAI/gpt-neo-125m",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7d51e8db70541b811d5437c903c690b799b8dd82b1fc277f82f34a89fd99327
+size 2365872

emissions.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
+2023-12-06T18:10:28,860cbc76-94dc-4a72-ad15-2dfa8295e351,codecarbon,15.394656896591187,0.00031497472560473524,0.0005561976436601365,USA,USA,Iowa,Y,gcp,us-central1
+2023-12-06T18:15:33,03e655e3-6437-4445-8ad8-185b7cbcf3ba,codecarbon,13.940103769302368,0.00029711615603801583,0.0005246621155536215,USA,USA,Iowa,Y,gcp,us-central1
+2023-12-06T18:19:17,bfb90db7-cede-4fd8-9ca2-a276d2a94c28,codecarbon,16.225735425949097,0.0004134000036186234,0.000730001772238431,USA,USA,Iowa,Y,gcp,us-central1
+2023-12-06T18:25:13,5cb1e488-1124-4778-a06f-61ac5ba27774,codecarbon,14.836968660354614,0.0002921380764671886,0.0005158715812593831,USA,USA,Iowa,Y,gcp,us-central1
+2023-12-06T18:26:28,b65c77cc-a9e4-4909-835d-6cd59b5a53bb,codecarbon,15.781362295150757,0.0003607516027599338,0.0006370326730706937,USA,USA,Iowa,Y,gcp,us-central1

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Dec06_18-05-52_younes-multi-gpu-3/events.out.tfevents.1701885959.younes-multi-gpu-3.30433.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab7fca65e689f9cbca41283151bee1067ec03624c2b790a02fd22f4989466ca4
+size 4136

runs/Dec06_18-07-30_younes-multi-gpu-3/events.out.tfevents.1701886056.younes-multi-gpu-3.2911.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25e51ec74b252bec3a39a2d0212fbefb14636d3ae7ac827fa341dc82704aa7e2
+size 5145

runs/Dec06_18-08-31_younes-multi-gpu-3/events.out.tfevents.1701886117.younes-multi-gpu-3.6138.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4bde57e37146319dfa93f80dfc86ec80eb02e893efe720e7eb639167b59f75f9
+size 4136

runs/Dec06_18-09-42_younes-multi-gpu-3/events.out.tfevents.1701886189.younes-multi-gpu-3.10296.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:112f35857ab60745b924362db0bfe0ad749987874ecc94a00f1fdf49e705ee8a
+size 6025

runs/Dec06_18-12-57_younes-multi-gpu-3/events.out.tfevents.1701886385.younes-multi-gpu-3.19612.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a250e1d2730417012d31b791de637764b23192ccbd09241718995a5787043f2
+size 5677

runs/Dec06_18-14-46_younes-multi-gpu-3/events.out.tfevents.1701886492.younes-multi-gpu-3.25842.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46aecae219a8f6086b6089b7990af89a1726810137e14af620dbe82af80bd141
+size 6025

runs/Dec06_18-18-34_younes-multi-gpu-3/events.out.tfevents.1701886720.younes-multi-gpu-3.4702.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d55c950d785202c15f1684fbaf7a920fe486a4074251aaa4596447d12e2b96a3
+size 6025

runs/Dec06_18-24-34_younes-multi-gpu-3/events.out.tfevents.1701887080.younes-multi-gpu-3.22335.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00530a1d8a43cda914a857c39b7266d4c14cdf982bbb2e05b73af55661526606
+size 6025

runs/Dec06_18-25-42_younes-multi-gpu-3/events.out.tfevents.1701887149.younes-multi-gpu-3.26545.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de59f91230ef1c1bab9660cf70ae0000eca5b8006f730af815a55da98f95ea52
+size 6025

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bbeecbe1ed90f57edb342c5fc76b14902d890367e06e8e9b2cb7e49e078780e
+size 4728

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff