tangledgroup
/

tangled-llama-v-128k-base-v0.1

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

mtasic85 committed on Oct 6, 2024

Commit

2af09d8

·

1 Parent(s): 951a420

config.json

Files changed (2) hide show

config.json +22 -0
scripts/pretrain-model.yaml +1 -1

config.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "_name_or_path": "tangledgroup/tangled-llama-33m-32k-base-v0.1",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_size": 256,
+  "intermediate_size": 1024,
+  "max_position_embeddings": 38400,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 4,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.2",
+  "use_cache": true,
+  "vocab_size": 38400
+}

scripts/pretrain-model.yaml CHANGED Viewed

@@ -78,7 +78,7 @@ train:
   # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
   # max_tokens: 3000000000000
   # max_tokens: 8159107755 # 796399 * 2049 * 5
-  max_tokens: 13054572408 # 796399 * 2049 * 8
+  max_tokens: 11422750857 # 796399 * 2049 * 7
   # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
   max_steps: