aravindhank
committed on
Upload 3 files
Browse files
- sarvam1_2b.toml +31 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
sarvam1_2b.toml
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[model]
|
2 |
+
dim = 2048
|
3 |
+
n_vocab = 68096
|
4 |
+
hf_model_id = "sarvamai/sarvam-1"
|
5 |
+
pad_token = "</s>"
|
6 |
+
|
7 |
+
[tokenizer]
|
8 |
+
return_tensors = "pt"
|
9 |
+
return_attention_mask = true
|
10 |
+
max_length = 256
|
11 |
+
padding = "longest"
|
12 |
+
truncation = true
|
13 |
+
add_special_tokens = false
|
14 |
+
|
15 |
+
[training]
|
16 |
+
output_dir = "output/matryoshka_sarvam1"
|
17 |
+
num_train_epochs = 20
|
18 |
+
per_device_train_batch_size = 128
|
19 |
+
warmup_steps = 256
|
20 |
+
evaluation_strategy = "steps"
|
21 |
+
eval_steps = 2000
|
22 |
+
save_steps = 2000
|
23 |
+
fp16 = true
|
24 |
+
include_num_input_tokens_seen = false
|
25 |
+
learning_rate = 3e-4
|
26 |
+
multi_dataset_batch_sampler = "PROPORTIONAL"
|
27 |
+
binarizer_ste = "tanh"
|
28 |
+
|
29 |
+
[matryoshka]
|
30 |
+
dims = [1024, 512, 256, 128, 64]
|
31 |
+
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|