aravindhank committed on
Commit
3c15818
·
verified ·
1 Parent(s): 63d7cdb

Upload 3 files

Browse files
Files changed (3) hide show
  1. sarvam1_2b.toml +31 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +0 -0
sarvam1_2b.toml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [model]
2
+ dim = 2048
3
+ n_vocab = 68096
4
+ hf_model_id = "sarvamai/sarvam-1"
5
+ pad_token = "</s>"
6
+
7
+ [tokenizer]
8
+ return_tensors = "pt"
9
+ return_attention_mask = true
10
+ max_length = 256
11
+ padding = "longest"
12
+ truncation = true
13
+ add_special_tokens = false
14
+
15
+ [training]
16
+ output_dir = "output/matryoshka_sarvam1"
17
+ num_train_epochs = 20
18
+ per_device_train_batch_size = 128
19
+ warmup_steps = 256
20
+ evaluation_strategy = "steps"
21
+ eval_steps = 2000
22
+ save_steps = 2000
23
+ fp16 = true
24
+ include_num_input_tokens_seen = false
25
+ learning_rate = 3e-4
26
+ multi_dataset_batch_sampler = "PROPORTIONAL"
27
+ binarizer_ste = "tanh"
28
+
29
+ [matryoshka]
30
+ dims = [1024, 512, 256, 128, 64]
31
+
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff