Upload 3 files

Files changed (3) hide show

sarvam1_2b.toml ADDED Viewed

+[model]  # Base-model settings. NOTE(review): meanings below are inferred from key names; confirm against the code that loads this file.
+dim = 2048  # presumably the base model's embedding/hidden width - TODO confirm
+n_vocab = 68096  # presumably the tokenizer vocabulary size - TODO confirm
+hf_model_id = "sarvamai/sarvam-1"  # looks like a Hugging Face Hub repository id (owner/name)
+pad_token = "</s>"  # NOTE(review): this string is commonly an end-of-sequence marker; confirm reusing it for padding is intended
+[tokenizer]  # Tokenization options. Key names match Hugging Face tokenizer call arguments; presumably forwarded unchanged - verify against caller.
+return_tensors = "pt"  # in the Hugging Face tokenizer API this value requests PyTorch tensors
+return_attention_mask = true
+max_length = 256  # presumably a token count; only limits input when truncation is enabled
+padding = "longest"  # in the Hugging Face tokenizer API: pad each batch to its longest sequence
+truncation = true
+add_special_tokens = false  # NOTE(review): if forwarded to the tokenizer, no BOS/EOS tokens are inserted - confirm this is intended
+[training]  # Trainer settings. Most key names match Hugging Face TrainingArguments fields; presumably forwarded - verify against caller.
+output_dir = "output/matryoshka_sarvam1"  # relative path; what it resolves against depends on the consumer
+num_train_epochs = 20
+per_device_train_batch_size = 128
+warmup_steps = 256
+evaluation_strategy = "steps"  # NOTE(review): newer transformers releases rename this argument to eval_strategy - confirm the pinned version accepts this key
+eval_steps = 2000  # same interval as save_steps
+save_steps = 2000
+fp16 = true
+include_num_input_tokens_seen = false
+learning_rate = 3e-4
+multi_dataset_batch_sampler = "PROPORTIONAL"  # name matches a sentence-transformers training argument - TODO confirm
+binarizer_ste = "tanh"  # not a standard trainer argument as far as visible here; presumably picks a straight-through-estimator variant for binarization - TODO confirm
+[matryoshka]  # presumably settings for Matryoshka (nested-dimension) embedding training - TODO confirm
+dims = [1024, 512, 256, 128, 64]  # descending, each half the previous; every entry is below the model dim of 2048

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff