{ "ESM": false, "MI_loss": false, "MOE": false, "_name_or_path": "allenai/scibert_scivocab_uncased", "a_col": "a", "add_during_eval": false, "architectures": [ "BertForSentenceSimilarity" ], "attention_probs_dropout_prob": 0.0, "b_col": "b", "classifier_dropout": null, "contact_head": false, "contrastive_loss": "clip", "data_paths": [ "lhallee/abstract_domain_parasitic" ], "domains": [ "[PAR]" ], "hidden_act": "gelu", "hidden_dropout_prob": 0.05, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "label_col": "label", "layer_norm_eps": 1e-12, "limits": false, "log_path": "./results.txt", "max_length": 512, "max_position_embeddings": 512, "model_path": "allenai/scibert_scivocab_uncased", "model_type": "bert", "moe_type": "topk", "new_special_tokens": false, "num_attention_heads": 12, "num_experts": 8, "num_hidden_layers": 12, "num_tasks": 5, "pad_token_id": 0, "patience": 3, "position_embedding_type": "absolute", "project_name": "SciMOE", "token_moe": false, "topk": 2, "torch_dtype": "float32", "transformers_version": "4.38.2", "type_vocab_size": 2, "use_cache": true, "vocab_size": 31090, "wBAL": 0.1, "wMI": 0.1, "wandb": true, "wandb_name": "parasite_mixed_full", "wandb_project": "SSPR", "weight_path": "parasitic_single.pt" }