neo-nlp-dev
committed on
Commit
•
c2502f1
1
Parent(s):
4219db6
updating config for aux loss coefficient
Browse files
configuration_lola_gpt2.py
CHANGED
@@ -47,6 +47,7 @@ class LOLAConfig(PretrainedConfig):
|
|
47 |
reorder_and_upcast_attn=False,
|
48 |
num_experts=16,
|
49 |
topk=1,
|
|
|
50 |
**kwargs,
|
51 |
):
|
52 |
self.vocab_size = vocab_size
|
@@ -75,6 +76,7 @@ class LOLAConfig(PretrainedConfig):
|
|
75 |
|
76 |
self.bos_token_id = bos_token_id
|
77 |
self.eos_token_id = eos_token_id
|
|
|
78 |
|
79 |
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
80 |
|
|
|
47 |
reorder_and_upcast_attn=False,
|
48 |
num_experts=16,
|
49 |
topk=1,
|
50 |
+
router_aux_loss_coef=0.01,
|
51 |
**kwargs,
|
52 |
):
|
53 |
self.vocab_size = vocab_size
|
|
|
76 |
|
77 |
self.bos_token_id = bos_token_id
|
78 |
self.eos_token_id = eos_token_id
|
79 |
+
self.router_aux_loss_coef = router_aux_loss_coef
|
80 |
|
81 |
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
82 |
|