finnstrom3693 committed
Commit: 0bf0be0 • Parent(s): 5875b95

fix calculation warm up step to ratio

Browse files: modeling5.py (+4 -4)

modeling5.py CHANGED
@@ -7,7 +7,7 @@ class MiniSunConfig:
     def __init__(self, vocab_size=30522, max_position_embeddings=1024, hidden_size=512,
                  num_attention_heads=8, intermediate_size=2048, num_hidden_layers=8,
                  dropout_rate=0.1, weight_decay=0.01, learning_rate=1e-4, total_steps=2500,
-
+                 warmup_ratio=0.5, restart_period=500):
         self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.hidden_size = hidden_size
@@ -18,7 +18,7 @@ class MiniSunConfig:
         self.weight_decay = weight_decay
         self.learning_rate = learning_rate
         self.total_steps = total_steps
-        self.
+        self.warmup_ratio = warmup_ratio
         self.restart_period = restart_period


@@ -140,7 +140,7 @@ def create_model(config):

 def cosine_annealing_with_warmup(step, config):
     """Learning rate schedule with warm-up and cosine annealing."""
-    warmup_steps = int(config.total_steps * config.
+    warmup_steps = int(config.total_steps * config.warmup_ratio)
     if step < warmup_steps:
         return config.learning_rate * (step / warmup_steps)
     else:
@@ -150,7 +150,7 @@ def cosine_annealing_with_restarts(step, config):

 def cosine_annealing_with_restarts(step, config):
     """Learning rate schedule with warm-up and cosine annealing with restarts."""
-    warmup_steps = int(config.total_steps * config.
+    warmup_steps = int(config.total_steps * config.warmup_ratio)

     current_cycle = step // config.restart_period
     effective_step = step % config.restart_period
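
For context, a minimal sketch of how the changed warm-up calculation behaves after this commit. Only the warm-up branch of the schedule is visible in the diff, so the cosine decay below it is an assumed standard form, and the reduced MiniSunConfig keeps only the fields the schedule reads, with the defaults shown above.

import math

class MiniSunConfig:
    # Reduced to the fields the schedule reads (defaults from the diff).
    def __init__(self, learning_rate=1e-4, total_steps=2500, warmup_ratio=0.5):
        self.learning_rate = learning_rate
        self.total_steps = total_steps
        self.warmup_ratio = warmup_ratio

def cosine_annealing_with_warmup(step, config):
    """Learning rate schedule with warm-up and cosine annealing."""
    # The fix: warm-up length is derived from a ratio of total_steps.
    warmup_steps = int(config.total_steps * config.warmup_ratio)
    if step < warmup_steps:
        # Linear warm-up, as in the diff.
        return config.learning_rate * (step / warmup_steps)
    # Assumed cosine decay over the remaining steps (not visible in the diff).
    progress = (step - warmup_steps) / max(1, config.total_steps - warmup_steps)
    return config.learning_rate * 0.5 * (1.0 + math.cos(math.pi * progress))

config = MiniSunConfig()
# With total_steps=2500 and warmup_ratio=0.5, warm-up covers the first 1250 steps.
print(cosine_annealing_with_warmup(625, config))   # halfway through warm-up: 5e-05
print(cosine_annealing_with_warmup(1250, config))  # peak learning rate: 1e-04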