finnstrom3693 committed on
Commit
0bf0be0
1 Parent(s): 5875b95

Fix warm-up step calculation: derive warm-up steps from a ratio of total steps (warmup_ratio) instead of an absolute step count

Browse files
Files changed (1) hide show
  1. modeling5.py +4 -4
modeling5.py CHANGED
@@ -7,7 +7,7 @@ class MiniSunConfig:
7
  def __init__(self, vocab_size=30522, max_position_embeddings=1024, hidden_size=512,
8
  num_attention_heads=8, intermediate_size=2048, num_hidden_layers=8,
9
  dropout_rate=0.1, weight_decay=0.01, learning_rate=1e-4, total_steps=2500,
10
- warmup_steps=500, restart_period=500):
11
  self.vocab_size = vocab_size
12
  self.max_position_embeddings = max_position_embeddings
13
  self.hidden_size = hidden_size
@@ -18,7 +18,7 @@ class MiniSunConfig:
18
  self.weight_decay = weight_decay
19
  self.learning_rate = learning_rate
20
  self.total_steps = total_steps
21
- self.warmup_steps = warmup_steps
22
  self.restart_period = restart_period
23
 
24
 
@@ -140,7 +140,7 @@ def create_model(config):
140
 
141
  def cosine_annealing_with_warmup(step, config):
142
  """Learning rate schedule with warm-up and cosine annealing."""
143
- warmup_steps = int(config.total_steps * config.warmup_steps)
144
  if step < warmup_steps:
145
  return config.learning_rate * (step / warmup_steps)
146
  else:
@@ -150,7 +150,7 @@ def cosine_annealing_with_warmup(step, config):
150
 
151
  def cosine_annealing_with_restarts(step, config):
152
  """Learning rate schedule with warm-up and cosine annealing with restarts."""
153
- warmup_steps = int(config.total_steps * config.warmup_steps)
154
 
155
  current_cycle = step // config.restart_period
156
  effective_step = step % config.restart_period
 
7
  def __init__(self, vocab_size=30522, max_position_embeddings=1024, hidden_size=512,
8
  num_attention_heads=8, intermediate_size=2048, num_hidden_layers=8,
9
  dropout_rate=0.1, weight_decay=0.01, learning_rate=1e-4, total_steps=2500,
10
+ warmup_ratio=0.5, restart_period=500):
11
  self.vocab_size = vocab_size
12
  self.max_position_embeddings = max_position_embeddings
13
  self.hidden_size = hidden_size
 
18
  self.weight_decay = weight_decay
19
  self.learning_rate = learning_rate
20
  self.total_steps = total_steps
21
+ self.warmup_ratio = warmup_ratio
22
  self.restart_period = restart_period
23
 
24
 
 
140
 
141
  def cosine_annealing_with_warmup(step, config):
142
  """Learning rate schedule with warm-up and cosine annealing."""
143
+ warmup_steps = int(config.total_steps * config.warmup_ratio)
144
  if step < warmup_steps:
145
  return config.learning_rate * (step / warmup_steps)
146
  else:
 
150
 
151
  def cosine_annealing_with_restarts(step, config):
152
  """Learning rate schedule with warm-up and cosine annealing with restarts."""
153
+ warmup_steps = int(config.total_steps * config.warmup_ratio)
154
 
155
  current_cycle = step // config.restart_period
156
  effective_step = step % config.restart_period