Elron committed on
Commit
f8e8a10
·
1 Parent(s): 42da9ae

Upload split_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. split_utils.py +4 -1
split_utils.py CHANGED
@@ -30,7 +30,7 @@ def parse_random_mix_string(input_str):
30
  """
31
 
32
  if not re.fullmatch(r"(([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)\+)*([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)", input_str):
33
- raise ValueError("Invalid input format")
34
 
35
  pattern = re.compile(r"([a-zA-Z]+)(\[\d*\.?\d*%?\])?")
36
  matches = pattern.findall(input_str)
@@ -227,6 +227,9 @@ def random_mix_generator(new_stream_name, new_stream_sources, stream_routing, in
227
  for old_stream_name in new_stream_sources:
228
  optinal_streams, weights = stream_routing[old_stream_name]
229
  with nested_seed(old_stream_name) as rand:
 
 
 
230
  for item in input_streams[old_stream_name]:
231
  choice = rand.choices(optinal_streams, weights=weights, k=1)[0]
232
  if choice == new_stream_name:
 
30
  """
31
 
32
  if not re.fullmatch(r"(([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)\+)*([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)", input_str):
33
+ raise ValueError(f"Invalid input format for split '{input_str}'")
34
 
35
  pattern = re.compile(r"([a-zA-Z]+)(\[\d*\.?\d*%?\])?")
36
  matches = pattern.findall(input_str)
 
227
  for old_stream_name in new_stream_sources:
228
  optinal_streams, weights = stream_routing[old_stream_name]
229
  with nested_seed(old_stream_name) as rand:
230
+ assert (
231
+ old_stream_name in input_streams
232
+ ), f"'{old_stream_name}' split not found. Possibles options: {input_streams.keys()}"
233
  for item in input_streams[old_stream_name]:
234
  choice = rand.choices(optinal_streams, weights=weights, k=1)[0]
235
  if choice == new_stream_name: