Elron commited on
Commit
1f5859d
·
verified ·
1 Parent(s): b41caa8

Upload operators.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. operators.py +54 -20
operators.py CHANGED
@@ -41,6 +41,7 @@ from abc import abstractmethod
41
  from collections import Counter
42
  from copy import deepcopy
43
  from dataclasses import field
 
44
  from itertools import zip_longest
45
  from random import Random
46
  from typing import (
@@ -58,7 +59,7 @@ from typing import (
58
  import requests
59
 
60
  from .artifact import Artifact, fetch_artifact
61
- from .dataclass import NonPositionalField
62
  from .dict_utils import dict_delete, dict_get, dict_set, is_subpath
63
  from .operator import (
64
  MultiStream,
@@ -74,11 +75,14 @@ from .operator import (
74
  StreamInstanceOperator,
75
  )
76
  from .random_utils import new_random_generator
 
77
  from .stream import Stream
78
  from .text_utils import nested_tuple_to_string
79
  from .type_utils import isoftype
80
  from .utils import flatten_dict
81
 
 
 
82
 
83
  class FromIterables(StreamInitializerOperator):
84
  """Creates a MultiStream from a dict of named iterables.
@@ -484,7 +488,7 @@ class AddConstant(FieldOperator):
484
 
485
 
486
  class Augmentor(StreamInstanceOperator):
487
- """A stream that augments the values of either the task input fields before rendering with the template, or the input passed to the model after rendering of the template.
488
 
489
  Args:
490
  augment_model_input: Whether to augment the input to the model.
@@ -564,9 +568,9 @@ class NullAugmentor(Augmentor):
564
 
565
 
566
  class AugmentWhitespace(Augmentor):
567
- """Augments the inputs by replace existing whitespace with other whitespace.
568
 
569
- Currently each whitespace is replaced by a random choice of 1-3 whitespace charaters (spcae, tab, newline).
570
  """
571
 
572
  def process_value(self, value: Any) -> Any:
@@ -1094,7 +1098,7 @@ class ArtifactFetcherMixin:
1094
  return cls.cache[artifact_identifier]
1095
 
1096
 
1097
- class ApplyOperatorsField(StreamInstanceOperator, ArtifactFetcherMixin):
1098
  """Applies value operators to each instance in a stream based on specified fields.
1099
 
1100
  Args:
@@ -1215,30 +1219,60 @@ class FilterByCondition(SingleStreamOperator):
1215
  return True
1216
 
1217
 
1218
- class FilterByQuery(SingleStreamOperator):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1219
  """Filters a stream, yielding only instances which fulfil a condition specified as a string to be python's eval-uated.
1220
 
1221
  Raises an error if a field participating in the specified condition is missing from the instance
1222
 
1223
  Args:
1224
- query (str): a condition over fields of the instance, to be processed by python's eval()
 
1225
  error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
1226
 
1227
  Examples:
1228
- FilterByQuery(query = "a > 4") will yield only instances where "a">4
1229
- FilterByQuery(query = "a <= 4 and b > 5") will yield only instances where the value of field "a" is not exceeding 4 and in field "b" -- greater than 5
1230
- FilterByQuery(query = "a in [4, 8]") will yield only instances where "a" is 4 or 8
1231
- FilterByQuery(query = "a not in [4, 8]") will yield only instances where "a" is neither 4 nor 8
1232
 
1233
  """
1234
 
1235
- query: str
1236
  error_on_filtered_all: bool = True
1237
 
1238
  def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generator:
1239
  yielded = False
1240
  for instance in stream:
1241
- if eval(self.query, None, instance):
1242
  yielded = True
1243
  yield instance
1244
 
@@ -1248,33 +1282,33 @@ class FilterByQuery(SingleStreamOperator):
1248
  )
1249
 
1250
 
1251
- class ExecuteQuery(StreamInstanceOperator):
1252
- """Compute an expression (query), expressed as a string to be eval-uated, over the instance's fields, and store the result in field to_field.
1253
 
1254
  Raises an error if a field mentioned in the query is missing from the instance.
1255
 
1256
  Args:
1257
- query (str): an expression to be evaluated over the fields of the instance
1258
  to_field (str): the field where the result is to be stored into
 
1259
 
1260
  Examples:
1261
  When instance {"a": 2, "b": 3} is process-ed by operator
1262
- ExecuteQuery(query="a+b", to_field = "c")
1263
  the result is {"a": 2, "b": 3, "c": 5}
1264
 
1265
  When instance {"a": "hello", "b": "world"} is process-ed by operator
1266
- ExecuteQuery(query = "a+' '+b", to_field = "c")
1267
  the result is {"a": "hello", "b": "world", "c": "hello world"}
1268
 
1269
  """
1270
 
1271
- query: str
1272
  to_field: str
1273
 
1274
  def process(
1275
  self, instance: Dict[str, Any], stream_name: Optional[str] = None
1276
  ) -> Dict[str, Any]:
1277
- instance[self.to_field] = eval(self.query, None, instance)
1278
  return instance
1279
 
1280
 
 
41
  from collections import Counter
42
  from copy import deepcopy
43
  from dataclasses import field
44
+ from importlib import import_module
45
  from itertools import zip_longest
46
  from random import Random
47
  from typing import (
 
59
  import requests
60
 
61
  from .artifact import Artifact, fetch_artifact
62
+ from .dataclass import NonPositionalField, OptionalField
63
  from .dict_utils import dict_delete, dict_get, dict_set, is_subpath
64
  from .operator import (
65
  MultiStream,
 
75
  StreamInstanceOperator,
76
  )
77
  from .random_utils import new_random_generator
78
+ from .settings_utils import get_settings
79
  from .stream import Stream
80
  from .text_utils import nested_tuple_to_string
81
  from .type_utils import isoftype
82
  from .utils import flatten_dict
83
 
84
+ settings = get_settings()
85
+
86
 
87
  class FromIterables(StreamInitializerOperator):
88
  """Creates a MultiStream from a dict of named iterables.
 
488
 
489
 
490
  class Augmentor(StreamInstanceOperator):
491
+ """A stream operator that augments the values of either the task input fields before rendering with the template, or the input passed to the model after rendering of the template.
492
 
493
  Args:
494
  augment_model_input: Whether to augment the input to the model.
 
568
 
569
 
570
  class AugmentWhitespace(Augmentor):
571
+ """Augments the inputs by replacing existing whitespaces with other whitespaces.
572
 
573
+ Currently, each whitespace is replaced by a random choice of 1-3 whitespace characters (space, tab, newline).
574
  """
575
 
576
  def process_value(self, value: Any) -> Any:
 
1098
  return cls.cache[artifact_identifier]
1099
 
1100
 
1101
+ class ApplyOperatorsField(StreamInstanceOperator):
1102
  """Applies value operators to each instance in a stream based on specified fields.
1103
 
1104
  Args:
 
1219
  return True
1220
 
1221
 
1222
+ class ComputeExpressionMixin(Artifact):
1223
+ """Computes an expression expressed over fields of an instance.
1224
+
1225
+ Args:
1226
+ expression (str): the expression, in terms of names of fields of an instance
1227
+ imports_list (List[str]): list of names of imports needed for the evaluation of the expression
1228
+ """
1229
+
1230
+ expression: str
1231
+ imports_list: List[str] = OptionalField(default_factory=list)
1232
+
1233
+ def prepare(self):
1234
+ # can not do the imports here, because object does not pickle with imports
1235
+ self.globs = {}
1236
+ self.to_import = True
1237
+
1238
+ def compute_expression(self, instance: dict) -> Any:
1239
+ if self.to_import:
1240
+ for module_name in self.imports_list:
1241
+ self.globs[module_name] = import_module(module_name)
1242
+ self.to_import = False
1243
+
1244
+ if settings.allow_unverified_code:
1245
+ return eval(self.expression, self.globs, instance)
1246
+
1247
+ raise ValueError(
1248
+ f"Cannot run expression by {self} when unitxt.settings.allow_unverified_code=False either set it to True or set {settings.allow_unverified_code_key} environment variable."
1249
+ )
1250
+
1251
+
1252
+ class FilterByExpression(SingleStreamOperator, ComputeExpressionMixin):
1253
  """Filters a stream, yielding only instances which fulfil a condition specified as a string to be python's eval-uated.
1254
 
1255
  Raises an error if a field participating in the specified condition is missing from the instance
1256
 
1257
  Args:
1258
+ expression (str): a condition over fields of the instance, to be processed by python's eval()
1259
+ imports_list (List[str]): names of imports needed for the eval of the query (e.g. 're', 'json')
1260
  error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
1261
 
1262
  Examples:
1263
+ FilterByExpression(expression = "a > 4") will yield only instances where "a">4
1264
+ FilterByExpression(expression = "a <= 4 and b > 5") will yield only instances where the value of field "a" is not exceeding 4 and in field "b" -- greater than 5
1265
+ FilterByExpression(expression = "a in [4, 8]") will yield only instances where "a" is 4 or 8
1266
+ FilterByExpression(expression = "a not in [4, 8]") will yield only instances where "a" is neither 4 nor 8
1267
 
1268
  """
1269
 
 
1270
  error_on_filtered_all: bool = True
1271
 
1272
  def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generator:
1273
  yielded = False
1274
  for instance in stream:
1275
+ if self.compute_expression(instance):
1276
  yielded = True
1277
  yield instance
1278
 
 
1282
  )
1283
 
1284
 
1285
+ class ExecuteExpression(StreamInstanceOperator, ComputeExpressionMixin):
1286
+ """Compute an expression, specified as a string to be eval-uated, over the instance's fields, and store the result in field to_field.
1287
 
1288
  Raises an error if a field mentioned in the query is missing from the instance.
1289
 
1290
  Args:
1291
+ expression (str): an expression to be evaluated over the fields of the instance
1292
  to_field (str): the field where the result is to be stored into
1293
+ imports_list (List[str]): names of imports needed for the eval of the query (e.g. 're', 'json')
1294
 
1295
  Examples:
1296
  When instance {"a": 2, "b": 3} is process-ed by operator
1297
+ ExecuteExpression(expression="a+b", to_field = "c")
1298
  the result is {"a": 2, "b": 3, "c": 5}
1299
 
1300
  When instance {"a": "hello", "b": "world"} is process-ed by operator
1301
+ ExecuteExpression(expression = "a+' '+b", to_field = "c")
1302
  the result is {"a": "hello", "b": "world", "c": "hello world"}
1303
 
1304
  """
1305
 
 
1306
  to_field: str
1307
 
1308
  def process(
1309
  self, instance: Dict[str, Any], stream_name: Optional[str] = None
1310
  ) -> Dict[str, Any]:
1311
+ instance[self.to_field] = self.compute_expression(instance)
1312
  return instance
1313
 
1314