Upload operators.py with huggingface_hub
Browse files- operators.py +19 -10
operators.py
CHANGED
@@ -1162,20 +1162,25 @@ class ApplyOperatorsField(StreamInstanceOperator):
|
|
1162 |
|
1163 |
|
1164 |
class FilterByCondition(SingleStreamOperator):
|
1165 |
-
"""Filters a stream, yielding only instances
|
1166 |
|
1167 |
-
Raises an error if a required
|
1168 |
|
1169 |
Args:
|
1170 |
-
values (Dict[str, Any]): Values that instances must match
|
1171 |
-
condition: the name of the desired condition operator between the
|
1172 |
error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
|
1173 |
|
1174 |
Examples:
|
1175 |
-
FilterByCondition(values = {"a":4}, condition = "gt") will yield only instances where "a">4
|
1176 |
FilterByCondition(values = {"a":4}, condition = "le") will yield only instances where "a"<=4
|
1177 |
FilterByCondition(values = {"a":[4,8]}, condition = "in") will yield only instances where "a" is 4 or 8
|
1178 |
FilterByCondition(values = {"a":[4,8]}, condition = "not in") will yield only instances where "a" is neither 4 nor 8
|
|
|
|
|
|
|
|
|
|
|
1179 |
|
1180 |
"""
|
1181 |
|
@@ -1220,15 +1225,17 @@ class FilterByCondition(SingleStreamOperator):
|
|
1220 |
|
1221 |
def _is_required(self, instance: dict) -> bool:
|
1222 |
for key, value in self.values.items():
|
1223 |
-
|
|
|
|
|
1224 |
raise ValueError(
|
1225 |
f"Required filter field ('{key}') in FilterByCondition is not found in {instance}"
|
1226 |
-
)
|
1227 |
if self.condition == "in":
|
1228 |
-
if
|
1229 |
return False
|
1230 |
elif self.condition == "not in":
|
1231 |
-
if
|
1232 |
return False
|
1233 |
else:
|
1234 |
func = self.condition_to_func[self.condition]
|
@@ -1236,7 +1243,7 @@ class FilterByCondition(SingleStreamOperator):
|
|
1236 |
raise ValueError(
|
1237 |
f"Function not defined for condition '{self.condition}'"
|
1238 |
)
|
1239 |
-
if not func(
|
1240 |
return False
|
1241 |
return True
|
1242 |
|
@@ -1285,6 +1292,8 @@ class FilterByExpression(SingleStreamOperator, ComputeExpressionMixin):
|
|
1285 |
FilterByExpression(expression = "a <= 4 and b > 5") will yield only instances where the value of field "a" does not exceed 4 and the value of field "b" is greater than 5
|
1286 |
FilterByExpression(expression = "a in [4, 8]") will yield only instances where "a" is 4 or 8
|
1287 |
FilterByExpression(expression = "a not in [4, 8]") will yield only instances where "a" is neither 4 nor 8
|
|
|
|
|
1288 |
|
1289 |
"""
|
1290 |
|
|
|
1162 |
|
1163 |
|
1164 |
class FilterByCondition(SingleStreamOperator):
|
1165 |
+
"""Filters a stream, yielding only instances in which the values in required fields follow the required condition operator.
|
1166 |
|
1167 |
+
Raises an error if a required field name is missing from the input instance.
|
1168 |
|
1169 |
Args:
|
1170 |
+
values (Dict[str, Any]): Field names and respective values that instances must match, according to the condition, to be included in the output.
|
1171 |
+
condition: the name of the desired condition operator between the specified (sub) field's value and the provided constant value. Supported conditions are ("gt", "ge", "lt", "le", "ne", "eq", "in", "not in").
|
1172 |
error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
|
1173 |
|
1174 |
Examples:
|
1175 |
+
FilterByCondition(values = {"a":4}, condition = "gt") will yield only instances where field "a" contains a value > 4
|
1176 |
FilterByCondition(values = {"a":4}, condition = "le") will yield only instances where "a"<=4
|
1177 |
FilterByCondition(values = {"a":[4,8]}, condition = "in") will yield only instances where "a" is 4 or 8
|
1178 |
FilterByCondition(values = {"a":[4,8]}, condition = "not in") will yield only instances where "a" is neither 4 nor 8
|
1179 |
+
FilterByCondition(values = {"a/b":[4,8]}, condition = "not in") will yield only instances where "a" is
|
1180 |
+
a dict in which key "b" is mapped to a value that is neither 4 nor 8
|
1181 |
+
FilterByCondition(values = {"a[2]":4}, condition = "le") will yield only instances where "a" is a list whose 3rd
|
1182 |
+
element is <= 4
|
1183 |
+
|
1184 |
|
1185 |
"""
|
1186 |
|
|
|
1225 |
|
1226 |
def _is_required(self, instance: dict) -> bool:
|
1227 |
for key, value in self.values.items():
|
1228 |
+
try:
|
1229 |
+
instance_key = dict_get(instance, key)
|
1230 |
+
except ValueError as ve:
|
1231 |
raise ValueError(
|
1232 |
f"Required filter field ('{key}') in FilterByCondition is not found in {instance}"
|
1233 |
+
) from ve
|
1234 |
if self.condition == "in":
|
1235 |
+
if instance_key not in value:
|
1236 |
return False
|
1237 |
elif self.condition == "not in":
|
1238 |
+
if instance_key in value:
|
1239 |
return False
|
1240 |
else:
|
1241 |
func = self.condition_to_func[self.condition]
|
|
|
1243 |
raise ValueError(
|
1244 |
f"Function not defined for condition '{self.condition}'"
|
1245 |
)
|
1246 |
+
if not func(instance_key, value):
|
1247 |
return False
|
1248 |
return True
|
1249 |
|
|
|
1292 |
FilterByExpression(expression = "a <= 4 and b > 5") will yield only instances where the value of field "a" does not exceed 4 and the value of field "b" is greater than 5
|
1293 |
FilterByExpression(expression = "a in [4, 8]") will yield only instances where "a" is 4 or 8
|
1294 |
FilterByExpression(expression = "a not in [4, 8]") will yield only instances where "a" is neither 4 nor 8
|
1295 |
+
FilterByExpression(expression = "a['b'] not in [4, 8]") will yield only instances where "a" is a dict in
|
1296 |
+
which key 'b' is mapped to a value that is neither 4 nor 8
|
1297 |
|
1298 |
"""
|
1299 |
|