Spaces:
Sleeping
Sleeping
Added another feature
Browse files- README.md +2 -0
- restrictedpython_code_eval.py +30 -10
README.md
CHANGED
@@ -62,6 +62,8 @@ In addition, this metric supports three additional arguments, specifying which i
|
|
62 |
|
63 |
**`allow_str_format`**: (`bool`): Whether or not to allow the use of `str.format`. Defaults to False, as it's considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
|
64 |
|
|
|
|
|
65 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
66 |
|
67 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
|
|
62 |
|
63 |
**`allow_str_format`**: (`bool`): Whether or not to allow the use of `str.format`. Defaults to False, as it's considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
|
64 |
|
65 |
+
**`allow_underscore_variable_names`**: (`bool`): Whether or not to allow variable names that start with an underscore. Defaults to False, as this is considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name): underscore-prefixed names are conventionally private, and the restricted compiler rejects them unless this option is enabled.
|
66 |
+
|
67 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
68 |
|
69 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
restrictedpython_code_eval.py
CHANGED
@@ -38,7 +38,7 @@ import evaluate
|
|
38 |
import datasets
|
39 |
import numpy as np
|
40 |
from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
|
41 |
-
from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR
|
42 |
from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
|
43 |
from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
|
44 |
|
@@ -78,7 +78,7 @@ def safer_getattr_allowing_string_format(object, name, default=None, getattr=get
|
|
78 |
return getattr(object, name, default)
|
79 |
|
80 |
|
81 |
-
class
|
82 |
def __init__(self, *args, **kwargs):
|
83 |
super().__init__(*args, **kwargs)
|
84 |
|
@@ -114,9 +114,22 @@ class CodeEvalRestrictingTransformer(RestrictingNodeTransformer):
|
|
114 |
|
115 |
return super().visit_AugAssign(node)
|
116 |
|
117 |
-
# TODO: decide if I should override the method below to allow variable names that start with an underscore
|
118 |
-
# def check_name(self, node, name, allow_magic_methods=False):
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
|
122 |
# TODO: Add BibTeX citation
|
@@ -152,6 +165,7 @@ Args:
|
|
152 |
additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
|
153 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
154 |
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
|
|
155 |
|
156 |
Returns:
|
157 |
pass_at_k: dict with pass rates for each k
|
@@ -238,7 +252,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
238 |
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
|
239 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
240 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
241 |
-
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False
|
|
|
242 |
"""Returns the scores"""
|
243 |
|
244 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
@@ -260,7 +275,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
260 |
test_program, timeout, task_id, completion_id[task_id],
|
261 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
262 |
additional_globals, additional_locals,
|
263 |
-
allowed_imports, allow_str_format,
|
264 |
)
|
265 |
future = executor.submit(_check_correctness, *args)
|
266 |
futures.append(future)
|
@@ -308,7 +323,8 @@ def estimate_pass_at_k(num_samples, num_correct, k):
|
|
308 |
def _check_correctness(check_program, timeout, task_id, completion_id,
|
309 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
310 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
311 |
-
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False
|
|
|
312 |
"""
|
313 |
Evaluates the functional correctness of a completion by running the test
|
314 |
suite provided in the problem.
|
@@ -323,7 +339,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
323 |
check_program, result, timeout,
|
324 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
325 |
additional_globals, additional_locals,
|
326 |
-
allowed_imports, allow_str_format,
|
327 |
)
|
328 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
329 |
p.start()
|
@@ -396,7 +412,8 @@ class DefaultPrinter:
|
|
396 |
def _unsafe_execute(check_program, result, timeout,
|
397 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
398 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
399 |
-
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False
|
|
|
400 |
|
401 |
with create_tempdir():
|
402 |
|
@@ -477,8 +494,11 @@ def _unsafe_execute(check_program, result, timeout,
|
|
477 |
exec_globals['_apply_'] = _apply # type: ignore
|
478 |
|
479 |
with swallow_io():
|
|
|
|
|
|
|
480 |
with time_limit(timeout):
|
481 |
-
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=
|
482 |
exec(byte_code, exec_globals, additional_locals)
|
483 |
result.append("passed")
|
484 |
except EOFError:
|
|
|
38 |
import datasets
|
39 |
import numpy as np
|
40 |
from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
|
41 |
+
from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR, FORBIDDEN_FUNC_NAMES
|
42 |
from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
|
43 |
from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
|
44 |
|
|
|
78 |
return getattr(object, name, default)
|
79 |
|
80 |
|
81 |
+
class AllowAugmentedAssignRestrictingTransformer(RestrictingNodeTransformer):
|
82 |
def __init__(self, *args, **kwargs):
|
83 |
super().__init__(*args, **kwargs)
|
84 |
|
|
|
114 |
|
115 |
return super().visit_AugAssign(node)
|
116 |
|
|
|
|
|
117 |
|
118 |
+
class AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer(AllowAugmentedAssignRestrictingTransformer):
|
119 |
+
def __init__(self, *args, **kwargs):
|
120 |
+
super().__init__(*args, **kwargs)
|
121 |
+
|
122 |
+
def check_name(self, node, name, allow_magic_methods=False):
|
123 |
+
if name is None:
|
124 |
+
return
|
125 |
+
|
126 |
+
if name.startswith('_'):
|
127 |
+
# Verify it doesn't do anything else that's not allowed
|
128 |
+
if not name.endswith('__roles__') and not name in FORBIDDEN_FUNC_NAMES:
|
129 |
+
return
|
130 |
+
|
131 |
+
# Otherwise, flow to parent logic
|
132 |
+
return super().check_name(node, name, allow_magic_methods)
|
133 |
|
134 |
|
135 |
# TODO: Add BibTeX citation
|
|
|
165 |
additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
|
166 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
167 |
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
168 |
+
allow_underscore_variable_names: a bool indicating whether to allow variable names that start with an underscore in the tested code
|
169 |
|
170 |
Returns:
|
171 |
pass_at_k: dict with pass rates for each k
|
|
|
252 |
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
|
253 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
254 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
255 |
+
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
256 |
+
allow_underscore_variable_names: bool = False):
|
257 |
"""Returns the scores"""
|
258 |
|
259 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
275 |
test_program, timeout, task_id, completion_id[task_id],
|
276 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
277 |
additional_globals, additional_locals,
|
278 |
+
allowed_imports, allow_str_format, allow_underscore_variable_names
|
279 |
)
|
280 |
future = executor.submit(_check_correctness, *args)
|
281 |
futures.append(future)
|
|
|
323 |
def _check_correctness(check_program, timeout, task_id, completion_id,
|
324 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
325 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
326 |
+
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
327 |
+
allow_underscore_variable_names: bool = False):
|
328 |
"""
|
329 |
Evaluates the functional correctness of a completion by running the test
|
330 |
suite provided in the problem.
|
|
|
339 |
check_program, result, timeout,
|
340 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
341 |
additional_globals, additional_locals,
|
342 |
+
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
343 |
)
|
344 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
345 |
p.start()
|
|
|
412 |
def _unsafe_execute(check_program, result, timeout,
|
413 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
414 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
415 |
+
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
416 |
+
allow_underscore_variable_names: bool = False):
|
417 |
|
418 |
with create_tempdir():
|
419 |
|
|
|
494 |
exec_globals['_apply_'] = _apply # type: ignore
|
495 |
|
496 |
with swallow_io():
|
497 |
+
policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
|
498 |
+
|
499 |
+
|
500 |
with time_limit(timeout):
|
501 |
+
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
|
502 |
exec(byte_code, exec_globals, additional_locals)
|
503 |
result.append("passed")
|
504 |
except EOFError:
|