Spaces:

guydav
/

restrictedpython_code_eval

Sleeping

App Files Files Community

guydav committed on Jun 22, 2023

Commit

f13942b

1 Parent(s): b7ace50

Added another feature

Browse files

Files changed (2) hide show

README.md +2 -0
restrictedpython_code_eval.py +30 -10

README.md CHANGED Viewed

@@ -62,6 +62,8 @@ In addition, this metric supports three additional arguments, specifying which i
 **`allow_str_format`**: (`bool`): Whether or not to allow the use of `str.format`. Defaults to False, as it's considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
 As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
 Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.

 **`allow_str_format`**: (`bool`): Whether or not to allow the use of `str.format`. Defaults to False, as it's considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
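+**`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name). Even when enabled, names ending in `__roles__` or listed in RestrictedPython's `FORBIDDEN_FUNC_NAMES` are still passed through the default name check.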
 As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
 Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.

restrictedpython_code_eval.py CHANGED Viewed

@@ -38,7 +38,7 @@ import evaluate
 import datasets
 import numpy as np
 from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
-from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR
 from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
 from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
@@ -78,7 +78,7 @@ def safer_getattr_allowing_string_format(object, name, default=None, getattr=get
     return getattr(object, name, default)
-class CodeEvalRestrictingTransformer(RestrictingNodeTransformer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -114,9 +114,22 @@ class CodeEvalRestrictingTransformer(RestrictingNodeTransformer):
         return super().visit_AugAssign(node)
-    # TODO: decide if I should override the method below to allow variable names that start with an underscore
-    # def check_name(self, node, name, allow_magic_methods=False):
 # TODO: Add BibTeX citation
@@ -152,6 +165,7 @@ Args:
     additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
     allowed_imports: an optional list of strings, modules the tested code is allowed to import
     allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
 Returns:
     pass_at_k: dict with pass rates for each k
@@ -238,7 +252,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
     def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
                  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
-                 allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -260,7 +275,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
                         test_program, timeout, task_id, completion_id[task_id],
                         use_safe_builtins, use_limited_builtins, use_utility_builtins,
                         additional_globals, additional_locals,
-                        allowed_imports, allow_str_format,
                     )
                     future = executor.submit(_check_correctness, *args)
                     futures.append(future)
@@ -308,7 +323,8 @@ def estimate_pass_at_k(num_samples, num_correct, k):
 def _check_correctness(check_program, timeout, task_id, completion_id,
                        use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                        additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
-                       allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
@@ -323,7 +339,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
         check_program, result, timeout,
         use_safe_builtins, use_limited_builtins, use_utility_builtins,
         additional_globals, additional_locals,
-        allowed_imports, allow_str_format,
     )
     p = multiprocessing.Process(target=_unsafe_execute, args=args)
     p.start()
@@ -396,7 +412,8 @@ class DefaultPrinter:
 def _unsafe_execute(check_program, result, timeout,
                     use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                     additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
-                    allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
     with create_tempdir():
@@ -477,8 +494,11 @@ def _unsafe_execute(check_program, result, timeout,
                 exec_globals['_apply_'] = _apply  # type: ignore
             with swallow_io():
                 with time_limit(timeout):
-                    byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=CodeEvalRestrictingTransformer)
                     exec(byte_code, exec_globals, additional_locals)
             result.append("passed")
         except EOFError:

 import datasets
 import numpy as np
 from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
+from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR, FORBIDDEN_FUNC_NAMES
 from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
 from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
     return getattr(object, name, default)
+class AllowAugmentedAssignRestrictingTransformer(RestrictingNodeTransformer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         return super().visit_AugAssign(node)
+class AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer(AllowAugmentedAssignRestrictingTransformer):
+    """Restricting transformer that also accepts names starting with an underscore.
+
+    A name beginning with ``_`` is accepted without consulting the parent policy, unless it
+    ends with ``__roles__`` or is listed in ``FORBIDDEN_FUNC_NAMES``. Those names, and every
+    name that does not start with an underscore, are handed to the parent ``check_name``.
+
+    NOTE(review): the early return also admits names such as ``_write_``, ``_print_`` or
+    ``_apply_``, which this metric installs as guard globals. Confirm that letting tested
+    code bind those names is acceptable before enabling this policy for untrusted input.
+    """
+
+    def check_name(self, node, name, allow_magic_methods=False):
+        """Validate ``name``, short-circuiting for permitted underscore-prefixed names.
+
+        Args:
+            node: the AST node the name belongs to; forwarded to the parent check.
+            name: the identifier to validate, or ``None`` when there is nothing to check.
+            allow_magic_methods: forwarded unchanged to the parent check.
+
+        Returns:
+            ``None`` when the name is accepted here; otherwise whatever the parent
+            ``check_name`` returns.
+        """
+        if name is None:
+            return
+        # Accept underscore-prefixed names here, except the two categories that must still
+        # go through the parent's checks.
+        if name.startswith('_') and not name.endswith('__roles__') and name not in FORBIDDEN_FUNC_NAMES:
+            return
+        # Everything else flows to the parent logic.
+        return super().check_name(node, name, allow_magic_methods)
 # TODO: Add BibTeX citation
     additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
     allowed_imports: an optional list of strings, modules the tested code is allowed to import
     allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
+    allow_underscore_variable_names: a bool indicating whether to allow the use of variable names starting with an underscore in the tested code
 Returns:
     pass_at_k: dict with pass rates for each k
     def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
                  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
+                 allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
+                 allow_underscore_variable_names: bool = False):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
                         test_program, timeout, task_id, completion_id[task_id],
                         use_safe_builtins, use_limited_builtins, use_utility_builtins,
                         additional_globals, additional_locals,
+                        allowed_imports, allow_str_format, allow_underscore_variable_names
                     )
                     future = executor.submit(_check_correctness, *args)
                     futures.append(future)
 def _check_correctness(check_program, timeout, task_id, completion_id,
                        use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                        additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
+                       allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
+                       allow_underscore_variable_names: bool = False):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
         check_program, result, timeout,
         use_safe_builtins, use_limited_builtins, use_utility_builtins,
         additional_globals, additional_locals,
+        allowed_imports, allow_str_format, allow_underscore_variable_names,
     )
     p = multiprocessing.Process(target=_unsafe_execute, args=args)
     p.start()
 def _unsafe_execute(check_program, result, timeout,
                     use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                     additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
+                    allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
+                    allow_underscore_variable_names: bool = False):
     with create_tempdir():
                 exec_globals['_apply_'] = _apply  # type: ignore
             with swallow_io():
+                policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
                 with time_limit(timeout):
+                    byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
                     exec(byte_code, exec_globals, additional_locals)
             result.append("passed")
         except EOFError: