Spaces:
Sleeping
Sleeping
Added some additional parameters to control `RestrictedPython` behavior.
Browse files- README.md +10 -2
- restrictedpython_code_eval.py +207 -7
README.md
CHANGED
@@ -46,7 +46,7 @@ The following arguments are inherited from the basic `code_eval`:
|
|
46 |
|
47 |
**`timeout`** (`float`): The maximum time taken to produce a prediction before it is considered a "timeout". The default value is `3.0` (i.e. 3 seconds).
|
48 |
|
49 |
-
In addition, this metric supports three additional arguments, specifying which
|
50 |
|
51 |
**`use_safe_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.safe_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Guards.py#L23), defaults to True
|
52 |
|
@@ -54,7 +54,15 @@ In addition, this metric supports three additional arguments, specifying which d
|
|
54 |
|
55 |
**`use_utility_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.utility_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Utilities.py#L19), which includes the `string`, `math`, `random`, and `set` packages, among others. Defaults to True.
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
### Output Values
|
60 |
|
|
|
46 |
|
47 |
**`timeout`** (`float`): The maximum time taken to produce a prediction before it is considered a "timeout". The default value is `3.0` (i.e. 3 seconds).
|
48 |
|
49 |
+
In addition, this metric supports several additional arguments, specifying which imports should be made available and controlling other aspects of `RestrictedPython` behavior:
|
50 |
|
51 |
**`use_safe_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.safe_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Guards.py#L23), defaults to True
|
52 |
|
|
|
54 |
|
55 |
**`use_utility_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.utility_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Utilities.py#L19), which includes the `string`, `math`, `random`, and `set` packages, among others. Defaults to True.
|
56 |
|
57 |
+
**`additional_globals`** (`Dict[str, Any] | None`): Any additional `globals` to make available to the code. Defaults to None.
|
58 |
+
|
59 |
+
**`additional_locals`** (`Dict[str, Any] | None`): Any additional `locals` to make available to the code. Defaults to None.
|
60 |
+
|
61 |
+
**`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
|
62 |
+
|
63 |
+
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
64 |
+
|
65 |
+
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_`. See the code for additional details.
|
66 |
|
67 |
### Output Values
|
68 |
|
restrictedpython_code_eval.py
CHANGED
@@ -19,12 +19,15 @@ Lightly adapted and mostly copied verbatim from the implementation in `evaluate`
|
|
19 |
import contextlib
|
20 |
import faulthandler
|
21 |
import itertools
|
|
|
22 |
import io
|
23 |
import multiprocessing
|
24 |
import os
|
25 |
import platform
|
26 |
import signal
|
27 |
import tempfile
|
|
|
|
|
28 |
|
29 |
from collections import Counter, defaultdict
|
30 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
@@ -34,6 +37,8 @@ import evaluate
|
|
34 |
import datasets
|
35 |
import numpy as np
|
36 |
from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
|
|
|
|
|
37 |
|
38 |
|
39 |
# TODO: Add BibTeX citation
|
@@ -65,6 +70,10 @@ Args:
|
|
65 |
use_safe_builtins: a bool indicating whether to use the `RestrictedPython.safe_builtins`
|
66 |
use_limited_builtins: a bool indicating whether to use the `RestrictedPython.limited_builtins`
|
67 |
use_utility_builtins: a bool indicating whether to use the `RestrictedPython.utility_builtins`
|
|
|
|
|
|
|
|
|
68 |
Returns:
|
69 |
pass_at_k: dict with pass rates for each k
|
70 |
results: dict with granular results of each unittest
|
@@ -148,7 +157,9 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
148 |
)
|
149 |
|
150 |
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
|
151 |
-
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True
|
|
|
|
|
152 |
"""Returns the scores"""
|
153 |
|
154 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
@@ -166,7 +177,11 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
166 |
for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
|
167 |
for candidate in candidates:
|
168 |
test_program = candidate + "\n" + test_case
|
169 |
-
args = (
|
|
|
|
|
|
|
|
|
170 |
future = executor.submit(_check_correctness, *args)
|
171 |
futures.append(future)
|
172 |
completion_id[task_id] += 1
|
@@ -211,7 +226,9 @@ def estimate_pass_at_k(num_samples, num_correct, k):
|
|
211 |
|
212 |
|
213 |
def _check_correctness(check_program, timeout, task_id, completion_id,
|
214 |
-
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True
|
|
|
|
|
215 |
"""
|
216 |
Evaluates the functional correctness of a completion by running the test
|
217 |
suite provided in the problem.
|
@@ -222,7 +239,12 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
222 |
manager = multiprocessing.Manager()
|
223 |
result = manager.list()
|
224 |
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
226 |
p.start()
|
227 |
p.join(timeout=timeout + 1)
|
228 |
if p.is_alive():
|
@@ -238,8 +260,36 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
238 |
completion_id=completion_id,
|
239 |
)
|
240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
def _unsafe_execute(check_program, result, timeout,
|
242 |
-
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True
|
|
|
|
|
243 |
|
244 |
with create_tempdir():
|
245 |
|
@@ -265,10 +315,42 @@ def _unsafe_execute(check_program, result, timeout,
|
|
265 |
builtins.update(utility_builtins)
|
266 |
|
267 |
exec_globals = {'__builtins__': builtins}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
with swallow_io():
|
269 |
with time_limit(timeout):
|
270 |
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
|
271 |
-
exec(byte_code, exec_globals,
|
272 |
result.append("passed")
|
273 |
except TimeoutException:
|
274 |
result.append("timed out")
|
@@ -428,4 +510,122 @@ def reliability_guard(maximum_memory_bytes=None):
|
|
428 |
sys.modules["joblib"] = None # type: ignore
|
429 |
sys.modules["resource"] = None # type: ignore
|
430 |
sys.modules["psutil"] = None # type: ignore
|
431 |
-
sys.modules["tkinter"] = None # type: ignore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
import contextlib
|
20 |
import faulthandler
|
21 |
import itertools
|
22 |
+
import importlib
|
23 |
import io
|
24 |
import multiprocessing
|
25 |
import os
|
26 |
import platform
|
27 |
import signal
|
28 |
import tempfile
|
29 |
+
import types
|
30 |
+
from typing import Optional, Dict, List, Any
|
31 |
|
32 |
from collections import Counter, defaultdict
|
33 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
37 |
import datasets
|
38 |
import numpy as np
|
39 |
from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
|
40 |
+
from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
|
41 |
+
from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr
|
42 |
|
43 |
|
44 |
# TODO: Add BibTeX citation
|
|
|
70 |
use_safe_builtins: a bool indicating whether to use the `RestrictedPython.safe_builtins`
|
71 |
use_limited_builtins: a bool indicating whether to use the `RestrictedPython.limited_builtins`
|
72 |
use_utility_builtins: a bool indicating whether to use the `RestrictedPython.utility_builtins`
|
73 |
+
additional_globals: a optional dict of additional globals to pass to the RestrictedPython interpreter
|
74 |
+
additional_locals: a optional dict of additional locals to pass to the RestrictedPython interpreter
|
75 |
+
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
76 |
+
|
77 |
Returns:
|
78 |
pass_at_k: dict with pass rates for each k
|
79 |
results: dict with granular results of each unittest
|
|
|
157 |
)
|
158 |
|
159 |
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
|
160 |
+
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
161 |
+
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
162 |
+
allowed_imports: Optional[List[str]] = None):
|
163 |
"""Returns the scores"""
|
164 |
|
165 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
177 |
for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
|
178 |
for candidate in candidates:
|
179 |
test_program = candidate + "\n" + test_case
|
180 |
+
args = (
|
181 |
+
test_program, timeout, task_id, completion_id[task_id],
|
182 |
+
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
183 |
+
additional_globals, additional_locals, allowed_imports
|
184 |
+
)
|
185 |
future = executor.submit(_check_correctness, *args)
|
186 |
futures.append(future)
|
187 |
completion_id[task_id] += 1
|
|
|
226 |
|
227 |
|
228 |
def _check_correctness(check_program, timeout, task_id, completion_id,
|
229 |
+
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
230 |
+
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
231 |
+
allowed_imports: Optional[List[str]] = None):
|
232 |
"""
|
233 |
Evaluates the functional correctness of a completion by running the test
|
234 |
suite provided in the problem.
|
|
|
239 |
manager = multiprocessing.Manager()
|
240 |
result = manager.list()
|
241 |
|
242 |
+
args = (
|
243 |
+
check_program, result, timeout,
|
244 |
+
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
245 |
+
additional_globals, additional_locals, allowed_imports
|
246 |
+
)
|
247 |
+
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
248 |
p.start()
|
249 |
p.join(timeout=timeout + 1)
|
250 |
if p.is_alive():
|
|
|
260 |
completion_id=completion_id,
|
261 |
)
|
262 |
|
263 |
+
|
264 |
+
class AllowListImporter:
    """Callable stand-in for ``__import__`` that enforces an allow-list.

    An instance accepts the same arguments as the builtin ``__import__``.
    The request is delegated to ``importlib.__import__`` only when the
    top-level package of ``name`` is listed in ``allowed_imports``; every
    other request raises ``ImportError``.
    """

    def __init__(self, allowed_imports: List[str]):
        """Store the collection of top-level package names that may be imported."""
        self.allowed_imports = allowed_imports

    def __call__(self, name, globals=None, locals=None, fromlist=(), level=0):
        """Import ``name`` if its top-level package is allow-listed.

        Args:
            name: dotted module name requested by the import statement.
            globals, locals, fromlist, level: forwarded unchanged to
                ``importlib.__import__`` for permitted imports.

        Returns:
            Whatever ``importlib.__import__`` returns for the request.

        Raises:
            ImportError: for relative imports and for any module whose
                top-level package is not in ``allowed_imports``.
        """
        # Relative imports arrive as ``level > 0`` (with ``name`` possibly
        # empty), so checking the name for a leading dot alone is not enough.
        if level != 0 or name.startswith('.'):
            raise ImportError("Relative imports are not allowed.")

        # Only the top-level package is compared against the allow-list, so
        # allowing "os" also allows "os.path".
        package_name = name.partition('.')[0]

        if package_name not in self.allowed_imports:
            # Fail loudly: returning None here would make ``import x`` bind
            # ``x = None`` in the executed code instead of reporting the
            # rejected import.
            raise ImportError("Import of '{}' is not allowed.".format(name))

        return importlib.__import__(name, globals, locals, fromlist, level)
|
280 |
+
|
281 |
+
|
282 |
+
def _default_write_(obj):
|
283 |
+
if isinstance(obj, types.ModuleType):
|
284 |
+
raise ValueError("Modules are not allowed in to be written to.")
|
285 |
+
|
286 |
+
return obj
|
287 |
+
|
288 |
+
|
289 |
def _unsafe_execute(check_program, result, timeout,
|
290 |
+
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
291 |
+
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
292 |
+
allowed_imports: Optional[List[str]] = None):
|
293 |
|
294 |
with create_tempdir():
|
295 |
|
|
|
315 |
builtins.update(utility_builtins)
|
316 |
|
317 |
exec_globals = {'__builtins__': builtins}
|
318 |
+
exec_globals.update(additional_globals or {})
|
319 |
+
|
320 |
+
if allowed_imports is not None:
|
321 |
+
if '__import__' in exec_globals['__builtins__']:
|
322 |
+
raise ValueError("Cannot specify allowed_imports when __import__ is in additional_globals.")
|
323 |
+
|
324 |
+
exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
|
325 |
+
|
326 |
+
if '__metaclass__' not in exec_globals:
|
327 |
+
exec_globals['__metaclass__'] = type # type: ignore
|
328 |
+
|
329 |
+
if '__name__' not in exec_globals:
|
330 |
+
exec_globals['__name__'] = '__main__' # type: ignore
|
331 |
+
|
332 |
+
if '_getiter_' not in exec_globals:
|
333 |
+
exec_globals['_getiter_'] = default_guarded_getiter # type: ignore
|
334 |
+
|
335 |
+
if '_iter_unpack_sequence_' not in exec_globals:
|
336 |
+
exec_globals['_iter_unpack_sequence_'] = guarded_iter_unpack_sequence # type: ignore
|
337 |
+
|
338 |
+
if '_getitem_' not in exec_globals:
|
339 |
+
exec_globals['_getitem_'] = default_guarded_getitem # type: ignore
|
340 |
+
|
341 |
+
if 'getattr' not in exec_globals:
|
342 |
+
exec_globals['getattr'] = safer_getattr # type: ignore
|
343 |
+
|
344 |
+
if '_write_' not in exec_globals:
|
345 |
+
exec_globals['_write_'] = _default_write_ # type: ignore
|
346 |
+
|
347 |
+
if '_inplacevar_' not in exec_globals:
|
348 |
+
exec_globals['_inplacevar_'] = protected_inplacevar # type: ignore
|
349 |
+
|
350 |
with swallow_io():
|
351 |
with time_limit(timeout):
|
352 |
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
|
353 |
+
exec(byte_code, exec_globals, additional_locals)
|
354 |
result.append("passed")
|
355 |
except TimeoutException:
|
356 |
result.append("timed out")
|
|
|
510 |
sys.modules["joblib"] = None # type: ignore
|
511 |
sys.modules["resource"] = None # type: ignore
|
512 |
sys.modules["psutil"] = None # type: ignore
|
513 |
+
sys.modules["tkinter"] = None # type: ignore
|
514 |
+
|
515 |
+
|
516 |
+
"""
|
517 |
+
Borrowed implementation of _inplacevar_ from the Zope Foundation's AccessControl module
|
518 |
+
https://github.com/zopefoundation/AccessControl/blob/f9ae58816f0712eb6ea97459b4ccafbf4662d9db/src/AccessControl/ZopeGuards.py#L530
|
519 |
+
"""
|
520 |
+
|
521 |
+
# Types that are allowed to use their own in-place special methods from
# restricted code.
valid_inplace_types = (list, set)


# Augmented-assignment operator -> name of the special method that would
# implement it in place on the target object.
inplace_slots = {
    '+=': '__iadd__',
    '-=': '__isub__',
    '*=': '__imul__',
    # Python 3 only has true division for ``/=``; the former Python 2
    # ``__idiv__`` alternative could never be selected.
    '/=': '__itruediv__',
    '//=': '__ifloordiv__',
    '%=': '__imod__',
    '**=': '__ipow__',
    '<<=': '__ilshift__',
    '>>=': '__irshift__',
    '&=': '__iand__',
    '^=': '__ixor__',
    '|=': '__ior__',
    '@=': '__imatmul__',
}


def __iadd__(x, y):
    """Return the result of ``x += y``."""
    x += y
    return x


def __isub__(x, y):
    """Return the result of ``x -= y``."""
    x -= y
    return x


def __imul__(x, y):
    """Return the result of ``x *= y``."""
    x *= y
    return x


def __idiv__(x, y):
    """Return the result of ``x /= y`` (true division on Python 3)."""
    x /= y
    return x


def __ifloordiv__(x, y):
    """Return the result of ``x //= y``."""
    x //= y
    return x


def __imod__(x, y):
    """Return the result of ``x %= y``."""
    x %= y
    return x


def __ipow__(x, y):
    """Return the result of ``x **= y``."""
    x **= y
    return x


def __ilshift__(x, y):
    """Return the result of ``x <<= y``."""
    x <<= y
    return x


def __irshift__(x, y):
    """Return the result of ``x >>= y``."""
    x >>= y
    return x


def __iand__(x, y):
    """Return the result of ``x &= y``."""
    x &= y
    return x


def __ixor__(x, y):
    """Return the result of ``x ^= y``."""
    x ^= y
    return x


def __ior__(x, y):
    """Return the result of ``x |= y``."""
    x |= y
    return x


def __imatmul__(x, y):
    """Return the result of ``x @= y``."""
    x @= y
    return x


# Augmented-assignment operator -> helper that performs it. The keys must
# stay in sync with ``inplace_slots``.
inplace_ops = {
    '+=': __iadd__,
    '-=': __isub__,
    '*=': __imul__,
    '/=': __idiv__,
    '//=': __ifloordiv__,
    '%=': __imod__,
    '**=': __ipow__,
    '<<=': __ilshift__,
    '>>=': __irshift__,
    '&=': __iand__,
    '^=': __ixor__,
    '|=': __ior__,
    '@=': __imatmul__,
}


def protected_inplacevar(op, var, expr):
    """Do an inplace operation

    If the var has an inplace slot, then disallow the operation
    unless the var is an instance of ``valid_inplace_types``.

    Args:
        op: the augmented-assignment operator as a string, e.g. ``'+='``.
        var: the current value of the assignment target.
        expr: the value of the right-hand side.

    Returns:
        The value to bind back to the assignment target.

    Raises:
        TypeError: if ``var`` defines the in-place special method for ``op``
            and is not an instance of ``valid_inplace_types``.
        KeyError: if ``op`` is not a key of ``inplace_slots``.
    """
    if hasattr(var, inplace_slots[op]) and \
            not isinstance(var, valid_inplace_types):
        try:
            cls = var.__class__
        except AttributeError:
            cls = type(var)
        raise TypeError(
            "Augmented assignment to %s objects is not allowed"
            " in untrusted code" % cls.__name__)
    return inplace_ops[op](var, expr)
|