guydav commited on
Commit
7293ac9
1 Parent(s): 032ea0d

Added some additional parameters to control `RestrictedPython` behavior.

Browse files
Files changed (2) hide show
  1. README.md +10 -2
  2. restrictedpython_code_eval.py +207 -7
README.md CHANGED
@@ -46,7 +46,7 @@ The following arguments are inherited from the basic `code_eval`:
46
 
47
  **`timeout`** (`float`): The maximum time taken to produce a prediction before it is considered a "timeout". The default value is `3.0` (i.e. 3 seconds).
48
 
49
- In addition, this metric supports three additional arguments, specifying which default imports should be made available:
50
 
51
  **`use_safe_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.safe_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Guards.py#L23), defaults to True
52
 
@@ -54,7 +54,15 @@ In addition, this metric supports three additional arguments, specifying which d
54
 
55
  **`use_utility_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.utility_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Utilities.py#L19), which includes the `string`, `math`, `random`, and `set` packages, among others. Defaults to True.
56
 
57
- As the additional arguments are optional, this could be used as a drop-in replacement for `code_eval`.
 
 
 
 
 
 
 
 
58
 
59
  ### Output Values
60
 
 
46
 
47
  **`timeout`** (`float`): The maximum time taken to produce a prediction before it is considered a "timeout". The default value is `3.0` (i.e. 3 seconds).
48
 
49
+ In addition, this metric supports several additional arguments, specifying which imports should be made available and controlling other aspects of `RestrictedPython` behavior:
50
 
51
  **`use_safe_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.safe_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Guards.py#L23), defaults to True
52
 
 
54
 
55
  **`use_utility_builtins`** (`bool`): Whether or not to allow the usage of [`RestrictedPython.utility_builtins`](https://github.com/zopefoundation/RestrictedPython/blob/c31c133844ac2308f5cc930e934a7227a2a6a77b/src/RestrictedPython/Utilities.py#L19), which includes the `string`, `math`, `random`, and `set` packages, among others. Defaults to True.
56
 
57
+ **`additional_globals`** (`Dict[str, Any] | None`): Any additional `globals` to make available to the code. Defaults to None.
58
+
59
+ **`additional_locals`** (`Dict[str, Any] | None`): Any additional `locals` to make available to the code. Defaults to None.
60
+
61
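+ **`allowed_imports`** (`List[str] | None`): A list of top-level package names that the evaluated code is allowed to import (submodules of a listed package are allowed as well). Defaults to None, in which case no import hook is installed.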
62
+
63
+ As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
64
+
65
+ Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_`. See the code for additional details.
66
 
67
  ### Output Values
68
 
restrictedpython_code_eval.py CHANGED
@@ -19,12 +19,15 @@ Lightly adapted and mostly copied verbatim from the implementation in `evaluate`
19
  import contextlib
20
  import faulthandler
21
  import itertools
 
22
  import io
23
  import multiprocessing
24
  import os
25
  import platform
26
  import signal
27
  import tempfile
 
 
28
 
29
  from collections import Counter, defaultdict
30
  from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -34,6 +37,8 @@ import evaluate
34
  import datasets
35
  import numpy as np
36
  from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
 
 
37
 
38
 
39
  # TODO: Add BibTeX citation
@@ -65,6 +70,10 @@ Args:
65
  use_safe_builtins: a bool indicating whether to use the `RestrictedPython.safe_builtins`
66
  use_limited_builtins: a bool indicating whether to use the `RestrictedPython.limited_builtins`
67
  use_utility_builtins: a bool indicating whether to use the `RestrictedPython.utility_builtins`
 
 
 
 
68
  Returns:
69
  pass_at_k: dict with pass rates for each k
70
  results: dict with granular results of each unittest
@@ -148,7 +157,9 @@ class RestrictedPythonCodeEval(evaluate.Metric):
148
  )
149
 
150
  def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
151
- use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True):
 
 
152
  """Returns the scores"""
153
 
154
  if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -166,7 +177,11 @@ class RestrictedPythonCodeEval(evaluate.Metric):
166
  for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
167
  for candidate in candidates:
168
  test_program = candidate + "\n" + test_case
169
- args = (test_program, timeout, task_id, completion_id[task_id], use_safe_builtins, use_limited_builtins, use_utility_builtins)
 
 
 
 
170
  future = executor.submit(_check_correctness, *args)
171
  futures.append(future)
172
  completion_id[task_id] += 1
@@ -211,7 +226,9 @@ def estimate_pass_at_k(num_samples, num_correct, k):
211
 
212
 
213
  def _check_correctness(check_program, timeout, task_id, completion_id,
214
- use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True):
 
 
215
  """
216
  Evaluates the functional correctness of a completion by running the test
217
  suite provided in the problem.
@@ -222,7 +239,12 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
222
  manager = multiprocessing.Manager()
223
  result = manager.list()
224
 
225
- p = multiprocessing.Process(target=_unsafe_execute, args=(check_program, result, timeout, use_safe_builtins, use_limited_builtins, use_utility_builtins))
 
 
 
 
 
226
  p.start()
227
  p.join(timeout=timeout + 1)
228
  if p.is_alive():
@@ -238,8 +260,36 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
238
  completion_id=completion_id,
239
  )
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def _unsafe_execute(check_program, result, timeout,
242
- use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True):
 
 
243
 
244
  with create_tempdir():
245
 
@@ -265,10 +315,42 @@ def _unsafe_execute(check_program, result, timeout,
265
  builtins.update(utility_builtins)
266
 
267
  exec_globals = {'__builtins__': builtins}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  with swallow_io():
269
  with time_limit(timeout):
270
  byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
271
- exec(byte_code, exec_globals, None)
272
  result.append("passed")
273
  except TimeoutException:
274
  result.append("timed out")
@@ -428,4 +510,122 @@ def reliability_guard(maximum_memory_bytes=None):
428
  sys.modules["joblib"] = None # type: ignore
429
  sys.modules["resource"] = None # type: ignore
430
  sys.modules["psutil"] = None # type: ignore
431
- sys.modules["tkinter"] = None # type: ignore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  import contextlib
20
  import faulthandler
21
  import itertools
22
+ import importlib
23
  import io
24
  import multiprocessing
25
  import os
26
  import platform
27
  import signal
28
  import tempfile
29
+ import types
30
+ from typing import Optional, Dict, List, Any
31
 
32
  from collections import Counter, defaultdict
33
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
37
  import datasets
38
  import numpy as np
39
  from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
40
+ from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
41
+ from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr
42
 
43
 
44
  # TODO: Add BibTeX citation
 
70
  use_safe_builtins: a bool indicating whether to use the `RestrictedPython.safe_builtins`
71
  use_limited_builtins: a bool indicating whether to use the `RestrictedPython.limited_builtins`
72
  use_utility_builtins: a bool indicating whether to use the `RestrictedPython.utility_builtins`
73
+ additional_globals: a optional dict of additional globals to pass to the RestrictedPython interpreter
74
+ additional_locals: a optional dict of additional locals to pass to the RestrictedPython interpreter
75
+ allowed_imports: an optional list of string, modules the tested code is allowed to import
76
+
77
  Returns:
78
  pass_at_k: dict with pass rates for each k
79
  results: dict with granular results of each unittest
 
157
  )
158
 
159
  def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
160
+ use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
161
+ additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
162
+ allowed_imports: Optional[List[str]] = None):
163
  """Returns the scores"""
164
 
165
  if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
 
177
  for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
178
  for candidate in candidates:
179
  test_program = candidate + "\n" + test_case
180
+ args = (
181
+ test_program, timeout, task_id, completion_id[task_id],
182
+ use_safe_builtins, use_limited_builtins, use_utility_builtins,
183
+ additional_globals, additional_locals, allowed_imports
184
+ )
185
  future = executor.submit(_check_correctness, *args)
186
  futures.append(future)
187
  completion_id[task_id] += 1
 
226
 
227
 
228
  def _check_correctness(check_program, timeout, task_id, completion_id,
229
+ use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
230
+ additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
231
+ allowed_imports: Optional[List[str]] = None):
232
  """
233
  Evaluates the functional correctness of a completion by running the test
234
  suite provided in the problem.
 
239
  manager = multiprocessing.Manager()
240
  result = manager.list()
241
 
242
+ args = (
243
+ check_program, result, timeout,
244
+ use_safe_builtins, use_limited_builtins, use_utility_builtins,
245
+ additional_globals, additional_locals, allowed_imports
246
+ )
247
+ p = multiprocessing.Process(target=_unsafe_execute, args=args)
248
  p.start()
249
  p.join(timeout=timeout + 1)
250
  if p.is_alive():
 
260
  completion_id=completion_id,
261
  )
262
 
263
+
264
class AllowListImporter:
    """Callable stand-in for ``__import__`` that only imports allow-listed packages.

    An instance is meant to be installed as the ``__import__`` builtin of the
    restricted execution environment. A request is honoured only when the
    top-level package of the requested module (the part before the first
    ``.``) is listed in ``allowed_imports``; anything else raises
    ``ImportError``.

    Args:
        allowed_imports: top-level package names the executed code may import.
    """

    def __init__(self, allowed_imports: List[str]):
        self.allowed_imports = allowed_imports

    def __call__(self, name, globals=None, locals=None, fromlist=(), level=0):
        """Import ``name`` if its top-level package is allow-listed.

        The signature mirrors the builtin ``__import__``.

        Raises:
            ImportError: for relative imports, and for any module whose
                top-level package is not in ``allowed_imports``.
        """
        # The import machinery signals a relative import through a non-zero
        # ``level``; the leading dots are not part of ``name``. The
        # ``startswith`` check is kept for callers that pass a dotted-relative
        # name directly.
        if level != 0 or name.startswith('.'):
            raise ImportError("Relative imports are not allowed.")

        package_name = name.partition('.')[0]

        # Fail loudly: silently returning None here would bind the imported
        # name to None in the executed code instead of rejecting the import.
        if package_name not in self.allowed_imports:
            raise ImportError(f"Import of '{name}' is not allowed.")

        return importlib.__import__(name, globals, locals, fromlist, level)
280
+
281
+
282
+ def _default_write_(obj):
283
+ if isinstance(obj, types.ModuleType):
284
+ raise ValueError("Modules are not allowed in to be written to.")
285
+
286
+ return obj
287
+
288
+
289
  def _unsafe_execute(check_program, result, timeout,
290
+ use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
291
+ additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
292
+ allowed_imports: Optional[List[str]] = None):
293
 
294
  with create_tempdir():
295
 
 
315
  builtins.update(utility_builtins)
316
 
317
  exec_globals = {'__builtins__': builtins}
318
+ exec_globals.update(additional_globals or {})
319
+
320
+ if allowed_imports is not None:
321
+ if '__import__' in exec_globals['__builtins__']:
322
+ raise ValueError("Cannot specify allowed_imports when __import__ is in additional_globals.")
323
+
324
+ exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
325
+
326
+ if '__metaclass__' not in exec_globals:
327
+ exec_globals['__metaclass__'] = type # type: ignore
328
+
329
+ if '__name__' not in exec_globals:
330
+ exec_globals['__name__'] = '__main__' # type: ignore
331
+
332
+ if '_getiter_' not in exec_globals:
333
+ exec_globals['_getiter_'] = default_guarded_getiter # type: ignore
334
+
335
+ if '_iter_unpack_sequence_' not in exec_globals:
336
+ exec_globals['_iter_unpack_sequence_'] = guarded_iter_unpack_sequence # type: ignore
337
+
338
+ if '_getitem_' not in exec_globals:
339
+ exec_globals['_getitem_'] = default_guarded_getitem # type: ignore
340
+
341
+ if 'getattr' not in exec_globals:
342
+ exec_globals['getattr'] = safer_getattr # type: ignore
343
+
344
+ if '_write_' not in exec_globals:
345
+ exec_globals['_write_'] = _default_write_ # type: ignore
346
+
347
+ if '_inplacevar_' not in exec_globals:
348
+ exec_globals['_inplacevar_'] = protected_inplacevar # type: ignore
349
+
350
  with swallow_io():
351
  with time_limit(timeout):
352
  byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
353
+ exec(byte_code, exec_globals, additional_locals)
354
  result.append("passed")
355
  except TimeoutException:
356
  result.append("timed out")
 
510
  sys.modules["joblib"] = None # type: ignore
511
  sys.modules["resource"] = None # type: ignore
512
  sys.modules["psutil"] = None # type: ignore
513
+ sys.modules["tkinter"] = None # type: ignore
514
+
515
+
516
+ """
517
+ Borrowed implementation of _inplacevar_ from the Zope Foundation's AccessControl module
518
+ https://github.com/zopefoundation/AccessControl/blob/f9ae58816f0712eb6ea97459b4ccafbf4662d9db/src/AccessControl/ZopeGuards.py#L530
519
+ """
520
+
521
# Types whose in-place special methods may be invoked by restricted code.
valid_inplace_types = (list, set)


# Augmented-assignment operator -> name of the special method that would
# implement it in place on the target object.
inplace_slots = {
    '+=': '__iadd__',
    '-=': '__isub__',
    '*=': '__imul__',
    '/=': '__itruediv__',
    '//=': '__ifloordiv__',
    '%=': '__imod__',
    '**=': '__ipow__',
    '<<=': '__ilshift__',
    '>>=': '__irshift__',
    '&=': '__iand__',
    '^=': '__ixor__',
    '|=': '__ior__',
    '@=': '__imatmul__',
}


# Each helper below applies one augmented assignment to ``x`` and returns the
# result.

def __iadd__(x, y):
    x += y
    return x


def __isub__(x, y):
    x -= y
    return x


def __imul__(x, y):
    x *= y
    return x


def __idiv__(x, y):
    x /= y
    return x


def __ifloordiv__(x, y):
    x //= y
    return x


def __imod__(x, y):
    x %= y
    return x


def __ipow__(x, y):
    x **= y
    return x


def __ilshift__(x, y):
    x <<= y
    return x


def __irshift__(x, y):
    x >>= y
    return x


def __iand__(x, y):
    x &= y
    return x


def __ixor__(x, y):
    x ^= y
    return x


def __ior__(x, y):
    x |= y
    return x


def __imatmul__(x, y):
    x @= y
    return x


# Augmented-assignment operator -> helper that performs it. The keys must stay
# in sync with ``inplace_slots``.
inplace_ops = {
    '+=': __iadd__,
    '-=': __isub__,
    '*=': __imul__,
    '/=': __idiv__,
    '//=': __ifloordiv__,
    '%=': __imod__,
    '**=': __ipow__,
    '<<=': __ilshift__,
    '>>=': __irshift__,
    '&=': __iand__,
    '^=': __ixor__,
    '|=': __ior__,
    '@=': __imatmul__,
}


def protected_inplacevar(op, var, expr):
    """Perform the augmented assignment ``var <op> expr`` and return the result.

    If ``var`` defines the in-place special method for ``op``, the operation
    is refused unless ``var`` is an instance of ``valid_inplace_types``.
    Objects without such a method (for example ``int`` or ``str``) are always
    allowed.

    Args:
        op: the augmented-assignment operator as a string, e.g. ``'+='``.
        var: the current value of the assignment target.
        expr: the right-hand-side value.

    Returns:
        The value to bind back to the assignment target.

    Raises:
        TypeError: if ``var`` has the in-place slot for ``op`` but is not an
            instance of ``valid_inplace_types``.
        KeyError: if ``op`` is not a known augmented-assignment operator.
    """
    if hasattr(var, inplace_slots[op]) and \
            not isinstance(var, valid_inplace_types):
        try:
            cls = var.__class__
        except AttributeError:
            cls = type(var)
        raise TypeError(
            "Augmented assignment to %s objects is not allowed"
            " in untrusted code" % cls.__name__)
    return inplace_ops[op](var, expr)