Spaces:

bigcode
/

bigcodebench-interaction

Runtime error

App Files Files Community

Terry Zhuo committed on Oct 25

Commit

7ecd52b

•

1 Parent(s): 7997b03

update

Browse files

Files changed (4) hide show

Dockerfile +5 -0
README.md +12 -5
app.py +95 -0
pyproject.toml +13 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,5 @@

+# Build on the prebuilt BigCodeBench Gradio image.
+FROM bigcodebench/bigcodebench-gradio:latest
+# Copy the whole build context (app.py, README.md, pyproject.toml, ...) into /app.
+COPY . /app
+# Port the container advertises for the web UI.
+EXPOSE 7860
+# Bind address handed to Gradio through its environment variable; 0.0.0.0 means all interfaces.
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+# Start the app. No WORKDIR is set here, so "app.py" is resolved against the
+# base image's working directory -- presumably /app; TODO confirm.
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,10 +1,17 @@
 ---
-title: Bigcodebench Interaction
-emoji: 👁
-colorFrom: indigo
 colorTo: indigo
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: BigCodeBench Evaluator
+emoji: 🥇
+colorFrom: green
 colorTo: indigo
 sdk: docker
+app_file: app.py
+disable_embedding: true
 pinned: false
+license: apache-2.0
+tags:
+- leaderboard
+- eval:code
+- test:public
+- judge:auto
 ---
+Paper: https://arxiv.org/abs/2406.15877

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import gradio as gr
+import io
+import sys
+import logging
+import multiprocessing
+import os
+import pickle
+import threading
+import time
+from collections import Counter, defaultdict
+from concurrent.futures import ProcessPoolExecutor, as_completed, wait, FIRST_COMPLETED
+from datetime import datetime
+from typing import Any, Dict, List, Tuple
+from warnings import warn
+from contextlib import redirect_stdout, redirect_stderr
+import numpy as np
+from huggingface_hub import HfApi
+from bigcodebench.data.utils import CACHE_DIR
+from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
+from bigcodebench.gen.util import trusted_check
+from apscheduler.schedulers.background import BackgroundScheduler
+REPO_ID = "bigcode/bigcodebench-interaction"
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
+API = HfApi(token=HF_TOKEN)
+Result = Tuple[str, List[bool]]
+def run_code(code: str) -> str:
+    # Create string buffers to capture output
+    stdout_buffer = io.StringIO()
+    stderr_buffer = io.StringIO()
+    # Create a dictionary for local variables
+    local_dict = {}
+    # Capture both stdout and stderr
+    with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
+        try:
+            # Execute the code
+            exec(code, globals(), local_dict)
+            # Get the output
+            output = stdout_buffer.getvalue()
+            errors = stderr_buffer.getvalue()
+            # If there's a return value in the last expression, capture it
+            last_line = code.strip().split('\n')[-1]
+            if not (last_line.startswith('print') or last_line.strip() == ''):
+                try:
+                    result = eval(last_line, globals(), local_dict)
+                    if result is not None:
+                        output += f"\n>>> {result}"
+                except:
+                    pass
+            # Combine stdout and stderr
+            result = output
+            if errors:
+                result += "\n--- Errors ---\n" + errors
+        except Exception as e:
+            # Capture any execution errors
+            result = f"Error: {str(e)}"
+    return result if result.strip() else "Code executed successfully (no output)"
+# Create the Gradio interface with better styling
+interface = gr.Interface(
+    fn=run_code,
+    inputs=[
+        gr.Code(label="Python Code", language="python"),
+    ],
+    outputs=[
+        gr.Textbox(label="Output")
+    ],
+)
+interface.queue(default_concurrency_limit=None)
+def restart_space():
+    logging.info(f"Restarting space with repo ID: {REPO_ID}")
+    try:
+        # Now restart the space
+        API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
+        logging.info("Space restarted successfully.")
+    except Exception as e:
+        logging.error(f"Failed to restart space: {e}")
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", hours=5)  # Restart every 5hs
+scheduler.start()
+interface.launch(show_error=True)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+# Linter configuration.
+[tool.ruff]
+# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
+select = ["E", "F"]
+ignore = ["E501"] # line too long (black is taking care of this)
+# Same limit as the isort and black sections below.
+line-length = 119
+# Rule families ruff may auto-fix. The list is broader than `select`, which
+# enables only E and F.
+fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
+# Import sorting, using the black-compatible profile and the shared line length.
+[tool.isort]
+profile = "black"
+line_length = 119
+# Code formatter, same line length as ruff and isort.
+[tool.black]
+line-length = 119