future-xy
committed on
Commit
·
d936aea
1
Parent(s):
88d1c0e
improve local debug
Browse files
- backend-cli.py +19 -11
- src/backend/envs.py +0 -2
backend-cli.py
CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
|
|
11 |
from src.backend.run_eval_suite import run_evaluation
|
12 |
from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
|
13 |
from src.backend.sort_queue import sort_models_by_priority
|
14 |
-
from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE,
|
15 |
from src.backend.manage_requests import EvalRequest
|
16 |
from src.leaderboard.read_evals import EvalResult
|
17 |
|
@@ -122,7 +122,7 @@ def request_to_result_name(request: EvalRequest) -> str:
|
|
122 |
return res
|
123 |
|
124 |
|
125 |
-
def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
126 |
batch_size = 1
|
127 |
try:
|
128 |
results = run_evaluation(
|
@@ -132,7 +132,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
|
132 |
batch_size=batch_size,
|
133 |
device=DEVICE,
|
134 |
use_cache=None,
|
135 |
-
limit=LIMIT,
|
136 |
)
|
137 |
except RuntimeError as e:
|
138 |
if "No executable batch size found" in str(e):
|
@@ -144,7 +144,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
|
144 |
batch_size=batch_size,
|
145 |
device=DEVICE,
|
146 |
use_cache=None,
|
147 |
-
limit=LIMIT,
|
148 |
)
|
149 |
else:
|
150 |
raise
|
@@ -395,6 +395,12 @@ def process_pending_requests() -> bool:
|
|
395 |
def get_args():
|
396 |
parser = argparse.ArgumentParser(description="Run the backend")
|
397 |
parser.add_argument("--debug", action="store_true", help="Run in debug mode")
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
return parser.parse_args()
|
399 |
|
400 |
|
@@ -403,11 +409,8 @@ if __name__ == "__main__":
|
|
403 |
local_debug = args.debug
|
404 |
# debug specific task by ping
|
405 |
if local_debug:
|
406 |
-
|
407 |
-
|
408 |
-
# debug_model_names = ["TheBloke/Mixtral-8x7B-v0.1-GPTQ"]
|
409 |
-
debug_task_name = 'selfcheckgpt'
|
410 |
-
# debug_task_name = "mmlu"
|
411 |
task_lst = TASKS_HARNESS.copy()
|
412 |
for task in task_lst:
|
413 |
for debug_model_name in debug_model_names:
|
@@ -415,9 +418,14 @@ if __name__ == "__main__":
|
|
415 |
if task_name != debug_task_name:
|
416 |
continue
|
417 |
eval_request = EvalRequest(
|
418 |
-
model=debug_model_name,
|
|
|
|
|
|
|
|
|
|
|
419 |
)
|
420 |
-
results = process_evaluation(task, eval_request)
|
421 |
else:
|
422 |
while True:
|
423 |
res = False
|
|
|
11 |
from src.backend.run_eval_suite import run_evaluation
|
12 |
from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
|
13 |
from src.backend.sort_queue import sort_models_by_priority
|
14 |
+
from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, Task
|
15 |
from src.backend.manage_requests import EvalRequest
|
16 |
from src.leaderboard.read_evals import EvalResult
|
17 |
|
|
|
122 |
return res
|
123 |
|
124 |
|
125 |
+
def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
|
126 |
batch_size = 1
|
127 |
try:
|
128 |
results = run_evaluation(
|
|
|
132 |
batch_size=batch_size,
|
133 |
device=DEVICE,
|
134 |
use_cache=None,
|
135 |
+
limit=limit,
|
136 |
)
|
137 |
except RuntimeError as e:
|
138 |
if "No executable batch size found" in str(e):
|
|
|
144 |
batch_size=batch_size,
|
145 |
device=DEVICE,
|
146 |
use_cache=None,
|
147 |
+
limit=limit,
|
148 |
)
|
149 |
else:
|
150 |
raise
|
|
|
395 |
def get_args():
|
396 |
parser = argparse.ArgumentParser(description="Run the backend")
|
397 |
parser.add_argument("--debug", action="store_true", help="Run in debug mode")
|
398 |
+
# debug parameters
|
399 |
+
parser.add_argument("--task", type=str, default="selfcheckgpt", help="Task to debug")
|
400 |
+
parser.add_argument("--model", type=str, default="facebook/opt-1.3b", help="Model to debug")
|
401 |
+
parser.add_argument("--precision", type=str, default="float16", help="Precision to debug")
|
402 |
+
parser.add_argument("--inference-framework", type=str, default="hf-chat", help="Inference framework to debug")
|
403 |
+
parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
|
404 |
return parser.parse_args()
|
405 |
|
406 |
|
|
|
409 |
local_debug = args.debug
|
410 |
# debug specific task by ping
|
411 |
if local_debug:
|
412 |
+
debug_model_names = [args.model] # Use model from arguments
|
413 |
+
debug_task_name = args.task # Use task from arguments
|
|
|
|
|
|
|
414 |
task_lst = TASKS_HARNESS.copy()
|
415 |
for task in task_lst:
|
416 |
for debug_model_name in debug_model_names:
|
|
|
418 |
if task_name != debug_task_name:
|
419 |
continue
|
420 |
eval_request = EvalRequest(
|
421 |
+
model=debug_model_name,
|
422 |
+
private=False,
|
423 |
+
status="",
|
424 |
+
json_filepath="",
|
425 |
+
precision=args.precision, # Use precision from arguments
|
426 |
+
inference_framework=args.inference_framework # Use inference framework from arguments
|
427 |
)
|
428 |
+
results = process_evaluation(task, eval_request, limit=args.limit)
|
429 |
else:
|
430 |
while True:
|
431 |
res = False
|
src/backend/envs.py
CHANGED
@@ -64,5 +64,3 @@ EVAL_REQUESTS_PATH_BACKEND_SYNC = os.path.join(CACHE_PATH, "eval-queue-bk-sync")
|
|
64 |
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
|
65 |
|
66 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
67 |
-
|
68 |
-
LIMIT = None # Testing; needs to be None
|
|
|
64 |
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
|
65 |
|
66 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|