File size: 3,490 Bytes
1ffc326
 
 
 
 
d295ed3
8b85b8d
 
 
 
d295ed3
 
 
 
8b85b8d
1ffc326
d295ed3
8b85b8d
 
d295ed3
 
 
8b85b8d
d295ed3
8b85b8d
d295ed3
8b85b8d
 
d295ed3
8b88d2c
1ffc326
d295ed3
 
 
8b88d2c
 
 
1ffc326
 
d295ed3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ffc326
 
 
 
 
 
 
 
 
 
 
 
 
 
d295ed3
1ffc326
 
 
d295ed3
 
 
1ffc326
 
 
8b88d2c
1ffc326
 
 
 
 
8b88d2c
 
1ffc326
 
 
 
 
 
 
 
08ae6c5
d295ed3
6902167
d295ed3
6902167
19999b4
 
d295ed3
 
 
08ae6c5
1ffc326
d295ed3
 
1ffc326
d295ed3
 
 
 
 
 
 
 
1ffc326
d295ed3
 
 
95c19d6
1ffc326
 
d295ed3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import logging
import pprint

from huggingface_hub import snapshot_download

from src.backend.manage_requests import (
    FAILED_STATUS,
    FINISHED_STATUS,
    PENDING_STATUS,
    RUNNING_STATUS,
    check_completed_evals,
    get_eval_requests,
    set_eval_request,
)
from src.backend.run_eval_suite_lighteval import run_evaluation
from src.backend.sort_queue import sort_models_by_priority
from src.envs import (
    ACCELERATOR,
    API,
    EVAL_REQUESTS_PATH_BACKEND,
    EVAL_RESULTS_PATH_BACKEND,
    LIMIT,
    QUEUE_REPO,
    REGION,
    RESULTS_REPO,
    TASKS_LIGHTEVAL,
    TOKEN,
    VENDOR,
)
from src.logging import setup_logger


# Quiet the "openai" logger: it only emits records at WARNING level or above.
logging.getLogger("openai").setLevel(logging.WARNING)

# Module-wide logger, built by the project's own setup_logger helper.
logger = setup_logger(__name__)
# logging.basicConfig(level=logging.ERROR)

# Pretty-printer used to dump eval requests into the log, wrapped at 80 columns.
pp = pprint.PrettyPrinter(width=80)

# Import-time side effect: fetch the "main" revision of both Hub datasets into
# their local backend directories -- the results repo first, then the request
# queue -- using identical download settings for each.
for _repo_id, _local_dir in (
    (RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND),
    (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND),
):
    snapshot_download(
        repo_id=_repo_id,
        revision="main",
        local_dir=_local_dir,
        repo_type="dataset",
        max_workers=60,
        token=TOKEN,
    )
# Do not leave the loop variables behind in the module namespace.
del _repo_id, _local_dir


def run_auto_eval():
    """Pick up at most one pending eval request and run it.

    The function performs these steps in order:

    1. Calls ``check_completed_evals`` for requests in ``RUNNING_STATUS``,
       passing ``FINISHED_STATUS`` and ``FAILED_STATUS`` as the outcome
       statuses.
    2. Fetches the requests in ``PENDING_STATUS`` and orders them with
       ``sort_models_by_priority``.
    3. Returns early if there is nothing to run.
    4. Otherwise sets the first request to ``RUNNING_STATUS`` and passes it to
       ``run_evaluation`` with a hard-coded instance size/type.

    Returns:
        None.
    """
    # Housekeeping before picking new work: check evals that were running and
    # set the completed ones to finished.
    check_completed_evals(
        api=API,
        hf_repo=QUEUE_REPO,
        hf_repo_results=RESULTS_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
    )

    # Only PENDING requests are collected; add statuses to this list to run
    # other evals. The fetched requests go straight into the priority sort
    # (first submitted, first run).
    current_pending_status = [PENDING_STATUS]
    eval_requests = sort_models_by_priority(
        api=API,
        models=get_eval_requests(
            job_status=current_pending_status,
            hf_repo=QUEUE_REPO,
            local_dir=EVAL_REQUESTS_PATH_BACKEND,
        ),
    )

    logger.info(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")

    # Nothing queued: stop here.
    if not eval_requests:
        return

    # Take the head of the sorted queue and log it in full.
    eval_request = eval_requests[0]
    logger.info(pp.pformat(eval_request))

    # Flag the request as running before the evaluation starts.
    set_eval_request(
        api=API,
        eval_request=eval_request,
        set_to_status=RUNNING_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
    )

    # Still to be done: choose the instance from the model, e.g.
    #   instance_size, instance_type = get_instance_for_model(eval_request)
    # GPU example:
    #   instance_size, instance_type = "small", "g4dn.xlarge"
    # The CPU values below are used for now. Updated naming is available at
    # https://huggingface.co/docs/inference-endpoints/pricing
    instance_size = "x4"
    instance_type = "intel-icl"
    logger.info(
        f"Starting Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}"
    )

    run_evaluation(
        eval_request=eval_request,
        task_names=TASKS_LIGHTEVAL,
        limit=LIMIT,
        batch_size=1,
        local_dir=EVAL_RESULTS_PATH_BACKEND,
        accelerator=ACCELERATOR,
        vendor=VENDOR,
        region=REGION,
        instance_size=instance_size,
        instance_type=instance_type,
    )

    logger.info(
        f"Completed Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}"
    )


# Script entry point: when this file is executed directly (not imported),
# call run_auto_eval() once.
if __name__ == "__main__":
    run_auto_eval()