Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
cache-management
#960
by
alozowski
HF Staff
- opened
app.py
CHANGED
|
@@ -60,8 +60,14 @@ NEW_DATA_ON_LEADERBOARD = True
|
|
| 60 |
LEADERBOARD_DF = None
|
| 61 |
|
| 62 |
def restart_space():
|
|
|
|
| 63 |
try:
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
|
| 66 |
logging.info("Space restarted successfully.")
|
| 67 |
except Exception as e:
|
|
@@ -107,23 +113,31 @@ def get_latest_data_leaderboard(leaderboard_initial_df=None):
|
|
| 107 |
global NEW_DATA_ON_LEADERBOARD
|
| 108 |
global LEADERBOARD_DF
|
| 109 |
if NEW_DATA_ON_LEADERBOARD:
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
NEW_DATA_ON_LEADERBOARD = False
|
| 125 |
else:
|
| 126 |
LEADERBOARD_DF = leaderboard_initial_df
|
|
|
|
| 127 |
return LEADERBOARD_DF
|
| 128 |
|
| 129 |
|
|
@@ -450,25 +464,16 @@ webhooks_server = enable_space_ci_and_return_server(ui=main_block)
|
|
| 450 |
# Add webhooks
|
| 451 |
@webhooks_server.add_webhook
|
| 452 |
def update_leaderboard(payload: WebhookPayload) -> None:
|
| 453 |
-
"""Redownloads the leaderboard dataset each time it updates
|
| 454 |
if payload.repo.type == "dataset" and payload.event.action == "update":
|
| 455 |
global NEW_DATA_ON_LEADERBOARD
|
| 456 |
-
if NEW_DATA_ON_LEADERBOARD:
|
| 457 |
-
logging.info("Leaderboard data is already marked for update, skipping...")
|
| 458 |
-
return
|
| 459 |
logging.info("New data detected, downloading updated leaderboard dataset.")
|
|
|
|
|
|
|
| 460 |
NEW_DATA_ON_LEADERBOARD = True
|
| 461 |
|
| 462 |
-
#
|
| 463 |
-
|
| 464 |
-
AGGREGATED_REPO,
|
| 465 |
-
"default",
|
| 466 |
-
split="train",
|
| 467 |
-
cache_dir=HF_HOME,
|
| 468 |
-
download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
|
| 469 |
-
verification_mode="no_checks"
|
| 470 |
-
)
|
| 471 |
-
logging.info("Leaderboard dataset successfully downloaded.")
|
| 472 |
|
| 473 |
# The below code is not used at the moment, as we can manage the queue file locally
|
| 474 |
LAST_UPDATE_QUEUE = datetime.datetime.now()
|
|
|
|
| 60 |
LEADERBOARD_DF = None
|
| 61 |
|
| 62 |
def restart_space():
|
| 63 |
+
logging.info(f"Restarting space with repo ID: {REPO_ID}")
|
| 64 |
try:
|
| 65 |
+
# Check if new data is pending and download if necessary
|
| 66 |
+
if NEW_DATA_ON_LEADERBOARD:
|
| 67 |
+
logging.info("Fetching latest leaderboard data before restart.")
|
| 68 |
+
get_latest_data_leaderboard()
|
| 69 |
+
|
| 70 |
+
# Now restart the space
|
| 71 |
API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
|
| 72 |
logging.info("Space restarted successfully.")
|
| 73 |
except Exception as e:
|
|
|
|
| 113 |
global NEW_DATA_ON_LEADERBOARD
|
| 114 |
global LEADERBOARD_DF
|
| 115 |
if NEW_DATA_ON_LEADERBOARD:
|
| 116 |
+
logging.info("Leaderboard updated at reload!")
|
| 117 |
+
try:
|
| 118 |
+
leaderboard_dataset = datasets.load_dataset(
|
| 119 |
+
AGGREGATED_REPO,
|
| 120 |
+
"default",
|
| 121 |
+
split="train",
|
| 122 |
+
cache_dir=HF_HOME,
|
| 123 |
+
download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data
|
| 124 |
+
verification_mode="no_checks"
|
| 125 |
+
)
|
| 126 |
+
LEADERBOARD_DF = get_leaderboard_df(
|
| 127 |
+
leaderboard_dataset=leaderboard_dataset,
|
| 128 |
+
cols=COLS,
|
| 129 |
+
benchmark_cols=BENCHMARK_COLS,
|
| 130 |
+
)
|
| 131 |
+
logging.info("Leaderboard dataset successfully downloaded.")
|
| 132 |
+
except Exception as e:
|
| 133 |
+
logging.error(f"Failed to download leaderboard dataset: {e}")
|
| 134 |
+
return
|
| 135 |
+
|
| 136 |
+
# Reset the flag after successful download
|
| 137 |
NEW_DATA_ON_LEADERBOARD = False
|
| 138 |
else:
|
| 139 |
LEADERBOARD_DF = leaderboard_initial_df
|
| 140 |
+
logging.info("Using cached leaderboard dataset.")
|
| 141 |
return LEADERBOARD_DF
|
| 142 |
|
| 143 |
|
|
|
|
| 464 |
# Add webhooks
|
| 465 |
@webhooks_server.add_webhook
|
| 466 |
def update_leaderboard(payload: WebhookPayload) -> None:
|
| 467 |
+
"""Redownloads the leaderboard dataset each time it updates"""
|
| 468 |
if payload.repo.type == "dataset" and payload.event.action == "update":
|
| 469 |
global NEW_DATA_ON_LEADERBOARD
|
|
|
|
|
|
|
|
|
|
| 470 |
logging.info("New data detected, downloading updated leaderboard dataset.")
|
| 471 |
+
|
| 472 |
+
# Mark the flag for new data
|
| 473 |
NEW_DATA_ON_LEADERBOARD = True
|
| 474 |
|
| 475 |
+
# Now actually download the latest data immediately
|
| 476 |
+
get_latest_data_leaderboard()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
# The below code is not used at the moment, as we can manage the queue file locally
|
| 479 |
LAST_UPDATE_QUEUE = datetime.datetime.now()
|