Update sync_data.sh
Files changed: sync_data.sh (+18 -5)
sync_data.sh
CHANGED
@@ -21,12 +21,23 @@ sync_data() {
|
|
21 |
|
22 |
tar -czf "/tmp/${backup_file}" data/
|
23 |
|
24 |
-
# Upload to HuggingFace using a Python script
|
25 |
python3 -c "
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
api = HfApi()
|
31 |
api.upload_file(
|
32 |
path_or_fileobj='/tmp/${backup_file}',
|
@@ -34,7 +45,9 @@ try:
|
|
34 |
repo_id='${DATASET_ID}',
|
35 |
repo_type='dataset'
|
36 |
)
|
37 |
-
print('Backup
|
|
|
|
|
38 |
except Exception as e:
|
39 |
print(f'Backup failed: {str(e)}')
|
40 |
"
|
|
|
21 |
|
22 |
tar -czf "/tmp/${backup_file}" data/
|
23 |
|
|
|
24 |
python3 -c "
|
25 |
+
from huggingface_hub import HfApi
|
26 |
+
import os
|
27 |
+
def manage_backups(api, repo_id, max_files=50):
    # Prune the oldest backup archives in a dataset repo so that at most
    # max_files of them remain.
    #
    # api       -- object providing list_repo_files() and delete_file();
    #              the surrounding script passes an HfApi instance
    # repo_id   -- id of the dataset repo that holds the backups
    # max_files -- number of backup_*.tar.gz files to keep (default 50)
    #
    # Deleting is best-effort: a failure on one file is printed and the
    # remaining candidates are still processed. Errors raised by
    # list_repo_files() are not caught here and propagate to the caller.
    #
    # NOTE: this code is embedded in a shell double-quoted python3 -c string,
    # so it uses single-quoted literals and hash comments only.
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')

    # Only top-level files named backup_*.tar.gz count as backups.
    backup_files = sorted(
        f for f in files
        if f.startswith('backup_') and f.endswith('.tar.gz')
    )

    # The script calls this after the new archive has been uploaded, so the
    # listing already includes it. Remove only the surplus beyond max_files;
    # reserving an extra slot here would leave just max_files - 1 backups.
    excess = len(backup_files) - max_files
    if excess <= 0:
        return

    # Lexicographic order is used as the age order (oldest first).
    # NOTE(review): presumably the names embed a sortable timestamp - confirm.
    for file_to_delete in backup_files[:excess]:
        try:
            api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
            print(f'Deleted old backup: {file_to_delete}')
        except Exception as e:
            print(f'Error deleting {file_to_delete}: {str(e)}')
|
40 |
+
try:
|
41 |
api = HfApi()
|
42 |
api.upload_file(
|
43 |
path_or_fileobj='/tmp/${backup_file}',
|
|
|
45 |
repo_id='${DATASET_ID}',
|
46 |
repo_type='dataset'
|
47 |
)
|
48 |
+
print('Backup uploaded successfully')
|
49 |
+
|
50 |
+
manage_backups(api, '${DATASET_ID}')
|
51 |
except Exception as e:
|
52 |
print(f'Backup failed: {str(e)}')
|
53 |
"
|