backup
Browse files
app.py
CHANGED
@@ -26,7 +26,7 @@ FINAL_REPO = "taesiri/DatasetOfHardQuestions5"
|
|
26 |
# Download existing data from hub
|
27 |
def sync_with_hub():
|
28 |
"""
|
29 |
-
Synchronize local data with the hub by
|
30 |
"""
|
31 |
print("Starting sync with hub...")
|
32 |
data_dir = Path("./data")
|
@@ -37,37 +37,27 @@ def sync_with_hub():
|
|
37 |
shutil.rmtree(backup_dir)
|
38 |
shutil.copytree(data_dir, backup_dir)
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
if hub_data_dir.exists():
|
45 |
-
# If repo exists, do a git pull
|
46 |
-
print("Pulling latest changes...")
|
47 |
-
repo = git.Repo(hub_data_dir)
|
48 |
-
origin = repo.remotes.origin
|
49 |
-
origin.pull()
|
50 |
-
else:
|
51 |
-
# Clone the repo
|
52 |
-
print("Cloning repository...")
|
53 |
-
git.Repo.clone_from(repo_url, hub_data_dir)
|
54 |
|
55 |
# Merge hub data with local data
|
56 |
-
|
57 |
-
if
|
58 |
# Create data dir if it doesn't exist
|
59 |
data_dir.mkdir(exist_ok=True)
|
60 |
|
61 |
# Copy files from hub
|
62 |
-
for item in
|
63 |
if item.is_dir():
|
64 |
dest = data_dir / item.name
|
65 |
if not dest.exists(): # Only copy if doesn't exist locally
|
66 |
shutil.copytree(item, dest)
|
67 |
|
68 |
-
# Clean up
|
69 |
-
if
|
70 |
-
shutil.rmtree(
|
71 |
print("Finished syncing with hub!")
|
72 |
|
73 |
|
|
|
26 |
# Download existing data from hub
|
27 |
def sync_with_hub():
|
28 |
"""
|
29 |
+
Synchronize local data with the hub by downloading the latest dataset
|
30 |
"""
|
31 |
print("Starting sync with hub...")
|
32 |
data_dir = Path("./data")
|
|
|
37 |
shutil.rmtree(backup_dir)
|
38 |
shutil.copytree(data_dir, backup_dir)
|
39 |
|
40 |
+
# Download latest data from hub
|
41 |
+
repo_path = snapshot_download(
|
42 |
+
repo_id="taesiri/zb_dataset_storage", repo_type="dataset", local_dir="hub_data"
|
43 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Merge hub data with local data
|
46 |
+
hub_data_dir = Path(repo_path) / "data"
|
47 |
+
if hub_data_dir.exists():
|
48 |
# Create data dir if it doesn't exist
|
49 |
data_dir.mkdir(exist_ok=True)
|
50 |
|
51 |
# Copy files from hub
|
52 |
+
for item in hub_data_dir.glob("*"):
|
53 |
if item.is_dir():
|
54 |
dest = data_dir / item.name
|
55 |
if not dest.exists(): # Only copy if doesn't exist locally
|
56 |
shutil.copytree(item, dest)
|
57 |
|
58 |
+
# Clean up downloaded repo
|
59 |
+
if Path("hub_data").exists():
|
60 |
+
shutil.rmtree("hub_data")
|
61 |
print("Finished syncing with hub!")
|
62 |
|
63 |
|