taesiri committed on
Commit
3b73501
·
1 Parent(s): 82512ef
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -26,7 +26,7 @@ FINAL_REPO = "taesiri/DatasetOfHardQuestions5"
26
  # Download existing data from hub
27
  def sync_with_hub():
28
  """
29
- Synchronize local data with the hub by cloning the dataset repo
30
  """
31
  print("Starting sync with hub...")
32
  data_dir = Path("./data")
@@ -37,37 +37,27 @@ def sync_with_hub():
37
  shutil.rmtree(backup_dir)
38
  shutil.copytree(data_dir, backup_dir)
39
 
40
- # Clone/pull latest data from hub
41
- repo_url = f"https://huggingface.co/datasets/{DATASET_REPO}"
42
- hub_data_dir = Path("hub_data")
43
-
44
- if hub_data_dir.exists():
45
- # If repo exists, do a git pull
46
- print("Pulling latest changes...")
47
- repo = git.Repo(hub_data_dir)
48
- origin = repo.remotes.origin
49
- origin.pull()
50
- else:
51
- # Clone the repo
52
- print("Cloning repository...")
53
- git.Repo.clone_from(repo_url, hub_data_dir)
54
 
55
  # Merge hub data with local data
56
- hub_data_source = hub_data_dir / "data"
57
- if hub_data_source.exists():
58
  # Create data dir if it doesn't exist
59
  data_dir.mkdir(exist_ok=True)
60
 
61
  # Copy files from hub
62
- for item in hub_data_source.glob("*"):
63
  if item.is_dir():
64
  dest = data_dir / item.name
65
  if not dest.exists(): # Only copy if doesn't exist locally
66
  shutil.copytree(item, dest)
67
 
68
- # Clean up cloned repo
69
- if hub_data_dir.exists():
70
- shutil.rmtree(hub_data_dir)
71
  print("Finished syncing with hub!")
72
 
73
 
 
26
  # Download existing data from hub
27
  def sync_with_hub():
28
  """
29
+ Synchronize local data with the hub by downloading latest dataset
30
  """
31
  print("Starting sync with hub...")
32
  data_dir = Path("./data")
 
37
  shutil.rmtree(backup_dir)
38
  shutil.copytree(data_dir, backup_dir)
39
 
40
+ # Download latest data from hub
41
+ repo_path = snapshot_download(
42
+ repo_id="taesiri/zb_dataset_storage", repo_type="dataset", local_dir="hub_data"
43
+ )
 
 
 
 
 
 
 
 
 
 
44
 
45
  # Merge hub data with local data
46
+ hub_data_dir = Path(repo_path) / "data"
47
+ if hub_data_dir.exists():
48
  # Create data dir if it doesn't exist
49
  data_dir.mkdir(exist_ok=True)
50
 
51
  # Copy files from hub
52
+ for item in hub_data_dir.glob("*"):
53
  if item.is_dir():
54
  dest = data_dir / item.name
55
  if not dest.exists(): # Only copy if doesn't exist locally
56
  shutil.copytree(item, dest)
57
 
58
+ # Clean up downloaded repo
59
+ if Path("hub_data").exists():
60
+ shutil.rmtree("hub_data")
61
  print("Finished syncing with hub!")
62
 
63