Spaces:
Running
Running
fix dataset card upload
Browse files
- run_job.py +2 -2
run_job.py
CHANGED
@@ -38,7 +38,7 @@ def sql(src: str, dst: str, query: str, config: str = "default", split: str = "t
|
|
38 |
class CommitAndCleanScheduler(CommitScheduler):
|
39 |
|
40 |
def push_to_hub(self):
|
41 |
-
for path in self.folder_path.with_name("tmp").glob(
|
42 |
with path.open("rb") as f:
|
43 |
footer = f.read(4) and f.seek(-4, os.SEEK_END) and f.read(4)
|
44 |
if footer == b"PAR1":
|
@@ -47,7 +47,7 @@ def sql(src: str, dst: str, query: str, config: str = "default", split: str = "t
|
|
47 |
for path in self.last_uploaded:
|
48 |
path.unlink(missing_ok=True)
|
49 |
|
50 |
-
with nullcontext() if dry_run else CommitAndCleanScheduler(repo_id=dst, repo_type="dataset", folder_path="dst", path_in_repo="data",
|
51 |
con = duckdb.connect(":memory:", config=CONFIG)
|
52 |
src_kwargs = con.sql(CMD_SRC_KWARGS.format(src=src, config=config, split=split)).df().to_dict(orient="records")
|
53 |
if not src_kwargs:
|
|
|
38 |
class CommitAndCleanScheduler(CommitScheduler):
|
39 |
|
40 |
def push_to_hub(self):
|
41 |
+
for path in self.folder_path.with_name("tmp").glob("*.parquet"):
|
42 |
with path.open("rb") as f:
|
43 |
footer = f.read(4) and f.seek(-4, os.SEEK_END) and f.read(4)
|
44 |
if footer == b"PAR1":
|
|
|
47 |
for path in self.last_uploaded:
|
48 |
path.unlink(missing_ok=True)
|
49 |
|
50 |
+
with nullcontext() if dry_run else CommitAndCleanScheduler(repo_id=dst, repo_type="dataset", folder_path="dst", path_in_repo="data", every=0.1, private=private):
|
51 |
con = duckdb.connect(":memory:", config=CONFIG)
|
52 |
src_kwargs = con.sql(CMD_SRC_KWARGS.format(src=src, config=config, split=split)).df().to_dict(orient="records")
|
53 |
if not src_kwargs:
|