Valeriy Sinyukov
committed on
Commit
·
ce2c261
1
Parent(s):
3ee2ccf
Fixed file path to written dataset, add option to save to csv
Browse files
category_classification/datasets/datasets_common.py
CHANGED
@@ -51,7 +51,10 @@ def train_test_split(X, y, test_size=0.25):
|
|
51 |
return sk_train_test_split(X, y, test_size=test_size, stratify=y)
|
52 |
|
53 |
|
54 |
-
def write_dataset(dest_dir, X, y, filename):
|
55 |
dest_dir = Path(dest_dir)
|
56 |
df = pd.concat((X, y), axis=1)
|
57 |
-
|
|
|
|
|
|
|
|
51 |
return sk_train_test_split(X, y, test_size=test_size, stratify=y)
|
52 |
|
53 |
|
54 |
+
def write_dataset(dest_dir, X, y, filename, to_json : bool = True):
|
55 |
dest_dir = Path(dest_dir)
|
56 |
df = pd.concat((X, y), axis=1)
|
57 |
+
if to_json:
|
58 |
+
df.to_json(dest_dir / filename, orient="records", lines=True)
|
59 |
+
else:
|
60 |
+
df.to_csv(dest_dir / filename)
|