Script to download train and test datasets
Browse files
category_classification/datasets/en/download_train_test.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from kagglehub import dataset_download
|
| 7 |
+
|
| 8 |
+
# Kaggle dataset handle ("<owner>/<dataset>") passed to kagglehub.
dataset = "hibiscus4000/arxiv-dataset"

# File names expected inside the downloaded dataset directory; the same names
# are used for the symlinks created in the current working directory.
test_dataset = "arxiv_test.json"
train_dataset = "arxiv_train.json"

# dataset_download() returns the local location of the dataset files; it is
# used below as the directory that contains the two JSON files.
dataset_path = Path(dataset_download(dataset))


def _link_into_cwd(source_dir, file_name):
    """Create (or refresh) a symlink ``./file_name`` -> ``source_dir/file_name``.

    Args:
        source_dir: Directory that holds the downloaded dataset files.
        file_name: Name of the file inside ``source_dir``; also the name of
            the link created relative to the current working directory.

    Raises:
        FileNotFoundError: If ``source_dir/file_name`` is not an existing file,
            so that a dangling link is never created silently.
        FileExistsError: If ``file_name`` already exists and is *not* a
            symlink (a real file or directory is never overwritten).
    """
    source = Path(source_dir) / file_name
    if not source.is_file():
        raise FileNotFoundError(
            f"Expected dataset file not found: {source}"
        )

    link = Path(file_name)
    # A link left over from a previous run (possibly pointing at an older
    # cache location) would make os.symlink() fail, so replace it.
    if link.is_symlink():
        link.unlink()
    os.symlink(source, link)


_link_into_cwd(dataset_path, test_dataset)
_link_into_cwd(dataset_path, train_dataset)
|