Script to download train and test datasets
Browse files
category_classification/datasets/en/download_train_test.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
|
3 |
+
import os
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
from kagglehub import dataset_download
|
7 |
+
|
8 |
+
# Kaggle dataset handle passed to kagglehub's ``dataset_download``.
dataset = "hibiscus4000/arxiv-dataset"

# File names looked up inside the downloaded dataset directory; the same
# names are used for the symlinks created in the current working directory.
test_dataset = "arxiv_test.json"
train_dataset = "arxiv_train.json"


def _link_dataset_file(source_dir: Path, filename: str) -> None:
    """Symlink ``source_dir / filename`` as ``filename`` in the working directory.

    An existing symlink with that name (including a dangling one left over
    from an earlier run) is replaced, so the script can be re-run safely.
    A regular file or directory with that name is never removed; in that
    case ``os.symlink`` raises ``FileExistsError``.

    Args:
        source_dir: Directory that is expected to contain ``filename``.
        filename: Name of the file inside ``source_dir`` and of the link
            to create in the current working directory.

    Raises:
        FileNotFoundError: If ``source_dir / filename`` does not exist.
        FileExistsError: If ``filename`` already exists and is not a symlink.
    """
    target = source_dir / filename
    if not target.exists():
        # Fail loudly instead of silently creating a dangling link.
        raise FileNotFoundError(f"{filename} not found in downloaded dataset: {target}")

    link = Path(filename)
    # is_symlink() is also true for broken links, so stale links from a
    # previous run are refreshed rather than causing FileExistsError.
    if link.is_symlink():
        link.unlink()
    os.symlink(target, link)


# NOTE(review): the return value of dataset_download() is treated as the local
# directory holding the dataset files -- confirm against the kagglehub docs.
dataset_path = Path(dataset_download(dataset))

for _filename in (test_dataset, train_dataset):
    _link_dataset_file(dataset_path, _filename)
|