Spaces:
Runtime error
Runtime error
daresearch
committed on
Create split_data.py
Browse files- split_data.py +12 -0
split_data.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.model_selection import train_test_split
|
3 |
+
|
4 |
+
# Load dataset from the HF dataset location
|
5 |
+
data = pd.read_csv("data/dataset.csv") # Ensure your dataset is in the `data` directory
|
6 |
+
|
7 |
+
# Split into train (70%) and validation (30%) sets
|
8 |
+
train, validation = train_test_split(data, test_size=0.3, random_state=42)
|
9 |
+
|
10 |
+
# Save to CSV files
|
11 |
+
train.to_csv("data/train.csv", index=False)
|
12 |
+
validation.to_csv("data/validation.csv", index=False)
|