daresearch committed on
Commit
1e777ee
·
verified ·
1 Parent(s): c619634

Create split_data.py

Browse files
Files changed (1) hide show
  1. split_data.py +12 -0
split_data.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.model_selection import train_test_split
3
+
4
+ # Load the dataset from its local copy under `data/` (the HF dataset location)
5
+ data = pd.read_csv("data/dataset.csv") # Ensure your dataset is in the `data` directory
6
+
7
+ # Split into train (70%) and validation (30%) sets; the fixed random_state keeps the split reproducible
8
+ train, validation = train_test_split(data, test_size=0.3, random_state=42)
9
+
10
+ # Save both splits to CSV files, omitting the DataFrame index
11
+ train.to_csv("data/train.csv", index=False)
12
+ validation.to_csv("data/validation.csv", index=False)