train_70b_4bit / split_data.py
daresearch's picture
Create split_data.py
1e777ee verified
raw
history blame
441 Bytes
import pandas as pd
from sklearn.model_selection import train_test_split
# Read the complete dataset from the local CSV file; it must already be
# present in the `data` directory before this script is run.
data = pd.read_csv("data/dataset.csv")

# Hold out 30% of the rows for validation and keep the rest for training.
# The fixed random_state makes the shuffled split reproducible across runs.
train, validation = train_test_split(
    data,
    test_size=0.3,
    random_state=42,
)

# Write each partition back into the `data` directory, training set first,
# omitting the DataFrame index so the files contain only the data columns.
for frame, destination in (
    (train, "data/train.csv"),
    (validation, "data/validation.csv"),
):
    frame.to_csv(destination, index=False)