fisherman611's picture
Upload 3 files
9144bfc verified
raw
history blame contribute delete
953 Bytes
import pandas as pd
from sklearn.model_selection import train_test_split
# Split one CSV dataset into train / test / validation CSV files.
#
# Every path is derived from DATASET_NAME: the input is read from
# data/<DATASET_NAME>.csv and the three splits are written next to it as
# data/{train,test,val}_<DATASET_NAME>.csv. To split the unfiltered variant
# instead, set DATASET_NAME = "cleaned_dataset" -- no other line has to change.
DATASET_NAME = "filtered_cleaned_dataset"
DATA_DIR = "data"
TEST_FRACTION = 0.1  # share of all rows held out as the test set
VAL_FRACTION = 0.1  # share of the remaining (non-test) rows used for validation
RANDOM_STATE = 42  # fixed seed so repeated runs produce the same splits

data = pd.read_csv(f"{DATA_DIR}/{DATASET_NAME}.csv")

# Step 1: hold out the test set from the full dataset.
train, test = train_test_split(
    data, test_size=TEST_FRACTION, random_state=RANDOM_STATE
)
# Step 2: carve the validation set out of what is left. Because this fraction
# applies to the remaining rows, the final proportions are roughly
# 81% train / 10% test / 9% validation of the original data.
train, val = train_test_split(
    train, test_size=VAL_FRACTION, random_state=RANDOM_STATE
)

print("Train shape:", train.shape)
print("Test shape:", test.shape)
print("Validation shape:", val.shape)

# Write each split without the DataFrame index so the output files keep the
# same columns as the input file.
for split_name, frame in (("train", train), ("test", test), ("val", val)):
    frame.to_csv(f"{DATA_DIR}/{split_name}_{DATASET_NAME}.csv", index=False)