|
|
import pandas as pd
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
# Input CSV and the three split files this script writes.
SOURCE_CSV = 'data/filtered_cleaned_dataset.csv'
TRAIN_CSV = "data/train_filtered_cleaned_dataset.csv"
TEST_CSV = "data/test_filtered_cleaned_dataset.csv"
VAL_CSV = "data/val_filtered_cleaned_dataset.csv"

# Fraction handed to `test_size` in both splits, and the seed shared by both
# so that repeated runs on the same input produce the same partitions.
HOLDOUT_FRACTION = 0.1
RANDOM_SEED = 42

data = pd.read_csv(SOURCE_CSV)

# First split: hold out HOLDOUT_FRACTION of all rows as the test set.
train, test = train_test_split(
    data, test_size=HOLDOUT_FRACTION, random_state=RANDOM_SEED
)

# Second split: hold out HOLDOUT_FRACTION of the *remaining* rows as the
# validation set. Because it is taken from the 90% left after the first
# split, validation is roughly 9% of the original data (train ~81%,
# test ~10%), not a full 10%.
train, val = train_test_split(
    train, test_size=HOLDOUT_FRACTION, random_state=RANDOM_SEED
)

# Report the (rows, columns) shape of each partition.
print("Train shape:", train.shape)
print("Test shape:", test.shape)
print("Validation shape:", val.shape)

# Persist each partition; index=False keeps the row index out of the files.
train.to_csv(TRAIN_CSV, index=False)
test.to_csv(TEST_CSV, index=False)
val.to_csv(VAL_CSV, index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|