Migrate benchmark from https://github.com/kitamoto-lab/benchmarks/

Browse files

Files changed (9) hide show

Dockerfile +19 -0
FrameDatamodule.py +110 -0
README.md +38 -0
config.py +28 -0
createdataset.py +178 -0
lightning_resnetReg.py +149 -0
loading.py +43 -0
split_testing.py +168 -0
train_split.py +138 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,19 @@

+# Pinned release: the floating `ubuntu` tag now resolves to a release whose
+# system Python rejects system-wide `pip3 install` (PEP 668).
+FROM ubuntu:22.04
+WORKDIR /app
+# Keep apt from prompting (e.g. tzdata) while the image is built.
+ARG DEBIAN_FRONTEND=noninteractive
+# System packages, then pyphoon2 from source, then the Python packages used
+# by the benchmark scripts.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y git libopenmpi-dev python3-pip && \
+    rm -rf /var/lib/apt/lists/* && \
+    git clone https://github.com/kitamoto-lab/pyphoon2.git && \
+    cd pyphoon2 && \
+    pip3 install . && \
+    pip3 install tqdm scikit-learn matplotlib seaborn lightning tensorboardX

FrameDatamodule.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import torch
+from torch import nn
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader
+from pathlib import Path
+import numpy as np
+from pyphoon2.DigitalTyphoonDataset import DigitalTyphoonDataset
+class TyphoonDataModule(pl.LightningDataModule):
+    def __init__(
+        self,
+        dataroot,
+        batch_size,
+        num_workers,
+        labels = 'grade',
+        split_by="sequence",
+        load_data=False,
+        dataset_split=(0.8, 0.1, 0.1),
+        standardize_range=(150, 350),
+        downsample_size=(224, 224),
+        corruption_ceiling_pct=100,
+    ):
+        super().__init__()
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        data_path = Path(dataroot)
+        self.images_path = str(data_path / "image") + "/"
+        self.track_path = str(data_path / "track") + "/"
+        self.metadata_path = str(data_path / "metadata.json")
+        self.load_data = load_data
+        self.split_by = split_by
+        self.labels = labels
+        self.dataset_split = dataset_split
+        self.standardize_range = standardize_range
+        self.downsample_size = downsample_size
+        self.corruption_ceiling_pct = corruption_ceiling_pct
+    def setup(self, stage):
+        # Load Dataset
+        dataset = DigitalTyphoonDataset(
+            str(self.images_path),
+            str(self.track_path),
+            str(self.metadata_path),
+            self.labels,
+            load_data_into_memory=self.load_data,
+            filter_func=self.image_filter,
+            transform_func=self.transform_func,
+            spectrum="Infrared",
+            verbose=False,
+        )
+        self.train_set, self.val_set, _ = dataset.random_split(
+            self.dataset_split, split_by=self.split_by
+        )
+    def train_dataloader(self):
+        return DataLoader(
+            self.train_set,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+    def val_dataloader(self):
+        return DataLoader(
+            self.val_set,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+    def image_filter(self, image):
+        return (
+            (image.grade() < 6)
+            and (image.grade() > 2)
+            and (image.interpolated() == False)
+            and (image.year() != 2023)
+            and (100.0 <= image.long() <= 180.0)
+        )  # and (image.mask_1_percent() <  self.corruption_ceiling_pct))
+    def transform_func(self, image_ray):
+        image_ray = np.clip(
+            image_ray, self.standardize_range[0], self.standardize_range[1]
+        )
+        image_ray = (image_ray - self.standardize_range[0]) / (
+            self.standardize_range[1] - self.standardize_range[0]
+        )
+        if self.downsample_size != (512, 512):
+            image_ray = torch.Tensor(image_ray)
+            image_ray = torch.reshape(
+                image_ray, [1, 1, image_ray.size()[0], image_ray.size()[1]]
+            )
+            image_ray = nn.functional.interpolate(
+                image_ray,
+                size=self.downsample_size,
+                mode="bilinear",
+                align_corners=False,
+            )
+            image_ray = torch.reshape(
+                image_ray, [image_ray.size()[2], image_ray.size()[3]]
+            )
+            image_ray = image_ray.numpy()
+        return image_ray

README.md ADDED Viewed

	@@ -0,0 +1,38 @@

+## Instructions to run
+#### Docker
+All of the below commands should be run in a Docker container built using the Dockerfile in the repo, with the data and repo being exposed as volumes in the container.
+To build:
+```docker build  -t benchmarks_img .```
+To run an interactive shell:
+```docker run -it --shm-size=2G --gpus all -v /path/to/neurips2023-benchmarks:/neurips2023-benchmarks -v /path/to/datasets/:/data benchmarks_img```
+### Reanalysis Task
+Every command should be run in the reanalysis folder. The path to this folder and to the data should be provided in the config.py file.
+#### Create buckets
+First, split the dataset into 3 buckets and save them, according to the splitting type set by `TYPE_SAVE` in the config.py file ('standard' splits the sequences into before 2005 / from 2005 to 2014 / from 2015 onward; 'same_size' uses the same split but with an equal number of sequences per bucket).
+```
+python3 createdataset.py
+```
+This will create a folder (named 'save' or 'save_same') with 6 .txt files containing the ids of the sequences used for training and testing in each bucket.
+#### Train
+You can now train for a number of runs (called version in the logs) and epochs specified in the config.py file.
+```
+python3 train_split.py
+```
+A TensorBoard log will be created for each run and each bucket in the tb_logs folder.
+#### Test
+After specifying a list of versions in the config.py file, you'll be able to test the model.
+```
+python3 split_testing.py
+```
+The accuracy (RMSE in hPa) will be displayed on the terminal but also written in a log.txt file in the directory ```reanalysis```.

config.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import torch
+# Training hyperparameters
+LEARNING_RATE     = 0.0001  # learning rate handed to the model's optimizer
+BATCH_SIZE        = 16  # batch size of every DataLoader
+NUM_WORKERS       = 16  # worker processes per DataLoader
+MAX_EPOCHS        = 101  # max_epochs given to each Trainer
+NB_RUNS           = 5  # number of training runs performed by train_split.py
+TESTING_VERSION   = (0,1,2,3,4)  # version_<n> log directories evaluated by split_testing.py
+# Dataset settings
+WEIGHTS           = None  # pretrained weights passed to resnet18 (None = random init)
+LABELS            = 'pressure'
+SPLIT_BY          = 'sequence'
+LOAD_DATA         = 'all_data'
+DATASET_SPLIT     = (0.8, 0.1, 0.1)
+STANDARDIZE_RANGE = (170, 350)  # (low, high) clipping bounds; images are rescaled to [0, 1]
+DOWNSAMPLE_SIZE   = (224, 224)  # target (height, width) of the resized images
+NUM_CLASSES       = 1
+TYPE_SAVE         = 'standard' # bucket layout: 'standard' or 'same_size'
+# Computation
+ACCELERATOR       = 'gpu' if torch.cuda.is_available() else 'cpu'  # falls back to CPU when CUDA is unavailable
+DEVICE            = [0]  # passed as `devices` to each Trainer
+DATA_DIR          = '/app/datasets/wnp/'  # root holding image/, track/ and metadata.json
+LOG_DIR           = "/app/pyphoon2/reanalysis/tb_logs"  # passed as default_root_dir to each Trainer

createdataset.py ADDED Viewed

	@@ -0,0 +1,178 @@

+import config
+import torch
+from torch import nn
+from pathlib import Path
+import numpy as np
+from pyphoon2.DigitalTyphoonDataset import DigitalTyphoonDataset
+import random
+import os
+dataroot = config.DATA_DIR
+batch_size=config.BATCH_SIZE
+num_workers=config.NUM_WORKERS
+split_by=config.SPLIT_BY
+load_data=config.LOAD_DATA
+dataset_split=config.DATASET_SPLIT
+standardize_range=config.STANDARDIZE_RANGE
+downsample_size=config.DOWNSAMPLE_SIZE
+type_save=config.TYPE_SAVE
+data_path = Path(dataroot)
+images_path = str(data_path / "image") + "/"
+track_path = str(data_path / "track") + "/"
+metadata_path = str(data_path / "metadata.json")
+def image_filter(image):
+    return (
+        (image.grade() < 7)
+        and (image.year() != 2023)
+        and (100.0 <= image.long() <= 180.0)
+    )  # and (image.mask_1_percent() <  self.corruption_ceiling_pct))
+def transform_func(image_ray):
+    image_ray = np.clip(
+        image_ray,standardize_range[0],standardize_range[1]
+    )
+    image_ray = (image_ray - standardize_range[0]) / (
+        standardize_range[1] - standardize_range[0]
+    )
+    if downsample_size != (512, 512):
+        image_ray = torch.Tensor(image_ray)
+        image_ray = torch.reshape(
+            image_ray, [1, 1, image_ray.size()[0], image_ray.size()[1]]
+        )
+        image_ray = nn.functional.interpolate(
+            image_ray,
+            size=downsample_size,
+            mode="bilinear",
+            align_corners=False,
+        )
+        image_ray = torch.reshape(
+            image_ray, [image_ray.size()[2], image_ray.size()[3]]
+        )
+        image_ray = image_ray.numpy()
+    return image_ray
+dataset = DigitalTyphoonDataset(
+            str(images_path),
+            str(track_path),
+            str(metadata_path),
+            "pressure",
+            load_data_into_memory='all_data',
+            filter_func=image_filter,
+            transform_func=transform_func,
+            spectrum="Infrared",
+            verbose=False,
+        )
+years = dataset.get_years()
+old=[]
+recent=[]
+now=[]
+#splitting years in 3 buckets
+for i in years :
+    if i < 2005 :
+        old.append(i)
+    else :
+        if i < 2015:
+            recent.append(i)
+        else :
+            now.append(i)
+old_data=[]
+recent_data=[]
+now_data=[]
+#getting the ids from years
+for year in old :
+    old_data.extend(dataset.get_seq_ids_from_year(year))
+for year in recent :
+    recent_data.extend(dataset.get_seq_ids_from_year(year))
+for year in now :
+    now_data.extend(dataset.get_seq_ids_from_year(year))
+old_train , old_val = [],[]
+recent_train , recent_val = [],[]
+now_train , now_val = [],[]
+#shuffling and splitting 80/20
+random.shuffle(old_data)
+random.shuffle(now_data)
+random.shuffle(recent_data)
+l=len(old_data)
+for i in range(l):
+    if i<l*0.8:
+        old_train.append(old_data[i])
+    else:
+        old_val.append(old_data[i])
+l=len(recent_data)
+for i in range(l):
+    if i<l*0.8:
+        recent_train.append(recent_data[i])
+    else:
+        recent_val.append(recent_data[i])
+l=len(now_data)
+for i in range(l):
+    if i<l*0.8:
+        now_train.append(now_data[i])
+    else:
+        now_val.append(now_data[i])
+#writting in file depending on which format
+if(type_save=="standard"):
+    if not(os.path.exists('./save')): os.mkdir('./save')
+    with open('save/old_train.txt','w+') as file:
+        for id in old_train:
+            file.write(id+"\n")
+    with open('save/old_val.txt','w+') as file:
+        for id in old_val :
+            file.write(id+"\n")
+    with open('save/recent_train.txt','w+') as file:
+        for id in recent_train:
+            file.write(id+"\n")
+    with open('save/recent_val.txt','w+') as file:
+        for id in recent_val:
+            file.write(id+"\n")
+    with open('save/now_train.txt','w+') as file:
+        for id in now_train:
+            file.write(id+"\n")
+    with open('save/now_val.txt','w+') as file:
+        for id in now_val:
+            file.write(id+"\n")
+if(type_save=="same_size"):
+    if not(os.path.exists('./save_same')): os.mkdir('./save_same')
+    with(
+        open('save_same/old_train.txt','w+') as train1,
+        open('save_same/old_val.txt','w+') as test1,
+        open('save_same/recent_train.txt','w+') as train2,
+        open('save_same/recent_val.txt','w+') as test2,
+        open('save_same/now_train.txt','w+') as train3,
+        open('save_same/now_val.txt','w+') as test3,
+    ):
+        for i in range(min(len(old_train),len(recent_train),len(now_train))):
+            train1.write(old_train[i]+'\n')
+            train2.write(recent_train[i]+'\n')
+            train3.write(now_train[i]+'\n')
+        for i in range(min(len(old_val),len(recent_val),len(now_val))):
+            test1.write(old_val[i]+'\n')
+            test2.write(recent_val[i]+'\n')
+            test3.write(now_val[i]+'\n')
+print("Saving Done !")

lightning_resnetReg.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import torch.nn as nn
+import torch
+import torch.optim as optim
+from torchvision.models import resnet18
+import pytorch_lightning as pl
+from torchmetrics import MeanSquaredError
+class LightningResnetReg(pl.LightningModule):
+    def __init__(self, learning_rate, weights, num_classes):
+        super().__init__()
+        self.save_hyperparameters()
+        self.model = resnet18(num_classes=1, weights=weights)
+        self.model.conv1 = nn.Conv2d(
+            1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
+        )
+        self.model.fc = nn.Linear(in_features=512, out_features=1, bias=True)
+        self.learning_rate = learning_rate
+        self.loss_fn = nn.MSELoss()
+        self.accuracy = MeanSquaredError(squared = False)
+        self.compt = 1
+        self.predicted_labels = []
+        self.truth_labels = []
+    def forward(self, images):
+        images = torch.Tensor(images).float()
+        images = torch.reshape(
+            images, [images.size()[0], 1, images.size()[1], images.size()[2]]
+        )
+        output = self.model(images)
+        return output
+    def training_step(self, batch, batch_idx):
+        loss, outputs, labels = self._common_step(batch)
+        accuracy = self.accuracy(outputs, labels)
+        self.log_dict({
+            "train_loss": loss,
+            "train_RMSE": accuracy
+            },
+            on_step=False,
+            on_epoch=True,
+            sync_dist=True,
+        )
+        return loss
+    def validation_step(self, batch, batch_idx):
+        loss, outputs, labels = self._common_step(batch)
+        self.log("validation_loss", loss,
+            on_step=False, on_epoch=True, sync_dist=True)
+        self.predicted_labels.append(outputs)
+        self.truth_labels.append(labels.float())
+        return loss
+    def test_step(self, batch, batch_idx):
+        loss, outputs, labels = self._common_step(batch)
+        self.log("test_loss", loss,
+            on_step=False, on_epoch=True, sync_dist=True)
+        self.predicted_labels.append(outputs)
+        self.truth_labels.append(labels.float())
+        return loss
+    def _common_step(self, batch):
+        images, labels = batch
+        labels = labels - 2
+        labels = torch.reshape(labels, [labels.size()[0],1])
+        outputs = self.forward(images)
+        loss = self.loss_fn(outputs, labels.float())
+        return loss, outputs, labels
+    def predict_step(self, batch):
+        images, labels = batch
+        labels = labels - 2
+        labels = torch.reshape(labels, [labels.size()[0],1])
+        outputs = self.forward(images)
+        preds = outputs
+        return preds
+    def configure_optimizers(self):
+        return optim.SGD(self.parameters(), lr=self.learning_rate)
+    def on_validation_epoch_end(self):
+        tensorboard = self.logger.experiment
+        all_preds = torch.concat(self.predicted_labels)
+        all_truths = torch.concat(self.truth_labels)
+        all_couple = torch.cat((all_truths, all_preds), dim=1)
+        wind_values = torch.unique(all_truths)
+        pred_means = []
+        pred_std = []
+        pred_n = []
+        for value in wind_values:
+            # find all the couple (truth, preds) where truth == value and compute the mean of all the prediction for this value
+            m = torch.mean((all_couple[torch.where(all_couple[:,0] == value)][:,1].float()))
+            std = torch.std((all_couple[torch.where(all_couple[:,0] == value)][:,1].float()))
+            n = len(all_couple[torch.where(all_couple[:,0] == value)][:,1].float())
+            pred_means.append(m)
+            pred_std.append(std)
+            pred_n.append(n)
+        # Log regression line graph every 5 epochs
+        if(self.current_epoch %5 == 0 ):
+            for i in range(len(wind_values)):
+                tensorboard.add_scalars(f"epoch_{self.current_epoch}",{'pred_mean':pred_means[i],'truth':wind_values[i]},wind_values[i])
+                tensorboard.add_scalars(f"epoch_{self.current_epoch}_stats",{'pred_std':pred_std[i],'pred_n':pred_n[i]},wind_values[i])
+        self.log("validation_RMSE", self.accuracy(all_preds,all_truths),
+            on_step=False, on_epoch=True, sync_dist=True)
+        self.predicted_labels.clear()  # free memory
+        self.truth_labels.clear()
+    def on_test_epoch_end(self):
+        tensorboard= self.logger.experiment
+        all_preds = torch.concat(self.predicted_labels)
+        all_truths = torch.concat(self.truth_labels)
+        all_couple = torch.cat((all_truths, all_preds), dim=1)
+        self.logger.experiment.add_embedding(all_couple, tag="couple_label_pred_ep" + str(self.compt) + ".tsv")
+        unique_values = torch.unique(all_truths)
+        pred_means = []
+        pred_std = []
+        pred_n = []
+        for value in unique_values:
+            # find all the couple (truth, preds) where truth == value and compute the mean of all the prediction for this value
+            m = torch.mean((all_couple[torch.where(all_couple[:,0] == value)][:,1].float()))
+            std = torch.std((all_couple[torch.where(all_couple[:,0] == value)][:,1].float()))
+            n = len(all_couple[torch.where(all_couple[:,0] == value)][:,1].float())
+            pred_means.append(m)
+            pred_std.append(std)
+            pred_n.append(n)
+        # Log regression line graph every 5 epochs
+        if(self.current_epoch %5 == 0 ):
+            for i in range(len(unique_values)):
+                tensorboard.add_scalars(f"test_{self.compt}",{'pred_mean':pred_means[i],'truth':unique_values[i]},unique_values[i])
+                tensorboard.add_scalars(f"test_{self.compt}_stats",{'pred_std':pred_std[i],'pred_n':pred_n[i]},unique_values[i])
+        Accuracy = self.accuracy(all_preds,all_truths)
+        self.log(f"test_{self.compt}_RMSE", Accuracy,
+            on_step=False, on_epoch=True, sync_dist=True)
+        with open("log.txt","a+") as file:
+            file.write(f"test_{self.compt}_RMSE : {Accuracy} \n")
+        self.predicted_labels.clear()  # free memory
+        self.truth_labels.clear()
+        self.compt +=1

loading.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from torch.utils.data import DataLoader
+def load(type,dataset,batch_size,num_workers,type_save='standard'):
+    train, test = [],[]
+    if (type_save=='standard') :
+        file_dir = 'save/'
+    if (type_save=='same_size') :
+        file_dir = 'save_same/'
+    if type==0 :
+        with open(file_dir + 'old_train.txt','r') as file:
+            train_id=[line for line in file]
+        with open(file_dir + 'old_val.txt','r') as file:
+            test_id =[line for line in file]
+    if type==1 :
+        with open(file_dir + 'recent_train.txt','r') as file:
+            train_id=[line for line in file]
+        with open(file_dir + 'recent_val.txt','r') as file:
+            test_id =[line for line in file]
+    if type==2 :
+        with open(file_dir + 'now_train.txt','r') as file:
+            train_id=[line for line in file]
+        with open(file_dir + 'now_val.txt','r') as file:
+            test_id =[line for line in file]
+    if type==3 :
+        with open(file_dir + 'now_train.txt','r') as file:
+            train_id1=[line for line in file]
+        with open(file_dir + 'now_val.txt','r') as file:
+            test_id1 =[line for line in file]
+        with open(file_dir + 'recent_train.txt','r') as file:
+            train_id2=[line for line in file]
+        with open(file_dir + 'recent_val.txt','r') as file:
+            test_id2 =[line for line in file]
+        train_id = train_id1 +train_id2
+        test_id = test_id1+ test_id2
+    train_id = [x.replace('\n', '') for x in train_id]
+    test_id = [x.replace('\n','') for x in test_id]
+    train = DataLoader(dataset.images_from_sequences(train_id),batch_size= batch_size,num_workers=num_workers,shuffle=True)
+    test = DataLoader(dataset.images_from_sequences(test_id),batch_size= batch_size,num_workers=num_workers,shuffle=False)
+    return train, test

split_testing.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import pytorch_lightning as pl
+from pytorch_lightning.loggers import TensorBoardLogger
+from lightning_resnetReg import LightningResnetReg
+import config
+import loading
+import torch
+from torch import nn
+import os
+from pathlib import Path
+import numpy as np
+from pyphoon2.DigitalTyphoonDataset import DigitalTyphoonDataset
+def main():
+    logger_old = TensorBoardLogger("tb_logs", name="resnet_test_old_same")
+    logger_recent = TensorBoardLogger("tb_logs", name="resnet_test_recent_same")
+    logger_now = TensorBoardLogger("tb_logs", name="resnet_test_now_same")
+    # Set up data
+    data_root = config.DATA_DIR
+    batch_size=config.BATCH_SIZE
+    num_workers=config.NUM_WORKERS
+    standardize_range=config.STANDARDIZE_RANGE
+    downsample_size=config.DOWNSAMPLE_SIZE
+    type_save = config.TYPE_SAVE
+    versions = config.TESTING_VERSION
+    data_path = Path(data_root)
+    images_path = str(data_path / "image") + "/"
+    track_path = str(data_path / "track") + "/"
+    metadata_path = str(data_path / "metadata.json")
+    def image_filter(image):
+        return (
+            (image.grade() < 7)
+            and (image.year() != 2023)
+            and (100.0 <= image.long() <= 180.0)
+        )  # and (image.mask_1_percent() <  self.corruption_ceiling_pct))
+    def transform_func(image_ray):
+        image_ray = np.clip(
+            image_ray,standardize_range[0],standardize_range[1]
+        )
+        image_ray = (image_ray - standardize_range[0]) / (
+            standardize_range[1] - standardize_range[0]
+        )
+        if downsample_size != (512, 512):
+            image_ray = torch.Tensor(image_ray)
+            image_ray = torch.reshape(
+                image_ray, [1, 1, image_ray.size()[0], image_ray.size()[1]]
+            )
+            image_ray = nn.functional.interpolate(
+                image_ray,
+                size=downsample_size,
+                mode="bilinear",
+                align_corners=False,
+            )
+            image_ray = torch.reshape(
+                image_ray, [image_ray.size()[2], image_ray.size()[3]]
+            )
+            image_ray = image_ray.numpy()
+        return image_ray
+    dataset = DigitalTyphoonDataset(
+                str(images_path),
+                str(track_path),
+                str(metadata_path),
+                "pressure",
+                load_data_into_memory='all_data',
+                filter_func=image_filter,
+                transform_func=transform_func,
+                spectrum="Infrared",
+                verbose=False,
+            )
+    _,test_old = loading.load(0,dataset,batch_size,num_workers,type_save)
+    _,test_recent = loading.load(1,dataset,batch_size,num_workers,type_save)
+    _,test_now = loading.load(2,dataset,batch_size,num_workers,type_save)
+    # Test
+    trainer_old = pl.Trainer(
+        logger=logger_old,
+        accelerator=config.ACCELERATOR,
+        devices=config.DEVICE,
+        max_epochs=config.MAX_EPOCHS,
+        default_root_dir=config.LOG_DIR,
+    )
+    trainer_recent = pl.Trainer(
+        logger=logger_recent,
+        accelerator=config.ACCELERATOR,
+        devices=config.DEVICE,
+        max_epochs=config.MAX_EPOCHS,
+        default_root_dir=config.LOG_DIR,
+    )
+    trainer_now = pl.Trainer(
+        logger=logger_now,
+        accelerator=config.ACCELERATOR,
+        devices=config.DEVICE,
+        max_epochs=config.MAX_EPOCHS,
+        default_root_dir=config.LOG_DIR,
+    )
+    version_dir_old = 'tb_logs/resnet_train_old'
+    version_dir_recent = 'tb_logs/resnet_train_recent'
+    version_dir_now = 'tb_logs/resnet_train_now'
+    if type_save == 'same_size':
+        version_dir_old += '_same'
+        version_dir_recent += '_same'
+        version_dir_now += '_same'
+    with open("log.txt","a+") as file :
+        file.write("\n------------------------------------------------------------ \n")
+    for i in versions:
+        with open("log.txt","a+") as file :
+            file.write(f"\nVersion : {i} \n")
+        version_path = f'/version_{i}/checkpoints/'
+        _,_,filename_old = next(os.walk(version_dir_old + version_path))
+        _,_,filename_recent = next(os.walk(version_dir_recent + version_path))
+        _,_,filename_now = next(os.walk(version_dir_now+ version_path))
+        model_old = LightningResnetReg.load_from_checkpoint(version_dir_old + version_path + filename_old[0])
+        model_recent = LightningResnetReg.load_from_checkpoint(version_dir_recent + version_path + filename_recent[0])
+        model_now = LightningResnetReg.load_from_checkpoint(version_dir_now + version_path + filename_now[0])
+        print("Testing <2005")
+        with open("log.txt","a+") as file :
+            file.write("Testing <2005 \n")
+        print("         on <2005 : ")
+        trainer_old.test(model_old, test_old)
+        print("         on >2005 : ")
+        trainer_old.test(model_old, test_recent)
+        print("         on >2015 : ")
+        trainer_old.test(model_old, test_now)
+        print("Testing >2005")
+        with open("log.txt","a+") as file :
+            file.write("Testing >2005\n")
+        print("         on <2005 : ")
+        trainer_recent.test(model_recent, test_old)
+        print("         on >2005 : ")
+        trainer_recent.test(model_recent, test_recent)
+        print("         on >2015 : ")
+        trainer_recent.test(model_recent, test_now)
+        print("Testing >2015")
+        with open("log.txt","a+") as file :
+            file.write("Testing >2015\n")
+        print("         on <2005 : ")
+        trainer_now.test(model_now, test_old)
+        print("         on >2005 : ")
+        trainer_now.test(model_now, test_recent)
+        print("         on >2015 : ")
+        trainer_now.test(model_now, test_now)
+        print(f"Run {i} done")
+if __name__ == "__main__":
+    main()

train_split.py ADDED Viewed

	@@ -0,0 +1,138 @@

+import pytorch_lightning as pl
+from pytorch_lightning.loggers import TensorBoardLogger
+from lightning_resnetReg import LightningResnetReg
+import config
+import loading
+import torch
+from torch import nn
+from pathlib import Path
+import numpy as np
+from DigitalTyphoonDataloader.DigitalTyphoonDataset import DigitalTyphoonDataset
+def main():
+    logger_old = TensorBoardLogger("tb_logs", name="resnet_train_old_same")
+    logger_recent = TensorBoardLogger("tb_logs", name="resnet_train_recent_same")
+    logger_now = TensorBoardLogger("tb_logs", name="resnet_train_now_same")
+    # Set up data
+    batch_size=config.BATCH_SIZE
+    num_workers=config.NUM_WORKERS
+    standardize_range=config.STANDARDIZE_RANGE
+    downsample_size=config.DOWNSAMPLE_SIZE
+    type_save = config.TYPE_SAVE
+    nb_runs = config.NB_RUNS
+    data_path = Path("/app/datasets/wnp/")
+    images_path = str(data_path / "image") + "/"
+    track_path = str(data_path / "track") + "/"
+    metadata_path = str(data_path / "metadata.json")
+    def image_filter(image):
+        return (
+            (image.grade() < 7)
+            and (image.year() != 2023)
+            and (100.0 <= image.long() <= 180.0)
+        )  # and (image.mask_1_percent() <  self.corruption_ceiling_pct))
+    def transform_func(image_ray):
+        image_ray = np.clip(
+            image_ray,standardize_range[0],standardize_range[1]
+        )
+        image_ray = (image_ray - standardize_range[0]) / (
+            standardize_range[1] - standardize_range[0]
+        )
+        if downsample_size != (512, 512):
+            image_ray = torch.Tensor(image_ray)
+            image_ray = torch.reshape(
+                image_ray, [1, 1, image_ray.size()[0], image_ray.size()[1]]
+            )
+            image_ray = nn.functional.interpolate(
+                image_ray,
+                size=downsample_size,
+                mode="bilinear",
+                align_corners=False,
+            )
+            image_ray = torch.reshape(
+                image_ray, [image_ray.size()[2], image_ray.size()[3]]
+            )
+            image_ray = image_ray.numpy()
+        return image_ray
+    dataset = DigitalTyphoonDataset(
+                str(images_path),
+                str(track_path),
+                str(metadata_path),
+                "pressure",
+                load_data_into_memory='all_data',
+                filter_func=image_filter,
+                transform_func=transform_func,
+                spectrum="Infrared",
+                verbose=False,
+            )
+    train_old,test_old = loading.load(0,dataset,batch_size,num_workers,type_save)
+    train_recent,test_recent = loading.load(1,dataset,batch_size,num_workers,type_save)
+    train_now,test_now = loading.load(2,dataset,batch_size,num_workers,type_save)
+    # Train
+    model_old = LightningResnetReg(
+        learning_rate=config.LEARNING_RATE,
+        weights=config.WEIGHTS,
+        num_classes=config.NUM_CLASSES,
+    )
+    model_recent = LightningResnetReg(
+        learning_rate=config.LEARNING_RATE,
+        weights=config.WEIGHTS,
+        num_classes=config.NUM_CLASSES,
+    )
+    model_now = LightningResnetReg(
+        learning_rate=config.LEARNING_RATE,
+        weights=config.WEIGHTS,
+        num_classes=config.NUM_CLASSES,
+    )
+    trainer_old = pl.Trainer(
+        logger=logger_old,
+        accelerator=config.ACCELERATOR,
+        devices=config.DEVICE,
+        max_epochs=config.MAX_EPOCHS,
+        default_root_dir=config.LOG_DIR,
+    )
+    trainer_recent = pl.Trainer(
+        logger=logger_recent,
+        accelerator=config.ACCELERATOR,
+        devices=config.DEVICE,
+        max_epochs=config.MAX_EPOCHS,
+        default_root_dir=config.LOG_DIR,
+    )
+    trainer_now = pl.Trainer(
+        logger=logger_now,
+        accelerator=config.ACCELERATOR,
+        devices=config.DEVICE,
+        max_epochs=config.MAX_EPOCHS,
+        default_root_dir=config.LOG_DIR,
+    )
+    for i in range(nb_runs):
+        print("Training <2005")
+        trainer_old.fit(model_old, train_old, test_old)
+        print("Training >2005")
+        trainer_recent.fit(model_recent, train_recent, test_recent)
+        print("Training >2015")
+        trainer_now.fit(model_now, train_now, test_now)
+if __name__ == "__main__":
+    main()