Setup¶
In [1]:
import os
import random
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torchmetrics
from pytorch_lightning import callbacks as pl_callbacks
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from sklearn.metrics import classification_report
from torch import nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import LinearLR
from torch.utils.data import DataLoader
from torchmetrics.functional import accuracy
from torchvision import datasets, models, transforms
# from torchsummary import summary
import torchvision
In [2]:
# Seed every random number generator the notebook relies on.
def set_seed(seed=1234):
    """Seed the Python, NumPy, PyTorch and Lightning generators.

    Args:
        seed: Integer seed handed to each generator (default 1234).
    """
    # Exported with the fixed value "0", independent of ``seed``.
    # NOTE(review): the running interpreter has already chosen its hash seed,
    # so this presumably only influences child processes -- confirm.
    os.environ["PYTHONHASHSEED"] = "0"

    # Same seed for each library-level generator, in the original order.
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    ):
        seeder(seed)

    # Request deterministic cuDNN kernels.
    torch.backends.cudnn.deterministic = True

    # Lightning's helper is called last; ``workers=False`` is passed explicitly.
    pl.seed_everything(seed, workers=False)


set_seed()
[rank: 0] Seed set to 1234
In [3]:
!jupyter --version
print (pl.__version__)
print (torch.__version__)
!python --version
!nvidia-smi
Selected Jupyter core packages...
IPython : 9.3.0
ipykernel : 6.29.5
ipywidgets : not installed
jupyter_client : 8.6.3
jupyter_core : 5.8.1
jupyter_server : 2.16.0
jupyterlab : 4.4.3
nbclient : 0.10.2
nbconvert : 7.16.6
nbformat : 5.10.4
notebook : not installed
qtconsole : not installed
traitlets : 5.14.3
2.5.2
2.5.1.post303
Python 3.13.5
Sun Jun 22 11:22:32 2025
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Tesla V100-SXM2... On | 00000000:86:00.0 Off | 0 |
| N/A 33C P0 55W / 300W | 957MiB / 32768MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 3888349 C .../envs/py-light/bin/python 646MiB |
| 0 N/A N/A 3891157 C .../envs/py-light/bin/python 308MiB |
+-----------------------------------------------------------------------------+
In [4]:
from os.path import expanduser

# --- Filesystem locations ---
HOME = expanduser("~")                 # current user's home directory
DATA_DIR = f"{HOME}/data/catsVsDogs"   # dataset root
CHECKPOINT_DIR = "models/finetune"     # where fine-tuning checkpoints are written

# --- Runtime resources ---
NUM_CPUS = 4                           # DataLoader worker count
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
In [5]:
# --- Input geometry and normalization ---
IMAGE_DIM = 224               # side length used by the Resize / crop transforms
MEAN = STD = (0.5, 0.5, 0.5)  # per-channel values passed to Normalize

# --- Optimization ---
BATCH_SIZE = 16
LEARNING_RATE = 1e-5
Define Transforms¶
The transforms are the same as in the feature-extraction step; they are only written in a different format.
In [6]:
def _base_steps():
    """Return fresh resize -> tensor -> normalize steps used by every split."""
    return [
        transforms.Resize(
            size=(IMAGE_DIM, IMAGE_DIM),
            interpolation=transforms.InterpolationMode.BILINEAR,
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ]


# Random augmentations applied after the base steps, for training only.
_train_augmentations = [
    transforms.RandomAffine(degrees=0, shear=0.2),  # Shear
    # NOTE(review): the upper scale bound is above 1.0 -- confirm this is the
    # intended translation of the "zoom" augmentation.
    transforms.RandomResizedCrop(
        size=IMAGE_DIM,
        scale=(0.8, 1.2),
        interpolation=transforms.InterpolationMode.NEAREST,
    ),  # Zoom
    transforms.RandomHorizontalFlip(),
]

# Per-split pipelines: "train" adds augmentation, "val" is deterministic.
transform = {
    "train": transforms.Compose(_base_steps() + _train_augmentations),
    "val": transforms.Compose(_base_steps()),
}
Define Data Module¶
In [7]:
class CatsDogsData(pl.LightningDataModule):
    """Lightning data module serving the cats-vs-dogs image folders.

    ``data_dir`` must contain ``train``, ``val`` and ``test`` sub-directories,
    each readable by ``torchvision.datasets.ImageFolder``.

    Args:
        data_dir: Root directory holding the three split folders.
        batch_size: Batch size used by every DataLoader.
    """

    def __init__(self, data_dir=DATA_DIR, batch_size=BATCH_SIZE):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        # Module-level dict of per-split pipelines (keys "train" and "val").
        self.transform = transform

    def _build_datasets(self):
        """Create the train/val/test ``ImageFolder`` datasets on this instance."""
        self.train_data = datasets.ImageFolder(
            root=os.path.join(self.data_dir, "train"), transform=self.transform["train"]
        )
        # Validation and test share the deterministic "val" pipeline.
        self.val_data = datasets.ImageFolder(
            root=os.path.join(self.data_dir, "val"), transform=self.transform["val"]
        )
        self.test_data = datasets.ImageFolder(
            root=os.path.join(self.data_dir, "test"), transform=self.transform["val"]
        )

    def prepare_data(self):
        """Build the datasets.

        Kept so that code calling ``prepare_data()`` directly (outside a
        Trainer) still gets usable DataLoaders afterwards.
        """
        self._build_datasets()

    def setup(self, stage=None):
        """Build the datasets on every process.

        Lightning invokes ``prepare_data`` on a single process only, so state
        assigned there is missing on the other ranks of a distributed run.
        ``setup`` runs on every process, which makes the datasets available
        everywhere.

        Args:
            stage: Stage name supplied by the Trainer; all three datasets are
                built regardless of its value.
        """
        self._build_datasets()

    def train_dataloader(self):
        """Train DataLoader (shuffled)."""
        # Worker processes speed up image decoding and augmentation.
        return DataLoader(self.train_data, batch_size=self.batch_size, shuffle=True, num_workers=NUM_CPUS)

    def val_dataloader(self):
        """Validation DataLoader (fixed order)."""
        return DataLoader(self.val_data, batch_size=self.batch_size, shuffle=False, num_workers=NUM_CPUS)

    def test_dataloader(self):
        """Test DataLoader (fixed order)."""
        return DataLoader(self.test_data, batch_size=self.batch_size, shuffle=False, num_workers=NUM_CPUS)
Define Model¶
In [8]:
class MobileNetV2Model(pl.LightningModule):
    """MobileNetV2 fine-tuning module for binary classification.

    The pretrained backbone receives a new single-unit head ending in a
    sigmoid, so ``forward`` returns probabilities, not raw logits. Backbone
    parameter tensors with index 0..116 (in ``self.model.parameters()`` order,
    taken before the head is replaced) are frozen; everything else is trained.
    """

    def __init__(self):
        super().__init__()
        self.automatic_optimization = True

        # Build the pretrained backbone from the installed torchvision. This
        # avoids fetching a pinned GitHub tag through torch.hub every time the
        # module is constructed.
        self.model = models.mobilenet_v2(
            weights=models.MobileNet_V2_Weights.DEFAULT, progress=False
        )

        # Registered as a sub-module, so it moves with the LightningModule;
        # no explicit device placement is required here.
        self.accuracy = torchmetrics.Accuracy(task="binary")

        # Freeze weights up to layer 116
        for index, param in enumerate(self.model.parameters()):
            if index <= 116:
                param.requires_grad = False

        # Top model
        # NOTE(review): ``pooling`` is only assigned here; confirm whether the
        # backbone's forward pass ever reads this attribute.
        self.model.pooling = nn.AdaptiveAvgPool2d(output_size=1)
        self.model.classifier = nn.Sequential(
            nn.Dropout(p=0.2), nn.Linear(1280, 1), nn.Sigmoid()
        )

    def forward(self, x):
        """Return the sigmoid output of the network for the batch ``x``."""
        return self.model(x.float())

    def configure_optimizers(self):
        """Adam over the trainable parameters plus a short linear LR warm-up."""
        trainable = filter(lambda p: p.requires_grad, self.model.parameters())
        opt = torch.optim.Adam(trainable, lr=LEARNING_RATE)
        # Ramps the learning rate from 0.5x to 1x over the first 3 scheduler steps.
        scheduler = LinearLR(opt, start_factor=0.5, total_iters=3)
        return [opt], [scheduler]

    def _shared_step(self, batch, stage, prog_bar):
        """Compute and log loss/accuracy for one ``(images, labels)`` batch.

        Metrics are logged once per epoch as ``{stage}_loss`` and
        ``{stage}_acc``.

        Args:
            batch: Pair ``(images, labels)`` produced by a DataLoader.
            stage: Metric-name prefix: "train", "val" or "test".
            prog_bar: Whether the metrics are shown on the progress bar.

        Returns:
            The binary cross-entropy loss for the batch.
        """
        images, labels = batch
        # The head ends in a sigmoid, so these are probabilities.
        probs = self(images).view(-1)
        loss = F.binary_cross_entropy(probs, labels.float())
        acc = self.accuracy(probs >= 0.5, labels)
        self.log(f"{stage}_loss", loss, prog_bar=prog_bar, on_epoch=True, on_step=False)
        self.log(f"{stage}_acc", acc, prog_bar=prog_bar, on_epoch=True, on_step=False)
        return loss

    def training_step(self, batch, batch_idx: int):
        """Training step: logs ``train_loss`` / ``train_acc`` and returns the loss."""
        return self._shared_step(batch, "train", prog_bar=True)

    def validation_step(self, batch, batch_idx: int):
        """Validation step: logs ``val_loss`` / ``val_acc`` and returns the loss."""
        return self._shared_step(batch, "val", prog_bar=True)

    def test_step(self, batch, batch_idx: int):
        """Test step: logs ``test_loss`` / ``test_acc`` and returns the loss.

        Logging here is what lets ``trainer.test`` report metrics; without it
        the accuracy would be computed and then discarded.
        """
        return self._shared_step(batch, "test", prog_bar=False)
Train Model¶
In [9]:
# Data module: build the datasets up front so later cells can request
# DataLoaders from it directly.
data = CatsDogsData()
data.prepare_data()

# Upper bound on training length; early stopping may end the run sooner.
num_epochs = 30

# Stop when val_loss has not improved by at least 1e-3 for 3 consecutive checks.
early_stop = pl_callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-3,
    patience=3,
    verbose=True,
)
def get_predict(model, data_loader):
    """Collect true labels and thresholded predictions over a DataLoader.

    The model is run in eval mode with gradients disabled, and its previous
    train/eval mode is restored afterwards.

    Args:
        model: Module mapping an image batch to one probability per image.
        data_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(true_values, predicted_values)``: two flat lists of equal length,
        labels as Python ints and predictions as Python bools
        (``probability >= 0.5``).
    """
    # Use the device the weights live on, so inputs and weights can never end
    # up on different devices. Parameter-free models get the batch untouched.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    true_values = []
    predicted_values = []

    was_training = model.training
    model.eval()  # disable dropout and use running batch-norm statistics
    try:
        with torch.no_grad():  # no autograd graph needed for inference
            for imgs, labels in data_loader:
                if device is not None:
                    imgs = imgs.to(device)
                outputs = model(imgs)
                # Store plain Python scalars rather than 0-d tensors.
                true_values.extend(labels.reshape(-1).cpu().tolist())
                predicted_values.extend((outputs >= 0.5).reshape(-1).cpu().tolist())
    finally:
        model.train(was_training)
    return true_values, predicted_values
# Keep only the single best checkpoint (lowest val_loss), weights only.
checkpoint = pl_callbacks.ModelCheckpoint(
    dirpath=CHECKPOINT_DIR,
    filename=str(num_epochs) + "_{epoch:02d}-{step}",
    monitor="val_loss",
    mode="min",
    save_weights_only=True,
    save_top_k=1,
    verbose=True,
)

# Follow DEVICE instead of hard-coding "gpu", so the Trainer can still be
# created on a machine where DEVICE fell back to the CPU.
trainer = pl.Trainer(
    accelerator="gpu" if DEVICE.type == "cuda" else "cpu",
    max_epochs=num_epochs,
    callbacks=[checkpoint, early_stop, TQDMProgressBar(refresh_rate=50)],
)

model_path = "models/feature_extraction/best_model.ckpt"
# Load from feature extraction checkpoint (rescale + augment).
# strict=False tolerates key mismatches between checkpoint and module.
model = MobileNetV2Model.load_from_checkpoint(
    checkpoint_path=model_path,
    strict=False,
)
model = model.to(DEVICE)
# Optional architecture summary (requires torchsummary):
# summary(model, (3, 224, 224))
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs Using cache found in /home/mhnguyen/.cache/torch/hub/pytorch_vision_v0.10.0
In [10]:
# Train; the data module supplies the train and validation loaders.
trainer.fit(model, datamodule=data)

# Path of the best checkpoint as tracked by the ModelCheckpoint callback.
best_model_path = checkpoint.best_model_path
print(f"Best model saved at: {best_model_path}")
/scratch/mhnguyen/job_40549290/miniconda3/envs/py-light/lib/python3.13/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:658: Checkpoint directory /home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune exists and is not empty. LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params | Mode ---------------------------------------------------- 0 | model | MobileNetV2 | 2.2 M | train 1 | accuracy | BinaryAccuracy | 0 | train ---------------------------------------------------- 1.7 M Trainable params 542 K Non-trainable params 2.2 M Total params 8.901 Total estimated model params size (MB) 216 Modules in train mode 0 Modules in eval mode SLURM auto-requeueing enabled. Setting signal handlers.
Epoch 0: 100%|██████████| 125/125 [00:02<00:00, 53.99it/s, v_num=4.05e+7] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.64it/s] Epoch 0: 100%|██████████| 125/125 [00:02<00:00, 43.11it/s, v_num=4.05e+7, val_loss=0.171, val_acc=0.965, train_loss=0.231, train_acc=0.945]
Metric val_loss improved. New best score: 0.171 Epoch 0, global step 125: 'val_loss' reached 0.17127 (best 0.17127), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=00-step=125.ckpt' as top 1
Epoch 1: 100%|██████████| 125/125 [00:02<00:00, 53.74it/s, v_num=4.05e+7, val_loss=0.171, val_acc=0.965, train_loss=0.231, train_acc=0.945] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.92it/s] Epoch 1: 100%|██████████| 125/125 [00:02<00:00, 42.97it/s, v_num=4.05e+7, val_loss=0.153, val_acc=0.970, train_loss=0.222, train_acc=0.934]
Metric val_loss improved by 0.018 >= min_delta = 0.001. New best score: 0.153 Epoch 1, global step 250: 'val_loss' reached 0.15334 (best 0.15334), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=01-step=250.ckpt' as top 1
Epoch 2: 100%|██████████| 125/125 [00:02<00:00, 54.01it/s, v_num=4.05e+7, val_loss=0.153, val_acc=0.970, train_loss=0.222, train_acc=0.934] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 85.64it/s] Epoch 2: 100%|██████████| 125/125 [00:02<00:00, 43.53it/s, v_num=4.05e+7, val_loss=0.127, val_acc=0.973, train_loss=0.184, train_acc=0.946]
Metric val_loss improved by 0.026 >= min_delta = 0.001. New best score: 0.127 Epoch 2, global step 375: 'val_loss' reached 0.12713 (best 0.12713), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=02-step=375.ckpt' as top 1
Epoch 3: 100%|██████████| 125/125 [00:02<00:00, 53.95it/s, v_num=4.05e+7, val_loss=0.127, val_acc=0.973, train_loss=0.184, train_acc=0.946] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 85.36it/s] Epoch 3: 100%|██████████| 125/125 [00:02<00:00, 43.94it/s, v_num=4.05e+7, val_loss=0.116, val_acc=0.967, train_loss=0.164, train_acc=0.955]
Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.116 Epoch 3, global step 500: 'val_loss' reached 0.11574 (best 0.11574), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=03-step=500.ckpt' as top 1
Epoch 4: 100%|██████████| 125/125 [00:02<00:00, 53.73it/s, v_num=4.05e+7, val_loss=0.116, val_acc=0.967, train_loss=0.164, train_acc=0.955] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 81.49it/s] Epoch 4: 100%|██████████| 125/125 [00:02<00:00, 43.27it/s, v_num=4.05e+7, val_loss=0.104, val_acc=0.970, train_loss=0.138, train_acc=0.964]
Metric val_loss improved by 0.012 >= min_delta = 0.001. New best score: 0.104 Epoch 4, global step 625: 'val_loss' reached 0.10368 (best 0.10368), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=04-step=625.ckpt' as top 1
Epoch 5: 100%|██████████| 125/125 [00:02<00:00, 51.70it/s, v_num=4.05e+7, val_loss=0.104, val_acc=0.970, train_loss=0.138, train_acc=0.964] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 81.51it/s] Epoch 5: 100%|██████████| 125/125 [00:02<00:00, 42.40it/s, v_num=4.05e+7, val_loss=0.0899, val_acc=0.973, train_loss=0.131, train_acc=0.961]
Metric val_loss improved by 0.014 >= min_delta = 0.001. New best score: 0.090 Epoch 5, global step 750: 'val_loss' reached 0.08989 (best 0.08989), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=05-step=750.ckpt' as top 1
Epoch 6: 100%|██████████| 125/125 [00:02<00:00, 52.25it/s, v_num=4.05e+7, val_loss=0.0899, val_acc=0.973, train_loss=0.131, train_acc=0.961] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 82.97it/s] Epoch 6: 100%|██████████| 125/125 [00:02<00:00, 42.06it/s, v_num=4.05e+7, val_loss=0.0874, val_acc=0.973, train_loss=0.123, train_acc=0.960]
Metric val_loss improved by 0.003 >= min_delta = 0.001. New best score: 0.087 Epoch 6, global step 875: 'val_loss' reached 0.08736 (best 0.08736), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=06-step=875.ckpt' as top 1
Epoch 7: 100%|██████████| 125/125 [00:02<00:00, 52.27it/s, v_num=4.05e+7, val_loss=0.0874, val_acc=0.973, train_loss=0.123, train_acc=0.960] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 85.52it/s] Epoch 7: 100%|██████████| 125/125 [00:02<00:00, 42.27it/s, v_num=4.05e+7, val_loss=0.0787, val_acc=0.978, train_loss=0.125, train_acc=0.952]
Metric val_loss improved by 0.009 >= min_delta = 0.001. New best score: 0.079 Epoch 7, global step 1000: 'val_loss' reached 0.07869 (best 0.07869), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=07-step=1000.ckpt' as top 1
Epoch 8: 100%|██████████| 125/125 [00:02<00:00, 53.86it/s, v_num=4.05e+7, val_loss=0.0787, val_acc=0.978, train_loss=0.125, train_acc=0.952] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.50it/s] Epoch 8: 100%|██████████| 125/125 [00:02<00:00, 43.41it/s, v_num=4.05e+7, val_loss=0.0735, val_acc=0.980, train_loss=0.093, train_acc=0.970]
Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.074 Epoch 8, global step 1125: 'val_loss' reached 0.07354 (best 0.07354), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=08-step=1125.ckpt' as top 1
Epoch 9: 100%|██████████| 125/125 [00:02<00:00, 51.57it/s, v_num=4.05e+7, val_loss=0.0735, val_acc=0.980, train_loss=0.093, train_acc=0.970] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 84.47it/s] Epoch 9: 100%|██████████| 125/125 [00:03<00:00, 41.65it/s, v_num=4.05e+7, val_loss=0.0697, val_acc=0.978, train_loss=0.0924, train_acc=0.970]
Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.070 Epoch 9, global step 1250: 'val_loss' reached 0.06972 (best 0.06972), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=09-step=1250.ckpt' as top 1
Epoch 10: 100%|██████████| 125/125 [00:02<00:00, 53.94it/s, v_num=4.05e+7, val_loss=0.0697, val_acc=0.978, train_loss=0.0924, train_acc=0.970] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.41it/s] Epoch 10: 100%|██████████| 125/125 [00:02<00:00, 43.64it/s, v_num=4.05e+7, val_loss=0.0703, val_acc=0.975, train_loss=0.0978, train_acc=0.965]
Epoch 10, global step 1375: 'val_loss' was not in top 1
Epoch 11: 100%|██████████| 125/125 [00:02<00:00, 52.04it/s, v_num=4.05e+7, val_loss=0.0703, val_acc=0.975, train_loss=0.0978, train_acc=0.965] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.63it/s] Epoch 11: 100%|██████████| 125/125 [00:02<00:00, 42.32it/s, v_num=4.05e+7, val_loss=0.0639, val_acc=0.980, train_loss=0.0848, train_acc=0.970]
Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.064 Epoch 11, global step 1500: 'val_loss' reached 0.06392 (best 0.06392), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=11-step=1500.ckpt' as top 1
Epoch 12: 100%|██████████| 125/125 [00:02<00:00, 53.63it/s, v_num=4.05e+7, val_loss=0.0639, val_acc=0.980, train_loss=0.0848, train_acc=0.970] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.05it/s] Epoch 12: 100%|██████████| 125/125 [00:02<00:00, 42.98it/s, v_num=4.05e+7, val_loss=0.0622, val_acc=0.978, train_loss=0.0853, train_acc=0.971]
Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.062 Epoch 12, global step 1625: 'val_loss' reached 0.06216 (best 0.06216), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=12-step=1625.ckpt' as top 1
Epoch 13: 100%|██████████| 125/125 [00:02<00:00, 51.74it/s, v_num=4.05e+7, val_loss=0.0622, val_acc=0.978, train_loss=0.0853, train_acc=0.971] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.90it/s] Epoch 13: 100%|██████████| 125/125 [00:02<00:00, 42.09it/s, v_num=4.05e+7, val_loss=0.0615, val_acc=0.975, train_loss=0.083, train_acc=0.967]
Epoch 13, global step 1750: 'val_loss' reached 0.06150 (best 0.06150), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=13-step=1750.ckpt' as top 1
Epoch 14: 100%|██████████| 125/125 [00:02<00:00, 53.52it/s, v_num=4.05e+7, val_loss=0.0615, val_acc=0.975, train_loss=0.083, train_acc=0.967] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 80.39it/s] Epoch 14: 100%|██████████| 125/125 [00:02<00:00, 43.41it/s, v_num=4.05e+7, val_loss=0.0604, val_acc=0.980, train_loss=0.0703, train_acc=0.980]
Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.060 Epoch 14, global step 1875: 'val_loss' reached 0.06042 (best 0.06042), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=14-step=1875.ckpt' as top 1
Epoch 15: 100%|██████████| 125/125 [00:02<00:00, 53.19it/s, v_num=4.05e+7, val_loss=0.0604, val_acc=0.980, train_loss=0.0703, train_acc=0.980] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 81.32it/s] Epoch 15: 100%|██████████| 125/125 [00:02<00:00, 42.82it/s, v_num=4.05e+7, val_loss=0.0586, val_acc=0.978, train_loss=0.0703, train_acc=0.978]
Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.059 Epoch 15, global step 2000: 'val_loss' reached 0.05862 (best 0.05862), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=15-step=2000.ckpt' as top 1
Epoch 16: 100%|██████████| 125/125 [00:02<00:00, 52.29it/s, v_num=4.05e+7, val_loss=0.0586, val_acc=0.978, train_loss=0.0703, train_acc=0.978] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 79.81it/s] Epoch 16: 100%|██████████| 125/125 [00:02<00:00, 42.60it/s, v_num=4.05e+7, val_loss=0.0578, val_acc=0.980, train_loss=0.0697, train_acc=0.978]
Epoch 16, global step 2125: 'val_loss' reached 0.05781 (best 0.05781), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=16-step=2125.ckpt' as top 1
Epoch 17: 100%|██████████| 125/125 [00:02<00:00, 51.12it/s, v_num=4.05e+7, val_loss=0.0578, val_acc=0.980, train_loss=0.0697, train_acc=0.978] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 84.23it/s] Epoch 17: 100%|██████████| 125/125 [00:03<00:00, 41.54it/s, v_num=4.05e+7, val_loss=0.0552, val_acc=0.980, train_loss=0.0793, train_acc=0.970]
Metric val_loss improved by 0.003 >= min_delta = 0.001. New best score: 0.055 Epoch 17, global step 2250: 'val_loss' reached 0.05523 (best 0.05523), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=17-step=2250.ckpt' as top 1
Epoch 18: 100%|██████████| 125/125 [00:02<00:00, 53.74it/s, v_num=4.05e+7, val_loss=0.0552, val_acc=0.980, train_loss=0.0793, train_acc=0.970] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 84.28it/s] Epoch 18: 100%|██████████| 125/125 [00:02<00:00, 43.53it/s, v_num=4.05e+7, val_loss=0.0578, val_acc=0.980, train_loss=0.0637, train_acc=0.975]
Epoch 18, global step 2375: 'val_loss' was not in top 1
Epoch 19: 100%|██████████| 125/125 [00:02<00:00, 51.89it/s, v_num=4.05e+7, val_loss=0.0578, val_acc=0.980, train_loss=0.0637, train_acc=0.975] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 84.46it/s] Epoch 19: 100%|██████████| 125/125 [00:02<00:00, 41.96it/s, v_num=4.05e+7, val_loss=0.0537, val_acc=0.978, train_loss=0.0662, train_acc=0.976]
Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.054 Epoch 19, global step 2500: 'val_loss' reached 0.05373 (best 0.05373), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=19-step=2500.ckpt' as top 1
Epoch 20: 100%|██████████| 125/125 [00:02<00:00, 52.33it/s, v_num=4.05e+7, val_loss=0.0537, val_acc=0.978, train_loss=0.0662, train_acc=0.976] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 84.75it/s] Epoch 20: 100%|██████████| 125/125 [00:02<00:00, 42.50it/s, v_num=4.05e+7, val_loss=0.0535, val_acc=0.978, train_loss=0.0563, train_acc=0.980]
Epoch 20, global step 2625: 'val_loss' reached 0.05354 (best 0.05354), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=20-step=2625-v2.ckpt' as top 1
Epoch 21: 100%|██████████| 125/125 [00:02<00:00, 52.25it/s, v_num=4.05e+7, val_loss=0.0535, val_acc=0.978, train_loss=0.0563, train_acc=0.980] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 85.10it/s] Epoch 21: 100%|██████████| 125/125 [00:02<00:00, 42.29it/s, v_num=4.05e+7, val_loss=0.0548, val_acc=0.978, train_loss=0.0461, train_acc=0.990]
Epoch 21, global step 2750: 'val_loss' was not in top 1
Epoch 22: 100%|██████████| 125/125 [00:02<00:00, 51.54it/s, v_num=4.05e+7, val_loss=0.0548, val_acc=0.978, train_loss=0.0461, train_acc=0.990] Validation: | | 0/? [00:00<?, ?it/s] Validation: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 0%| | 0/25 [00:00<?, ?it/s] Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 84.65it/s] Epoch 22: 100%|██████████| 125/125 [00:02<00:00, 41.83it/s, v_num=4.05e+7, val_loss=0.0581, val_acc=0.978, train_loss=0.0626, train_acc=0.976]
Monitored metric val_loss did not improve in the last 3 records. Best score: 0.054. Signaling Trainer to stop. Epoch 22, global step 2875: 'val_loss' was not in top 1
Epoch 22: 100%|██████████| 125/125 [00:03<00:00, 41.36it/s, v_num=4.05e+7, val_loss=0.0581, val_acc=0.978, train_loss=0.0626, train_acc=0.976] Best model saved at: /home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=20-step=2625-v2.ckpt
Evaluate Model¶
In [11]:
# Reload the best checkpoint, place it on DEVICE and freeze it for evaluation.
model = MobileNetV2Model.load_from_checkpoint(checkpoint_path=best_model_path).to(DEVICE)
model.freeze()

# Make predictions for each split, evaluated in train -> val -> test order.
(y_train, pred_train), (y_val, pred_val), (y_test, pred_test) = [
    get_predict(model, make_loader())
    for make_loader in (
        data.train_dataloader,
        data.val_dataloader,
        data.test_dataloader,
    )
]
Using cache found in /home/mhnguyen/.cache/torch/hub/pytorch_vision_v0.10.0
In [12]:
# Show which checkpoint file the ModelCheckpoint callback recorded as best.
print(checkpoint.best_model_path)
/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=20-step=2625-v2.ckpt
In [13]:
# One sklearn classification report per split, printed with four decimals.
for split_name, split_true, split_pred in (
    ("Train", y_train, pred_train),
    ("Val", y_val, pred_val),
    ("Test", y_test, pred_test),
):
    print(f"{split_name}:\n {classification_report(split_true, split_pred, digits=4)}")
Train:
precision recall f1-score support
0 0.9980 0.9950 0.9965 1000
1 0.9950 0.9980 0.9965 1000
accuracy 0.9965 2000
macro avg 0.9965 0.9965 0.9965 2000
weighted avg 0.9965 0.9965 0.9965 2000
Val:
precision recall f1-score support
0 0.9704 0.9850 0.9777 200
1 0.9848 0.9700 0.9773 200
accuracy 0.9775 400
macro avg 0.9776 0.9775 0.9775 400
weighted avg 0.9776 0.9775 0.9775 400
Test:
precision recall f1-score support
0 0.9802 0.9900 0.9851 200
1 0.9899 0.9800 0.9849 200
accuracy 0.9850 400
macro avg 0.9850 0.9850 0.9850 400
weighted avg 0.9850 0.9850 0.9850 400
Perform Inference¶
In [14]:
# Make sure the model is on DEVICE before running single-image inference.
model = model.to(DEVICE)
In [15]:
# Helper that prepares a single image file for model inference.
def image_loader(image_path, show=True):
    """Load an image, optionally display it, and return a model-ready batch.

    Args:
        image_path: Path of the image file to open.
        show: When True (the default) the RGB image is drawn with matplotlib
            before it is preprocessed.

    Returns:
        Tensor on ``DEVICE`` with a leading batch dimension of 1, preprocessed
        by the validation pipeline ``transform["val"]``.
    """
    # Close the file handle deterministically. ``convert`` returns a separate
    # in-memory image, so it stays usable after the file is closed.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    if show:
        plt.figure(figsize=(5, 5))
        plt.imshow(image)
        plt.axis("off")
        plt.show()

    # Reuse the validation pipeline instead of repeating its literals here, so
    # inference preprocessing cannot drift from what validation used.
    return transform["val"](image).unsqueeze(0).to(DEVICE)
In [16]:
# Single-image inference on a test image from the "cats" folder.
image_path = DATA_DIR + "/test/cats/cat.1070.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# The closer the score is to 0, the more confident the model is that the image
# is a cat; the closer it is to 1, the more confident it is a dog.
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1070.jpg: cat (0.0012)
In [17]:
# Single-image inference on a test image from the "dogs" folder.
image_path = DATA_DIR + "/test/dogs/dog.1233.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1233.jpg: dog (0.9819)
In [18]:
# Single-image inference on a test image from the "cats" folder.
image_path = DATA_DIR + "/test/cats/cat.1080.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1080.jpg: cat (0.1485)
In [19]:
# Single-image inference on a test image from the "dogs" folder.
image_path = DATA_DIR + "/test/dogs/dog.1132.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1132.jpg: dog (0.9477)
In [20]:
# Single-image inference on a test image from the "dogs" folder.
image_path = DATA_DIR + "/test/dogs/dog.1311.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1311.jpg: cat (0.2584)
In [21]:
# Single-image inference on a test image from the "cats" folder.
image_path = DATA_DIR + "/test/cats/cat.1338.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1338.jpg: cat (0.0179)
In [22]:
# Single-image inference on a test image from the "cats" folder.
image_path = DATA_DIR + "/test/cats/cat.1342.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1342.jpg: cat (0.0825)
In [23]:
# Single-image inference on a test image from the "cats" folder.
image_path = DATA_DIR + "/test/cats/cat.1180.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1180.jpg: cat (0.0088)
In [24]:
# Single-image inference on a test image from the "cats" folder.
image_path = DATA_DIR + "/test/cats/cat.1048.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1048.jpg: cat (0.2482)
In [25]:
# Single-image inference on a test image from the "dogs" folder.
image_path = DATA_DIR + "/test/dogs/dog.1342.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1342.jpg: dog (0.6592)
In [26]:
# Single-image inference on a test image from the "dogs" folder.
image_path = DATA_DIR + "/test/dogs/dog.1308.jpg"
img = image_loader(image_path)

# Forward pass without autograd; .item() unwraps the single output value.
with torch.no_grad():
    img_y_pred = model(img).item()

# Scores at or above 0.5 are reported as "dog", otherwise "cat".
predicted_label = "dog" if img_y_pred >= 0.5 else "cat"
print()
print(f"Prediction for {image_path}: \n{predicted_label} ({img_y_pred:.4f})")
Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1308.jpg: dog (0.6402)
In [ ]:
In [ ]: