import os
import random
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torchmetrics
from pytorch_lightning import callbacks as pl_callbacks
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from sklearn.metrics import classification_report
from torch import nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import LinearLR
from torch.utils.data import DataLoader
from torchmetrics.functional import accuracy
from torchvision import datasets, models, transforms
# from torchsummary import summary
import torchvision

# Set global random seed for reproducibility

def set_seed(seed=1234):
    """Seed every random number generator used by this notebook.

    Args:
        seed: Integer seed handed to Python's ``random``, NumPy, PyTorch
            (CPU and current CUDA device) and PyTorch Lightning.
    """
    # Pin Python's hash seed to 0 (disable hash randomization).
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
    # this presumably only affects child processes -- confirm.
    os.environ["PYTHONHASHSEED"] = "0"

    # Apply the same seed to each library-level generator in turn.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Ask cuDNN to pick deterministic algorithms.
    torch.backends.cudnn.deterministic = True

    # Let Lightning record the seed as well; worker seeding stays disabled.
    pl.seed_everything(seed, workers=False)

set_seed()

[rank: 0] Seed set to 1234

# Record the software environment this notebook ran in: Jupyter components,
# PyTorch Lightning (pl), PyTorch, and the Python interpreter.
!jupyter --version
print (pl.__version__)
print (torch.__version__)
!python --version

# Show the GPUs visible to this session and their current memory/utilisation.
!nvidia-smi

Selected Jupyter core packages...
IPython          : 9.3.0
ipykernel        : 6.29.5
ipywidgets       : not installed
jupyter_client   : 8.6.3
jupyter_core     : 5.8.1
jupyter_server   : 2.16.0
jupyterlab       : 4.4.3
nbclient         : 0.10.2
nbconvert        : 7.16.6
nbformat         : 5.10.4
notebook         : not installed
qtconsole        : not installed
traitlets        : 5.14.3
2.5.2
2.5.1.post303
Python 3.13.5
Sun Jun 22 11:22:32 2025       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla V100-SXM2...  On   | 00000000:86:00.0 Off |                    0 |
| N/A   33C    P0    55W / 300W |    957MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|    0   N/A  N/A   3888349      C   .../envs/py-light/bin/python      646MiB |
|    0   N/A  N/A   3891157      C   .../envs/py-light/bin/python      308MiB |
+-----------------------------------------------------------------------------+

from os.path import expanduser

# --- Filesystem locations -------------------------------------------------
HOME = expanduser("~")
DATA_DIR = HOME + "/data/catsVsDogs"   # expects train/, val/ and test/ subfolders
CHECKPOINT_DIR = "models/finetune"     # where fine-tuning checkpoints are written

# --- Hardware -------------------------------------------------------------
NUM_CPUS = 4                           # worker processes per DataLoader
DEVICE = (
    torch.device("cuda")
    if torch.cuda.is_available()
    else torch.device("cpu")
)

# --- Image preprocessing --------------------------------------------------
IMAGE_DIM = 224                        # images are resized to IMAGE_DIM x IMAGE_DIM
MEAN = (0.5,) * 3                      # per-channel mean used by Normalize
STD = (0.5,) * 3                       # per-channel std used by Normalize

# --- Optimisation ---------------------------------------------------------
BATCH_SIZE = 16
LEARNING_RATE = 1e-5                   # base learning rate for Adam

# Preprocessing pipelines keyed by split name. "train" adds random
# augmentation on top of the deterministic resize / tensor / normalise steps
# that "val" applies (the "val" pipeline is also used for the test split).
transform = {
    "train": transforms.Compose(
        [
            transforms.Resize(
                size=(IMAGE_DIM, IMAGE_DIM),
                interpolation=transforms.InterpolationMode.BILINEAR,
            ),
            transforms.ToTensor(),
            # With MEAN = STD = 0.5 this computes (x - 0.5) / 0.5 per channel.
            transforms.Normalize(mean=MEAN, std=STD),
            # NOTE(review): the augmentations below run AFTER Normalize, so any
            # pixels RandomAffine fills in are presumably 0 in normalised
            # space (mid-grey), not black -- confirm this is intended.
            # NOTE(review): torchvision documents `shear` in degrees, so 0.2
            # is a very small shear -- confirm the intended magnitude.
            transforms.RandomAffine(degrees=0, shear=0.2),  # Shear
            # NOTE(review): `scale` is the crop area as a fraction of the
            # input; an upper bound of 1.2 asks for crops larger than the
            # image -- verify how torchvision handles such draws.
            transforms.RandomResizedCrop(
                size=IMAGE_DIM,
                scale=(0.8, 1.2),
                interpolation=transforms.InterpolationMode.NEAREST,
            ),  # Zoom
            transforms.RandomHorizontalFlip(),
        ]
    ),
    "val": transforms.Compose(
        [
            transforms.Resize(
                size=(IMAGE_DIM, IMAGE_DIM),
                interpolation=transforms.InterpolationMode.BILINEAR,
            ),
            transforms.ToTensor(),
            transforms.Normalize(mean=MEAN, std=STD),
        ]
    ),
}

class CatsDogsData(pl.LightningDataModule):
    """LightningDataModule serving the cats-vs-dogs train/val/test splits.

    Each split is an ``ImageFolder`` rooted at ``<data_dir>/<split>``. The
    training split uses the augmenting ``transform["train"]`` pipeline; the
    validation and test splits both use ``transform["val"]``.

    Args:
        data_dir: Directory containing the ``train``, ``val`` and ``test``
            sub-folders.
        batch_size: Number of samples per batch for every DataLoader.
    """

    def __init__(self, data_dir=DATA_DIR, batch_size=BATCH_SIZE):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = transform
        # Populated by setup(); None means "not built yet".
        self.train_data = None
        self.val_data = None
        self.test_data = None

    def prepare_data(self):
        """Build the datasets (kept so direct ``prepare_data()`` calls work).

        Lightning runs this hook on a single process only, so the datasets
        are actually assigned in :meth:`setup`, which runs on every process.
        """
        self.setup()

    def setup(self, stage=None):
        """Create the three ``ImageFolder`` datasets once.

        Args:
            stage: Lightning stage name. Ignored: all splits are always built
                so every dataloader is usable after any stage.
        """
        if self.train_data is None:
            self.train_data = self._image_folder("train", "train")
        if self.val_data is None:
            self.val_data = self._image_folder("val", "val")
        if self.test_data is None:
            # The test split deliberately reuses the non-augmenting pipeline.
            self.test_data = self._image_folder("test", "val")

    def _image_folder(self, split, transform_key):
        """Return an ImageFolder for ``<data_dir>/<split>``."""
        return datasets.ImageFolder(
            root=os.path.join(self.data_dir, split),
            transform=self.transform[transform_key],
        )

    def _loader(self, dataset, shuffle):
        """Wrap ``dataset`` in a DataLoader using the module's settings."""
        # Num workers - speed up loading
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=NUM_CPUS,
        )

    def train_dataloader(self):
        """Train DataLoader (shuffled)."""
        self.setup()  # no-op when the datasets already exist
        return self._loader(self.train_data, shuffle=True)

    def val_dataloader(self):
        """Validation DataLoader."""
        self.setup()
        return self._loader(self.val_data, shuffle=False)

    def test_dataloader(self):
        """Test DataLoader."""
        self.setup()
        return self._loader(self.test_data, shuffle=False)

class MobileNetV2Model(pl.LightningModule):
    """MobileNetV2 binary classifier fine-tuned for cats vs. dogs.

    The ImageNet-pretrained backbone has its first 117 parameter tensors
    frozen and its classifier replaced by ``Dropout -> Linear(1280, 1) ->
    Sigmoid``, so ``forward`` returns one probability per image.
    """

    def __init__(self):
        super().__init__()
        self.automatic_optimization = True

        # Build the backbone from the installed torchvision. The previous
        # torch.hub call pinned "pytorch/vision:v0.10.0" but passed the newer
        # `weights=` argument, so it only worked because the already-imported
        # torchvision was what actually got used.
        self.model = torchvision.models.mobilenet_v2(
            weights=torchvision.models.MobileNet_V2_Weights.DEFAULT,
            progress=False,
        )

        # Registered as a submodule, so it follows the model across devices;
        # no explicit .to(DEVICE) is needed.
        self.accuracy = torchmetrics.Accuracy(task="binary")

        # Freeze the first 117 parameter tensors (indices 0..116). These are
        # parameter tensors in registration order, not layers.
        for i, param in enumerate(self.model.parameters()):
            if i <= 116:
                param.requires_grad = False

        # Top model
        # NOTE(review): `pooling` is only attached as an attribute here;
        # torchvision's MobileNetV2.forward presumably does not call it.
        self.model.pooling = nn.AdaptiveAvgPool2d(output_size=1)
        self.model.classifier = nn.Sequential(
            nn.Dropout(p=0.2), nn.Linear(1280, 1), nn.Sigmoid()
        )

    def forward(self, x):
        """Return the sigmoid output of the network for a batch of images."""
        return self.model(x.float())

    def configure_optimizers(self):
        """Adam over the trainable parameters, with a linear LR warm-up.

        ``LinearLR`` starts at ``0.5 * LEARNING_RATE`` and reaches the full
        rate after 3 scheduler steps.
        """
        opt = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters()), lr=LEARNING_RATE
        )
        scheduler = LinearLR(opt, start_factor=0.5, total_iters=3)
        return [opt], [scheduler]

    def _shared_step(self, batch):
        """Compute BCE loss and accuracy for one ``(images, labels)`` batch."""
        X, y = batch
        # The classifier ends in Sigmoid, so these are probabilities and
        # plain binary_cross_entropy is the matching loss.
        probs = self(X).view(-1)
        loss = F.binary_cross_entropy(probs, y.float())
        acc = self.accuracy(probs >= 0.5, y)
        return loss, acc

    def _log_epoch_metrics(self, prefix, loss, acc):
        """Log ``<prefix>_loss`` and ``<prefix>_acc`` aggregated per epoch."""
        self.log(f"{prefix}_loss", loss, prog_bar=True, on_epoch=True, on_step=False)
        self.log(f"{prefix}_acc", acc, prog_bar=True, on_epoch=True, on_step=False)

    def training_step(self, batch: tuple, batch_idx: int):
        """Training step."""
        train_loss, train_acc = self._shared_step(batch)
        self._log_epoch_metrics("train", train_loss, train_acc)
        return train_loss

    def validation_step(self, batch: tuple, batch_idx: int):
        """Validation step."""
        valid_loss, valid_acc = self._shared_step(batch)
        self._log_epoch_metrics("val", valid_loss, valid_acc)
        return valid_loss

    def test_step(self, batch: tuple, batch_idx: int):
        """Test step."""
        test_loss, test_acc = self._shared_step(batch)
        # Previously computed but never logged, so a test run reported nothing.
        self._log_epoch_metrics("test", test_loss, test_acc)
        return test_loss

# Define max epochs (upper bound; early stopping may end training sooner)
num_epochs = 30

# Define early stopping callback: watch "val_loss" (lower is better) and stop
# once it has failed to improve by at least min_delta for `patience`
# consecutive checks.
early_stop = pl_callbacks.EarlyStopping(
    monitor="val_loss", patience=3, min_delta=1e-3, verbose=True, mode="min"
)

# Instantiate the data module with its default directory and batch size, and
# build its train/val/test datasets up front.
data = CatsDogsData()
data.prepare_data()


def get_predict(model, data_loader):
    """Get true labels and thresholded predictions for a DataLoader.

    The model is put in eval mode and run without gradient tracking for the
    duration of the call; its previous train/eval mode is restored afterwards.

    Args:
        model: ``torch.nn.Module`` returning one probability per sample.
        data_loader: Iterable yielding ``(images, labels)`` tensor batches.

    Returns:
        ``(true_values, predicted_values)``: two lists of Python ints (0/1),
        aligned sample-by-sample in the order the loader produced them.
    """
    true_values = []
    predicted_values = []

    # Run on whatever device the model lives on, rather than assuming a
    # global device; a parameterless model leaves the inputs where they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    was_training = model.training
    model.eval()  # disable dropout / batch-norm updates while predicting
    try:
        with torch.no_grad():
            for imgs, labels in data_loader:
                if device is not None:
                    imgs = imgs.to(device)
                outputs = model(imgs)
                # Store plain ints rather than 0-d tensors / numpy bools.
                true_values.extend(torch.as_tensor(labels).view(-1).cpu().tolist())
                predicted_values.extend(
                    (outputs >= 0.5).view(-1).long().cpu().tolist()
                )
    finally:
        model.train(was_training)

    return true_values, predicted_values


# Define model checkpoint callback: keep only the single best checkpoint
# (lowest "val_loss") under CHECKPOINT_DIR. File names are prefixed with the
# configured epoch budget, e.g. "30_epoch=03-step=500.ckpt".
checkpoint = pl_callbacks.ModelCheckpoint(
    dirpath=CHECKPOINT_DIR,
    filename=str(num_epochs) + "_{epoch:02d}-{step}",
    monitor="val_loss",
    mode="min",
    save_weights_only=True,
    save_top_k=1,
    verbose=True,
)

# Mirror the DEVICE fallback: use the GPU when CUDA is available, otherwise
# train on CPU instead of failing on a hard-coded "gpu" accelerator.
trainer = pl.Trainer(
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    max_epochs=num_epochs,
    callbacks=[checkpoint, early_stop, TQDMProgressBar(refresh_rate=50)],
)

# Starting point for fine-tuning: the best checkpoint from the earlier
# feature-extraction stage.
model_path = "models/feature_extraction/best_model.ckpt"
# Load from feature extraction checkpoint (rescale + augment)
# NOTE(review): strict=False tolerates missing/unexpected state-dict keys, so
# a mismatched checkpoint would load silently -- confirm this is intended.
model = MobileNetV2Model.load_from_checkpoint(
    checkpoint_path=model_path,
    strict=False,
)

# summarize model
model = model.to(DEVICE)
# summary(model, (3, 224, 224))

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using cache found in /home/mhnguyen/.cache/torch/hub/pytorch_vision_v0.10.0

# Fit model and get best model path
# Training runs for at most num_epochs epochs; the checkpoint callback records
# the path of the checkpoint with the lowest monitored val_loss.
trainer.fit(model, data)
best_model_path = checkpoint.best_model_path
print(f"Best model saved at: {best_model_path}")

/scratch/mhnguyen/job_40549290/miniconda3/envs/py-light/lib/python3.13/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:658: Checkpoint directory /home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | model    | MobileNetV2    | 2.2 M  | train
1 | accuracy | BinaryAccuracy | 0      | train
----------------------------------------------------
1.7 M     Trainable params
542 K     Non-trainable params
2.2 M     Total params
8.901     Total estimated model params size (MB)
216       Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.

Epoch 0: 100%|██████████| 125/125 [00:02<00:00, 53.99it/s, v_num=4.05e+7]  
Validation: |          | 0/? [00:00<?, ?it/s]
Validation:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.64it/s]
Epoch 0: 100%|██████████| 125/125 [00:02<00:00, 43.11it/s, v_num=4.05e+7, val_loss=0.171, val_acc=0.965, train_loss=0.231, train_acc=0.945]

Metric val_loss improved. New best score: 0.171
Epoch 0, global step 125: 'val_loss' reached 0.17127 (best 0.17127), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=00-step=125.ckpt' as top 1

Epoch 1: 100%|██████████| 125/125 [00:02<00:00, 53.74it/s, v_num=4.05e+7, val_loss=0.171, val_acc=0.965, train_loss=0.231, train_acc=0.945]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 83.92it/s]
Epoch 1: 100%|██████████| 125/125 [00:02<00:00, 42.97it/s, v_num=4.05e+7, val_loss=0.153, val_acc=0.970, train_loss=0.222, train_acc=0.934]

Metric val_loss improved by 0.018 >= min_delta = 0.001. New best score: 0.153
Epoch 1, global step 250: 'val_loss' reached 0.15334 (best 0.15334), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=01-step=250.ckpt' as top 1

Epoch 2: 100%|██████████| 125/125 [00:02<00:00, 54.01it/s, v_num=4.05e+7, val_loss=0.153, val_acc=0.970, train_loss=0.222, train_acc=0.934]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 85.64it/s]
Epoch 2: 100%|██████████| 125/125 [00:02<00:00, 43.53it/s, v_num=4.05e+7, val_loss=0.127, val_acc=0.973, train_loss=0.184, train_acc=0.946]

Metric val_loss improved by 0.026 >= min_delta = 0.001. New best score: 0.127
Epoch 2, global step 375: 'val_loss' reached 0.12713 (best 0.12713), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=02-step=375.ckpt' as top 1

Epoch 3: 100%|██████████| 125/125 [00:02<00:00, 53.95it/s, v_num=4.05e+7, val_loss=0.127, val_acc=0.973, train_loss=0.184, train_acc=0.946]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 85.36it/s]
Epoch 3: 100%|██████████| 125/125 [00:02<00:00, 43.94it/s, v_num=4.05e+7, val_loss=0.116, val_acc=0.967, train_loss=0.164, train_acc=0.955]

Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.116
Epoch 3, global step 500: 'val_loss' reached 0.11574 (best 0.11574), saving model to '/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=03-step=500.ckpt' as top 1

Epoch 4: 100%|██████████| 125/125 [00:02<00:00, 53.73it/s, v_num=4.05e+7, val_loss=0.116, val_acc=0.967, train_loss=0.164, train_acc=0.955]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0:   0%|          | 0/25 [00:00<?, ?it/s]
Validation DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 81.49it/s]
Epoch 4: 100%|██████████| 125/125 [00:02<00:00, 43.27it/s, v_num=4.05e+7, val_loss=0.104, val_acc=0.970, train_loss=0.138, train_acc=0.964]

# Reload the best fine-tuned checkpoint, move it to the target device and
# freeze it for evaluation.
model = MobileNetV2Model.load_from_checkpoint(checkpoint_path=best_model_path)
model = model.to(DEVICE)
model.freeze()

# Make predictions
# NOTE(review): the train loader shuffles and applies the augmenting "train"
# transform, so the train metrics are measured on augmented images. Labels
# and predictions are collected together, so they remain aligned.
y_train, pred_train = get_predict(model, data.train_dataloader())
y_val, pred_val = get_predict(model, data.val_dataloader())
y_test, pred_test = get_predict(model, data.test_dataloader())

Using cache found in /home/mhnguyen/.cache/torch/hub/pytorch_vision_v0.10.0

# Show which checkpoint file the callback recorded as best.
print(checkpoint.best_model_path)

/home/mhnguyen/Teaching/CIML2025/mhn-ciml/ptl/models/finetune/30_epoch=20-step=2625-v2.ckpt

# Per-class precision / recall / F1 (4 decimal places) for each split.
print(f"Train:\n {classification_report(y_train, pred_train, digits=4)}")
print(f"Val:\n {classification_report(y_val, pred_val, digits=4)}")
print(f"Test:\n {classification_report(y_test, pred_test, digits=4)}")

Train:
               precision    recall  f1-score   support

           0     0.9980    0.9950    0.9965      1000
           1     0.9950    0.9980    0.9965      1000

    accuracy                         0.9965      2000
   macro avg     0.9965    0.9965    0.9965      2000
weighted avg     0.9965    0.9965    0.9965      2000

Val:
               precision    recall  f1-score   support

           0     0.9704    0.9850    0.9777       200
           1     0.9848    0.9700    0.9773       200

    accuracy                         0.9775       400
   macro avg     0.9776    0.9775    0.9775       400
weighted avg     0.9776    0.9775    0.9775       400

Test:
               precision    recall  f1-score   support

           0     0.9802    0.9900    0.9851       200
           1     0.9899    0.9800    0.9849       200

    accuracy                         0.9850       400
   macro avg     0.9850    0.9850    0.9850       400
weighted avg     0.9850    0.9850    0.9850       400

model = model.to(DEVICE)

# helper method to prepare image for model inference
def image_loader(image_path):
    """Display an image and return it as a model-ready batch of one.

    The image is converted to RGB, shown with matplotlib, then resized to
    ``IMAGE_DIM x IMAGE_DIM``, converted to a tensor, normalised with
    ``MEAN``/``STD`` and moved to ``DEVICE``.

    Args:
        image_path: Path to the image file.

    Returns:
        The preprocessed image tensor with a leading batch dimension of 1.
    """
    # Built from the shared constants so inference preprocessing cannot drift
    # from the validation pipeline; named `preprocess` to avoid shadowing the
    # module-level `transform` dict.
    preprocess = transforms.Compose([
        transforms.Resize((IMAGE_DIM, IMAGE_DIM)),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ])

    # convert() returns a new, fully loaded image, so the file handle can be
    # closed as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    plt.figure(figsize=(5, 5))
    plt.imshow(image)
    plt.axis("off")
    plt.show()

    image = preprocess(image).unsqueeze(0).to(DEVICE)
    return image

# Single-image inference on a test image from the cats folder.
image_path = DATA_DIR + "/test/cats/cat.1070.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

# The closer the prediction is to 0, the more confident the model is that the image is a cat; the closer it is to 1, the more confident it is a dog.

Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1070.jpg: 
cat (0.0012)

# Single-image inference on a test image from the dogs folder.
image_path = DATA_DIR + "/test/dogs/dog.1233.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1233.jpg: 
dog (0.9819)

# Single-image inference on a test image from the cats folder.
image_path = DATA_DIR + "/test/cats/cat.1080.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1080.jpg: 
cat (0.1485)

# Single-image inference on a test image from the dogs folder.
image_path = DATA_DIR + "/test/dogs/dog.1132.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1132.jpg: 
dog (0.9477)

# Single-image inference on a test image from the dogs folder.
# In the recorded run this image scores below the 0.5 threshold and is
# labelled "cat", i.e. it is an example of a misclassification.
image_path = DATA_DIR + "/test/dogs/dog.1311.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1311.jpg: 
cat (0.2584)

# Single-image inference on a test image from the cats folder.
image_path = DATA_DIR + "/test/cats/cat.1338.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1338.jpg: 
cat (0.0179)

# Single-image inference on a test image from the cats folder.
image_path = DATA_DIR + "/test/cats/cat.1342.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1342.jpg: 
cat (0.0825)

# Single-image inference on a test image from the cats folder.
image_path = DATA_DIR + "/test/cats/cat.1180.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1180.jpg: 
cat (0.0088)

# Single-image inference on a test image from the cats folder.
image_path = DATA_DIR + "/test/cats/cat.1048.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/cats/cat.1048.jpg: 
cat (0.2482)

# Single-image inference on a test image from the dogs folder.
image_path = DATA_DIR + "/test/dogs/dog.1342.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1342.jpg: 
dog (0.6592)

# Single-image inference on a test image from the dogs folder.
image_path = DATA_DIR + "/test/dogs/dog.1308.jpg"
img = image_loader(image_path)
with torch.no_grad():
    img_y_pred = model(img).item()

print()
print(f"Prediction for {image_path}: \n{'dog' if img_y_pred >= 0.5 else 'cat'} ({img_y_pred:.4f})")

Prediction for /home/mhnguyen/data/catsVsDogs/test/dogs/dog.1308.jpg: 
dog (0.6402)

Transfer Learning on Cats-Dogs Classification - Fine Tune - Solution

Fine-tune a pre-trained CNN's top layers and classification layers to classify cats vs. dogs.

Adapted from https://www.tensorflow.org/tutorials/images/transfer_learning

CIML Summer Institute

UC San Diego

Setup

Define Transforms

Define Data Module

Define Model

Train Model

Evaluate Model

Perform Inference

Transfer Learning on Cats-Dogs Classification - Fine Tune - Solution

Fine-tune a pre-trained CNN's top layers and classification layers to classify cats vs. dogs.

Adapted from https://www.tensorflow.org/tutorials/images/transfer_learning

CIML Summer Institute

UC San Diego

Setup

Define Transforms

Define Data Module

Define Model

Train Model

Evaluate Model

Perform Inference

Adapted from https://www.tensorflow.org/tutorials/images/transfer_learning