MNIST exercise (handwritten digit recognition tutorial)¶

Goal: An introduction to convolution, filters, and feature maps

Exercise:

Run the notebook and observe the images of filter weights and activations (at the end).

Try changing the filter size for the first convolution layer to something larger (like 9x9 or 16x16). How does that change the images of filter weights and activations?


Question to consider: for 10 digits, what is the minimum number of filters needed?
In [ ]:
# ----------- IMPORT STATEMENTS ---------------
import argparse
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
import os
import sys
import numpy as np
import time

#---------------------------------------------
print('import done')
In [ ]:
# -----------------------------------
#Parameters for training
# -----------------------------------
num_worker2use = 4     #for parallel reading/prefetching of data (for bigger data)
batch_size     = 256  
max_numtrain   = 1024       #for this exercise, train on a limited number of inputs to save time
max_numtest    = batch_size # and test on a limited number of inputs
epochs         = 10
lrate          = 0.01
numfilt        = 16   #Try 8 or 24? or a minimal number like 2?

# --------------------------------------------------------------
# NOTE: a 3x3 kernel leaves 12x12 after max pooling, so use 12 for reduced_size
#       a 16x16 kernel leaves 6x6
#       a 9x9 kernel leaves 9x9
# --------------------------------------------------------------
kernel_size2use= 3   #Try 9 or even 16,
reduced_size   = 12    # also, see the note below in fwd method for 'MyNet' class
# --------------------------------------------------------------
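# a minimal sketch (added for illustration, not part of the original exercise)
# of where the sizes in the note above come from: the conv output is
# 28 - kernel_size + 1 (stride 1, no padding), and F.max_pool2d with kernel 3
# and stride 2 then gives floor((n - 3)/2) + 1
def post_pool_size(kernel_size, img_size=28, pool_kernel=3, pool_stride=2):
    conv_out = img_size - kernel_size + 1
    return (conv_out - pool_kernel) // pool_stride + 1
print('post-pool sizes:', [(k, post_pool_size(k)) for k in (3, 9, 16)])  # [(3, 12), (9, 9), (16, 6)]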

data_path      = './data'
torch.manual_seed(777)
In [ ]:
# -------------------------------------------------------------
#   Define network class object and its 
#             initialization and forward function
#             (other functions are inherited from torch.nn)
# -------------------------------------------------------------
class MyNet(torch.nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        #Conv: input has 1 channel, output has numfilt channels;
        #  the batch dimension of the input is implicit
        # see:   https://docs.pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        self.conv1   = torch.nn.Conv2d(in_channels=1,out_channels=numfilt,kernel_size=kernel_size2use,stride=1) 
        self.linear1 = torch.nn.Linear(numfilt*reduced_size*reduced_size,16) #after max pooling the feature map will be reduced_size x reduced_size (12x12 for a 3x3 kernel)
        self.linear2 = torch.nn.Linear(16, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        #Uncomment this to see what the size actually is after conv1 + relu
        #print('MYINFO  fwd, after conv1relu, x shape:',x.shape)

        x = F.max_pool2d(x, 3, 2)   #max pooling with kernel size 3, stride 2
        # <<<<<<<<<<<<<<<<<--------------------
        #Uncomment this to see what the size actually is after max pooling
        #print('MYINFO  fwd, after max, x shape:',x.shape)

        x = torch.flatten(x, 1)
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        #no extra ReLU here: log_softmax below expects the raw scores (logits)
        output = F.log_softmax(x, dim=1)  #log_softmax pairs with nll_loss for multi-class classification
        return output
print('Net class defined ')
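In [ ]:
# Optional sanity check (added for illustration): run one dummy batch through
# an untrained MyNet to confirm the flattened conv output matches
# numfilt*reduced_size*reduced_size -- if kernel_size2use is changed without
# updating reduced_size, this will raise a shape-mismatch error.
dummy_in  = torch.zeros(1, 1, 28, 28)           #one fake 28x28 grayscale image
dummy_out = MyNet()(dummy_in)
print('dummy output shape:', dummy_out.shape)   #expect torch.Size([1, 10])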
In [ ]:
# --------------------------------------------------------
#   Define training function
# --------------------------------------------------------
def train(model, device, train_loader, optimizer, epoch):
    ''' This is called for each epoch.  
        Arguments:  the model, the device to run on, data loader, optimizer, and current epoch
    ''' 
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
      if batch_idx*batch_size>= max_numtrain:
           break
      else:
        if batch_idx==0:  #print one message
          print('INFO train, ep:',epoch,' batidx:',batch_idx, ' batch size:',target.shape[0])
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()                 #reset optimizer state
        output = model(data)                  #get predictions
        loss = F.nll_loss(output, target)     #get loss (nll_loss for softmax outputs)
        loss.backward()                       #backprop loss
        optimizer.step()                      #update weights

# -------------------------------------------------------------
#   Define test function
# -------------------------------------------------------------
def test(model, device, test_loader):
    ''' This is called after each training epoch 
        Arguments:  the model, the device to run on, test data loader
    ''' 
    model.eval()

    #accumulate loss, accuracy info
    total_loss    = 0
    total_correct = 0
    total         = 0
    with torch.no_grad():
      for batch_idx, (data, target) in enumerate(test_loader):
        if batch_idx*batch_size>= max_numtest:
           break
        else:
            data, target = data.to(device), target.to(device)
            output       = model(data)
            total_loss  += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss

            _, predicted  = torch.max(output, dim=1)
            total_correct += (predicted == target).sum().item()
            total         += output.shape[0]
           
    acc       = total_correct/total
    test_loss = total_loss/total 
    print('INFO evaluation acc:',f'{acc:.4}',' loss:',f'{test_loss:.4}','tot:',total)
    return acc,test_loss
def get_activation(name, activation):
    ''' Return a forward hook that stores a layer's output (detached)
        in the activation dict under the given name. '''
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook
    
print('Train, test, and support functions defined')
In [ ]:
# -------------------------------------------------
#  Get device  
#  (note, this is set up for 1 GPU device
#    if this were to run on a full GPU node with >1 gpu device, you would
#     want to get rank, world size info and set device id 
#     as in:   torch.cuda.set_device(local_rank) 
#     and then also run distributed initialization )
# -------------------------------------------------
use_cuda = torch.cuda.is_available() 
if use_cuda:
        num_gpu = torch.cuda.device_count()
        print('INFO,  cuda, num gpu:',num_gpu)
        device     = torch.cuda.current_device()
        print('environ visdevs:',os.environ.get("CUDA_VISIBLE_DEVICES", "<not set>"))
else:
        num_gpu = 0
        print('INFO, cuda not available')
        device  = torch.device("cpu")   
print('INFO, device is:', device)
In [ ]:
# -------------------------------------------
#prepare images for network as they are loaded
#   crop or other functions can be added here
# -------------------------------------------
transform=transforms.Compose([
        transforms.ToTensor(),                      #also scales image pixels from the 0-255 range to 0-1
        transforms.Normalize((0.1307,), (0.3081,))  #normalize with the MNIST mean and std
        ])

dataset1 = datasets.MNIST(data_path, train=True, download=True,transform=transform)
dataset2 = datasets.MNIST(data_path, train=False,download=True,transform=transform)

train_loader =torch.utils.data.DataLoader(dataset1, 
            batch_size =batch_size,     sampler   =None,
            num_workers=num_worker2use, pin_memory=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset2, 
            batch_size =batch_size,     sampler   =None,
            num_workers=num_worker2use, pin_memory=True, drop_last=True)
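In [ ]:
# An alternative sketch (added for illustration, not the approach used below):
# instead of breaking out of the batch loop inside train()/test(), the dataset
# itself can be limited up front with torch.utils.data.Subset.
small_train  = torch.utils.data.Subset(dataset1, range(max_numtrain))
small_loader = torch.utils.data.DataLoader(small_train,
            batch_size =batch_size,     num_workers=num_worker2use,
            pin_memory=True, drop_last=True)
print('Subset example:', len(small_train), 'training examples in', len(small_loader), 'batches')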
In [ ]:
# -------------------------------------------
#  Set up model
# -------------------------------------------
mymodel = MyNet().to(device)

#summary(mymodel,input_size=(1, 1, 28, 28))
In [ ]:
# -------------------------------------------
#  Do training loop
# -------------------------------------------

# Dictionary to store activations
activations = {}
# Register hooks
mymodel.conv1.register_forward_hook(get_activation('conv1', activations))

optimizer = torch.optim.Adam(mymodel.parameters(), lr=lrate)

train_results = []
test_results  = []
for epoch in range(epochs):
        print('INFO about to train epoch:',epoch)
        start_time=time.time()
        train(mymodel, device, train_loader, optimizer, epoch)
        print('INFO training time:', f'{time.time()-start_time:.5f}')
        print('INFO train metrics for epoch:',epoch)
        train_results.append(test(mymodel, device, train_loader))
        print('INFO test metrics for epoch:',epoch)
        test_results.append(test(mymodel, device, test_loader))

print('INFO  done');
In [ ]:
 

Below is code to plot and inspect the results¶

In [ ]:
#convert the per-epoch results lists to numpy arrays for plotting
train_results=np.array(train_results)
test_results =np.array(test_results)
In [ ]:
# plot accuracy over epochs

import matplotlib.pyplot as plt      #These provide matlab type of plotting functions
import matplotlib.image as mpimg
%matplotlib inline                   

plt.figure()
plt.axis([0 ,epochs, 0, 1])
plt.plot(train_results[:,0]) #0th col is accuracy, col 1 is loss
plt.plot(test_results[:,0])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
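In [ ]:
# A companion sketch (added for illustration) that plots the loss curves the
# same way, using column 1 of the results arrays (column 0 is accuracy).
plt.figure()
plt.plot(train_results[:,1])
plt.plot(test_results[:,1])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()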
In [ ]:
# Get some sample predictions
with torch.no_grad():
  for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)
        output       = mymodel(data)
        _, predicted = torch.max(output, dim=1)
        break
output=output.cpu().numpy()
predicted=predicted.cpu().numpy()
In [ ]:
#To view a sample image and predictions
import matplotlib.pyplot as plt      
import matplotlib.image as mpimg

#set range_start to pick which 5 consecutive test examples to show (e.g. 0, 5, 11, ...)
range_start=11
for i in range(range_start,range_start+5):
  #print('For example i:',i,' rawoutput:',np.round(output[i,:],1))
  print('For example i:',i,' predicted:',predicted[i])
  print('----------------------------------------------------')
for i in range(5):
    plt.subplot(1,5,i+1)
    plt.xticks([])
    plt.yticks([])
    tmpimg=np.squeeze(data[range_start+i,:,:,:].cpu()).reshape((28,28))
    plt.imshow(tmpimg,'gray')   
In [ ]:
# ------------ GET WEIGHTS From Convolution Layer and make mosaic image

#take weights from conv layer and detach from model, move to cpu (in case we are on gpu)
Wlist   =mymodel.conv1.weight.detach().cpu()  #tensor of shape (numfilt, 1, kernel, kernel)

W3Dchan     =np.squeeze(Wlist) #drop the singleton channel dimension -> (numfilt, kernel, kernel)
print("W3D shape:"+str(W3Dchan.shape))

#plot a mosaic of the filter weights (at most 16 filters shown)
ncol =4
nrow =np.ceil(16/ncol).astype(int)   #mosaic layout: 4 columns
plt.figure()
for i in range(min(16,W3Dchan.shape[0])):
   plt.subplot(nrow,ncol,i+1)
   plt.imshow(W3Dchan[i],'gray')
   plt.axis('off')

plt.show()
print('done plotting weights mosaic')
In [ ]:
#  ---------------- NOW visualize the activations for the first test example --------
#   1. gather activations from the model layers
# -------------------------------------------------------------------------

with torch.no_grad():
  for batch_idx, (data, target) in enumerate(test_loader):
      break
#try different images by changing 0:1 to 1:2, etc.
test_img   = data[0:1,:,:,:].to(device)
model_pred = mymodel(test_img) #run model on 1 input
conv1_act  = np.squeeze(activations['conv1'].detach().cpu())
print('activation array shape:',conv1_act.shape)
# 2.  Now output a mosaic of layer 1
ncol =4
nrow =np.ceil(16/ncol).astype(int)
plt.figure()
for i in range(min(conv1_act.shape[0],16)):  
   plt.subplot(nrow,ncol,i+1)
   plt.imshow(conv1_act[i,:,:],'gray')
   plt.axis('off')
#plt.savefig("test.png", bbox_inches='tight')
plt.show()
print('done plotting layer1 activation output mosaic')