MNIST exercise (handwritten digit recognition tutorial)¶
Goal: Introduction to convolutions, filter weights, feature maps, and learned features
Exercise:
Run the notebook and observe the images of the filter weights and activations (at the end).
Try changing the filter size for the first convolution layer to something larger (like 9x9 or 16x16). How does that change the images of the filter weights and activations?
Question to consider: for 10 digits, what is the minimum number of filters needed?
In [ ]:
# ----------- IMPORT STATEMENTS ---------------
import argparse
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
import os
import sys
import numpy as np
import time
#---------------------------------------------
print('import done')
In [ ]:
# -----------------------------------
#Parameters for training
# -----------------------------------
num_worker2use = 4 #for parallel reading/prefetching of data (for bigger data)
batch_size = 256
max_numtrain = 1024 #for this exercise, train on limited num of input, to save time
max_numtest = batch_size # and test on limited num of input
epochs = 10
lrate = 0.01
numfilt = 16 #Try 8 or 24? Or a minimal number like 2?
# --------------------------------------------------------------
# NOTE: after the stride-1 convolution and max pooling (kernel 3, stride 2),
#   a 3x3 kernel leaves 12x12 feature maps, so use 12 for reduced_size
#   a 9x9 kernel leaves 9x9
#   a 16x16 kernel leaves 6x6
# --------------------------------------------------------------
kernel_size2use = 3 #Try 9 or even 16
reduced_size = 12 # also see the marked line in the forward method of the 'MyNet' class below
# --------------------------------------------------------------
data_path = './data'
torch.manual_seed(777)
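The size bookkeeping in the NOTE above can be checked with a quick calculation. Below is a small sketch (not part of the original exercise) that assumes a stride-1 convolution with no padding followed by the max_pool2d(kernel=3, stride=2) call used later, so reduced_size can be verified for any kernel_size2use.
In [ ]:
# Sketch: compute reduced_size for the chosen kernel size (stride-1 conv, no padding,
# then max pooling with kernel 3 and stride 2, matching the layers in MyNet below)
def conv_pool_output_size(input_size=28, kernel=kernel_size2use, pool_kernel=3, pool_stride=2):
    after_conv = input_size - kernel + 1                        # e.g. 28 - 3 + 1 = 26
    after_pool = (after_conv - pool_kernel) // pool_stride + 1  # e.g. (26 - 3) // 2 + 1 = 12
    return after_pool
print('computed reduced_size for kernel', kernel_size2use, ':', conv_pool_output_size())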
In [ ]:
# -------------------------------------------------------------
# Define network class object and its
# initialization and forward function
# (other methods are inherited from torch.nn.Module)
# -------------------------------------------------------------
class MyNet(torch.nn.Module):
def __init__(self):
super(MyNet, self).__init__()
        #Conv: the input has 1 channel and the output has numfilt channels (one feature
        # map per filter); the batch dimension is implicit
# see: https://docs.pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
self.conv1 = torch.nn.Conv2d(in_channels=1,out_channels=numfilt,kernel_size=kernel_size2use,stride=1)
        self.linear1 = torch.nn.Linear(numfilt*reduced_size*reduced_size, 16) #after max pooling the feature maps are reduced_size x reduced_size (12x12 for a 3x3 kernel)
self.linear2 = torch.nn.Linear(16, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
        #Uncomment this to see what the size actually is after the convolution and ReLU
#print('MYINFO fwd, after conv1relu, x shape:',x.shape)
x = F.max_pool2d(x, 3, 2)
# <<<<<<<<<<<<<<<<<--------------------
#Uncomment this to see what the size actually is after max pooling
#print('MYINFO fwd, after max, x shape:',x.shape)
x = torch.flatten(x, 1)
x = self.linear1(x)
x = F.relu(x)
x = self.linear2(x)
        #no activation here; log_softmax below converts the raw scores to log-probabilities
        output = F.log_softmax(x, dim=1) #log-probabilities over the 10 digit classes (pairs with nll_loss in training)
return output
print('Net class defined ')
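As a quick consistency check on the layer sizes, the sketch below (not part of the original exercise) pushes a small dummy batch through a fresh MyNet instance and prints the output shape.
In [ ]:
# Sketch: shape check with a dummy batch; expects output of shape (2, 10), one row of
# log-probabilities per image. If reduced_size does not match kernel_size2use, this fails
# in linear1, which is exactly the mismatch the NOTE in the parameter cell warns about.
_tmp_model = MyNet()
_dummy = torch.zeros(2, 1, 28, 28)   # batch of 2 single-channel 28x28 images
with torch.no_grad():
    print('dummy output shape:', _tmp_model(_dummy).shape)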
In [ ]:
# --------------------------------------------------------
# Define training function
# --------------------------------------------------------
def train(model, device, train_loader, optimizer, epoch):
''' This is called for each epoch.
Arguments: the model, the device to run on, data loader, optimizer, and current epoch
'''
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
if batch_idx*batch_size>= max_numtrain:
break
else:
if batch_idx==0: #print one message
print('INFO train, ep:',epoch,' batidx:',batch_idx, ' batch size:',target.shape[0])
data, target = data.to(device), target.to(device)
optimizer.zero_grad() #reset optimizer state
output = model(data) #get predictions
loss = F.nll_loss(output, target) #get loss (nll_loss for softmax outputs)
loss.backward() #backprop loss
optimizer.step() #update weights
# -------------------------------------------------------------
# Define test function
# -------------------------------------------------------------
def test(model, device, test_loader):
    ''' This is called after training each epoch.
Arguments: the model, the device to run on, test data loader
'''
model.eval()
#accumulate loss, accuracy info
total_loss = 0
total_correct = 0
total = 0
with torch.no_grad():
for batch_idx, (data, target) in enumerate(test_loader):
if batch_idx*batch_size>= max_numtest:
break
else:
data, target = data.to(device), target.to(device)
output = model(data)
total_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
_, predicted = torch.max(output, dim=1)
total_correct += (predicted == target).sum().item()
total += output.shape[0]
acc = total_correct/total
test_loss = total_loss/total
print('INFO evaluation acc:',f'{acc:.4}',' loss:',f'{test_loss:.4}','tot:',total)
return acc,test_loss
def get_activation(name, activation):
    '''Returns a forward hook that stores the given layer's output in the activation dict.'''
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook
print('Train, test, and support functions defined')
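Since the model returns log_softmax outputs and the training loop uses nll_loss, it may help to see that this pairing is the same as cross-entropy on raw scores. The sketch below is an illustration only, using made-up scores and labels.
In [ ]:
# Sketch: log_softmax + nll_loss on fake scores equals cross_entropy on the same scores.
_scores = torch.randn(4, 10)                 # made-up scores for 4 examples, 10 classes
_labels = torch.tensor([3, 1, 7, 0])         # made-up target digits
print('nll_loss(log_softmax):', F.nll_loss(F.log_softmax(_scores, dim=1), _labels).item())
print('cross_entropy        :', F.cross_entropy(_scores, _labels).item())  # should match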
In [ ]:
# -------------------------------------------------
# Get device
# (note: this is set up for a single GPU device.
#  If this were run on a full GPU node with more than one GPU, you would
#  want to get the rank and world-size info, set the device id
#  as in torch.cuda.set_device(local_rank),
#  and then also run the distributed initialization.)
# -------------------------------------------------
use_cuda = torch.cuda.is_available()
if use_cuda:
num_gpu = torch.cuda.device_count()
print('INFO, cuda, num gpu:',num_gpu)
device = torch.cuda.current_device()
    print('environ visdevs:', os.environ.get("CUDA_VISIBLE_DEVICES", "not set"))
else:
num_gpu = 0
print('INFO, cuda not available')
device = torch.device("cpu")
print('INFO, device is:', device)
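For reference only, the distributed setup mentioned in the comment above might look roughly like the commented-out sketch below when a script is launched with torchrun (which sets the LOCAL_RANK, RANK, and WORLD_SIZE environment variables). It is not needed for this single-GPU exercise.
In [ ]:
# Sketch (kept commented out; not needed here): multi-GPU initialization as hinted above.
# import torch.distributed as dist
# local_rank = int(os.environ.get('LOCAL_RANK', 0))
# torch.cuda.set_device(local_rank)
# dist.init_process_group(backend='nccl')   # rank/world size are read from the environment
# device = torch.device('cuda', local_rank)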
In [ ]:
# -------------------------------------------
#prepare images for network as they are loaded
# crop or other functions can be added here
# -------------------------------------------
transform=transforms.Compose([
    transforms.ToTensor(), #also scales image pixels from the 0-255 range to 0-1
transforms.Normalize((0.1307,), (0.3081,))
])
dataset1 = datasets.MNIST(data_path, train=True, download=True,transform=transform)
dataset2 = datasets.MNIST(data_path, train=False,download=True,transform=transform)
train_loader =torch.utils.data.DataLoader(dataset1,
batch_size =batch_size, sampler =None,
num_workers=num_worker2use, pin_memory=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset2,
batch_size =batch_size, sampler =None,
num_workers=num_worker2use, pin_memory=True, drop_last=True)
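The Normalize values (0.1307, 0.3081) are the approximate mean and standard deviation of the MNIST training pixels after scaling to the 0-1 range. The optional sketch below (not part of the original exercise) estimates them from a subset of the training images.
In [ ]:
# Sketch: estimate the normalization constants from a small subset of the raw training set.
_raw = datasets.MNIST(data_path, train=True, download=True, transform=transforms.ToTensor())
_subset = torch.stack([_raw[i][0] for i in range(2000)])   # 2000 images, shape (2000, 1, 28, 28)
print('approx mean:', f'{_subset.mean().item():.4f}', ' approx std:', f'{_subset.std().item():.4f}')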
In [ ]:
# -------------------------------------------
# Set up model
# -------------------------------------------
mymodel = MyNet().to(device)
#from torchinfo import summary; summary(mymodel, input_size=(1, 1, 28, 28)) #optional, requires e.g. the torchinfo package
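The commented-out summary call above needs an extra package; a simple alternative is to count the trainable parameters directly, as in the sketch below.
In [ ]:
# Sketch: parameter count without any extra package.
n_params = sum(p.numel() for p in mymodel.parameters() if p.requires_grad)
print('trainable parameters:', n_params)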
In [ ]:
# -------------------------------------------
# Do training loop
# -------------------------------------------
# Dictionary to store activations
activations = {}
# Register hooks
mymodel.conv1.register_forward_hook(get_activation('conv1', activations))
optimizer = torch.optim.Adam(mymodel.parameters(), lr=lrate)
train_results = []
test_results = []
for epoch in range(epochs):
print('INFO about to train epoch:',epoch)
start_time=time.time()
train(mymodel, device, train_loader, optimizer, epoch)
    print('INFO training time:', f'{time.time()-start_time:.5f}')
    print('INFO train metrics for epoch:',epoch)
    train_results.append(test(mymodel, device, train_loader))
    print('INFO test metrics for epoch:',epoch)
test_results.append(test(mymodel, device, test_loader))
print('INFO done');
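Optionally, the trained weights can be saved at this point so the model does not have to be retrained; a minimal sketch is below (the filename is just an example).
In [ ]:
# Sketch: save (and optionally reload) the trained weights; the filename is arbitrary.
torch.save(mymodel.state_dict(), 'mnist_mynet.pt')
# mymodel.load_state_dict(torch.load('mnist_mynet.pt'))
print('saved model state_dict to mnist_mynet.pt')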
In [ ]:
Below is code to plot and inspect the results¶
In [ ]:
#convert the per-epoch (accuracy, loss) results to numpy arrays for plotting
train_results=np.array(train_results)
test_results =np.array(test_results)
In [ ]:
# plot accuracy over epochs
import matplotlib.pyplot as plt #MATLAB-style plotting functions
import matplotlib.image as mpimg
%matplotlib inline
plt.figure()
plt.axis([0 ,epochs, 0, 1])
plt.plot(train_results[:,0]) #0th col is accuracy, col 1 is loss
plt.plot(test_results[:,0])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
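Column 1 of the results arrays holds the loss, so the same kind of curve can be drawn for it; an optional companion plot is sketched below.
In [ ]:
# Sketch: companion plot of the loss (column 1 of the results arrays).
plt.figure()
plt.plot(train_results[:,1])
plt.plot(test_results[:,1])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()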
In [ ]:
# Get some sample predictions
with torch.no_grad():
for batch_idx, (data, target) in enumerate(test_loader):
data, target = data.to(device), target.to(device)
output = mymodel(data)
_, predicted = torch.max(output, dim=1)
break
output=output.cpu().numpy()
predicted=predicted.cpu().numpy()
In [ ]:
#To view a sample image and predictions
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
#pick any starting index; 5 consecutive test examples are shown
range_start=11
for i in range(range_start,range_start+5):
#print('For example i:',i,' rawoutput:',np.round(output[i,:],1))
print('For example i:',i,' predicted:',predicted[i])
print('----------------------------------------------------')
for i in range(5):
plt.subplot(1,5,i+1)
plt.xticks([])
plt.yticks([])
tmpimg=np.squeeze(data[range_start+i,:,:,:].cpu()).reshape((28,28))
plt.imshow(tmpimg,'gray')
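To pick more interesting examples for the viewer above, the optional sketch below lists the indices in this test batch where the prediction disagrees with the true label.
In [ ]:
# Sketch: indices in the current test batch where the model is wrong; try using one of
# these as range_start above.
_true = target.cpu().numpy()                  # 'target' still holds the labels for this batch
_wrong = np.where(predicted != _true)[0]
print('number misclassified in this batch:', len(_wrong))
print('first few misclassified indices:', _wrong[:10])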
In [ ]:
# ------------ GET WEIGHTS From Convolution Layer and make mosaic image
#take weights from conv layer and detach from model, move to cpu (in case we are on gpu)
Wlist = mymodel.conv1.weight.detach().cpu() #tensor of shape (numfilt, 1, kernel_size, kernel_size)
W3Dchan = np.squeeze(Wlist) #drop the singleton input-channel dimension
print("W3D shape:"+str(W3Dchan.shape))
#plot a mosaic of up to 16 filters
ncol = 4
nrow = np.ceil(min(numfilt,16)/ncol).astype(int) #show at most 16 filters
plt.figure()
for i in range(min(16,W3Dchan.shape[0])):
plt.subplot(nrow,ncol,i+1)
plt.imshow(W3Dchan[i],'gray')
plt.axis('off')
plt.show()
print('done plotting weights mosaic')
In [ ]:
# ---------------- NOW Visualize the activations for the first training example --------
# 1. gather activations from the model layers
# -------------------------------------------------------------------------
with torch.no_grad():
for batch_idx, (data, target) in enumerate(test_loader):
break
#try different images by changing 0:1 to 1:2, etc.
test_img = data[0:1,:,:,:].to(device)
model_pred = mymodel(test_img) #run model on 1 input; the forward hook stores the conv1 output
conv1_act = np.squeeze(activations['conv1'].detach().cpu())
print('activation array shape:',conv1_act.shape)
# 2. Now output a mosaic of layer 1
ncol =4
nrow = np.ceil(min(conv1_act.shape[0],16)/ncol).astype(int) #show at most 16 feature maps
plt.figure()
for i in range(min(conv1_act.shape[0],16)):
plt.subplot(nrow,ncol,i+1)
plt.imshow(conv1_act[i,:,:],'gray')
plt.axis('off')
#plt.savefig("test.png", bbox_inches='tight')
plt.show()
print('done plotting layer1 activation output mosaic')
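A quick numeric companion to the mosaic above: the sketch below prints the mean response of each conv1 filter to this image, which makes it easier to compare filters when numfilt or the kernel size is changed.
In [ ]:
# Sketch: mean activation per filter for the image visualized above.
mean_resp = conv1_act.reshape(conv1_act.shape[0], -1).mean(axis=1)
for i, m in enumerate(mean_resp):
    print(f'filter {i:2d}: mean activation {m:.3f}')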