Convolutional Neural Network for Image Classification


Bikash Santra

Indian Statistical Institute, Kolkata


Importing Libraries

In [1]:
# Pytorch libraries
import torch
import torchvision
import torchvision.transforms as transforms

# For displaying images and numpy operations
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# For CNN Purpose
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

# Loss function and optimizer
import torch.optim as optim

import scipy.io

Choosing CUDA Device (GPU) and Clearing GPU Cache Memory

In [2]:
torch.cuda.set_device(1)
torch.cuda.empty_cache()

Downloading and Loading MNIST Dataset

In [3]:
# Dataloader

# For fully connected network 20*20
transform = transforms.Compose(
    [transforms.Resize(24),
     transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=4)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=4)

mnistClasses = ('zero', 'one', 'two', 'three',
           'four', 'five', 'six', 'seven', 'eight', 'nine')

Visualizing a Mini-batch of Train Images of the Dataset

In [4]:
# Visualizing the train images
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.5])
    std = np.array([0.5])
    inp = std * inp + mean
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data
inputs, classes = next(iter(trainloader))
print(inputs.shape)

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

imshow(out, title=[mnistClasses[x] for x in classes])
torch.Size([32, 1, 24, 24])

Defining a Convolutional Connected Neural Network

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 1)
        self.fc1 = nn.Linear(64 * 6 * 6, 150)
        self.relu3 = nn.ReLU()
        self.drop1 = nn.Dropout()
        self.fc2 = nn.Linear(150, 10)
        self.sm = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.view(-1, 64 * 6 * 6)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.drop1(x)
        x = self.fc2(x)
        x = self.sm(x) 
        return x

Creating and Initialiizing the Network with Random Weights

In [6]:
# Network object Creation    
net = Net()
print(net)
Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2304, out_features=150, bias=True)
  (relu3): ReLU()
  (drop1): Dropout(p=0.5)
  (fc2): Linear(in_features=150, out_features=10, bias=True)
  (sm): LogSoftmax()
)

Sending Network to the GPU

In [7]:
# Using GPU, if available
use_gpu = torch.cuda.is_available()
print(use_gpu)

if use_gpu:
    net.cuda()
True

Regressing a Mini-batch of Train Images using Randomly Initialized Network

In [8]:
inputs, classes = next(iter(trainloader))
inputs = inputs.cuda()
out = net(inputs)
print(out)
tensor([[-2.2765, -2.2695, -2.3661, -2.2173, -2.3151, -2.3953, -2.4678, -2.2085,
         -2.3569, -2.1895],
        [-2.2875, -2.3231, -2.3517, -2.3194, -2.2954, -2.3638, -2.3051, -2.2589,
         -2.2907, -2.2371],
        [-2.3489, -2.2069, -2.3084, -2.2622, -2.2699, -2.3102, -2.4540, -2.3609,
         -2.3029, -2.2244],
        [-2.3623, -2.2862, -2.3678, -2.2708, -2.3366, -2.2798, -2.3414, -2.2262,
         -2.3089, -2.2561],
        [-2.3505, -2.3090, -2.3920, -2.2193, -2.3096, -2.3890, -2.3968, -2.2172,
         -2.3104, -2.1627],
        [-2.4590, -2.3148, -2.2735, -2.1548, -2.1832, -2.3486, -2.4776, -2.3030,
         -2.2818, -2.2768],
        [-2.3469, -2.3717, -2.3333, -2.2891, -2.2975, -2.3505, -2.3602, -2.1284,
         -2.3319, -2.2415],
        [-2.2852, -2.2527, -2.3404, -2.4050, -2.2660, -2.3554, -2.4256, -2.2654,
         -2.2986, -2.1595],
        [-2.3333, -2.2786, -2.3478, -2.3044, -2.3502, -2.2860, -2.3782, -2.2463,
         -2.3041, -2.2089],
        [-2.3748, -2.3005, -2.3170, -2.3793, -2.2638, -2.3560, -2.3649, -2.2526,
         -2.2566, -2.1801],
        [-2.3687, -2.2484, -2.2797, -2.2010, -2.3374, -2.3186, -2.3345, -2.3839,
         -2.2676, -2.3002],
        [-2.3335, -2.2338, -2.3995, -2.1612, -2.2927, -2.4213, -2.3888, -2.2622,
         -2.3143, -2.2492],
        [-2.4098, -2.3392, -2.3427, -2.1676, -2.1781, -2.3439, -2.3494, -2.3619,
         -2.2915, -2.2705],
        [-2.4344, -2.3099, -2.4172, -2.1451, -2.2380, -2.3447, -2.3433, -2.2590,
         -2.2771, -2.2900],
        [-2.2337, -2.3463, -2.3472, -2.2563, -2.1977, -2.3724, -2.4494, -2.3189,
         -2.3118, -2.2197],
        [-2.3447, -2.2914, -2.3329, -2.2278, -2.2171, -2.3684, -2.3896, -2.2660,
         -2.3505, -2.2542],
        [-2.3454, -2.2930, -2.4245, -2.1964, -2.3042, -2.3122, -2.3685, -2.2415,
         -2.2952, -2.2636],
        [-2.4398, -2.2253, -2.3031, -2.3610, -2.2634, -2.3735, -2.3264, -2.3163,
         -2.3040, -2.1438],
        [-2.3815, -2.2893, -2.4340, -2.2384, -2.2846, -2.3223, -2.3764, -2.2878,
         -2.2862, -2.1536],
        [-2.3322, -2.3547, -2.4253, -2.2624, -2.3615, -2.3000, -2.3883, -2.2403,
         -2.2980, -2.1018],
        [-2.3434, -2.3048, -2.3860, -2.2443, -2.1948, -2.3291, -2.2929, -2.2930,
         -2.3660, -2.2861],
        [-2.3435, -2.2867, -2.2729, -2.2776, -2.2761, -2.3740, -2.4012, -2.2726,
         -2.2320, -2.3016],
        [-2.4025, -2.2457, -2.3519, -2.1833, -2.3124, -2.3632, -2.4270, -2.2119,
         -2.2644, -2.2928],
        [-2.3526, -2.2593, -2.2710, -2.2550, -2.2797, -2.3555, -2.3072, -2.3040,
         -2.3162, -2.3312],
        [-2.3796, -2.2747, -2.4133, -2.2172, -2.2463, -2.4052, -2.3379, -2.2720,
         -2.3566, -2.1564],
        [-2.3246, -2.2742, -2.2760, -2.3343, -2.3139, -2.3220, -2.4736, -2.2673,
         -2.2751, -2.1882],
        [-2.3184, -2.3037, -2.3060, -2.2492, -2.2155, -2.2858, -2.4667, -2.3129,
         -2.3258, -2.2615],
        [-2.3542, -2.2874, -2.3847, -2.2533, -2.1396, -2.3940, -2.4305, -2.2568,
         -2.3519, -2.2118],
        [-2.3198, -2.2598, -2.2830, -2.3213, -2.2301, -2.3597, -2.4207, -2.2758,
         -2.3436, -2.2284],
        [-2.3135, -2.2708, -2.3421, -2.2587, -2.2076, -2.3044, -2.3968, -2.3533,
         -2.3731, -2.2237],
        [-2.3188, -2.3412, -2.3809, -2.2501, -2.2226, -2.3312, -2.4829, -2.2330,
         -2.2943, -2.2027],
        [-2.3213, -2.2813, -2.4190, -2.2519, -2.2969, -2.3864, -2.4487, -2.2314,
         -2.2983, -2.1307]], device='cuda:1', grad_fn=<LogSoftmaxBackward>)

Defining Loss Function and Optimizer

In [9]:
# Let’s use a Classification Cross-Entropy loss and SGD with momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

Training the Network

In [10]:
# This is when things start to get interesting. 
# We simply have to loop over our data iterator, and feed the inputs to the network and optimize
net.train(True)

epochs = 10
lossInEpochs = np.array([])
xaxis = range(epochs)

for epoch in range(epochs):  # loop over the dataset multiple times
    
    running_loss = 0.0
    total_loss = 0.0
    miniBatchEpoch = 0
    
    scheduler.step()
    
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # wrap them in Variable
        if use_gpu:
            inputs, labels =inputs.cuda(), labels.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = net(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        total_loss += loss.item()
        miniBatchEpoch += 1
        if i % 1000 == 999:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0
            
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            
    lossInEpochs = np.hstack([lossInEpochs, total_loss/miniBatchEpoch])
    
print('Finished Training')
[1,  1000] loss: 0.309
0.01
[2,  1000] loss: 0.066
0.01
[3,  1000] loss: 0.048
0.01
[4,  1000] loss: 0.035
0.01
[5,  1000] loss: 0.031
0.01
[6,  1000] loss: 0.018
0.001
[7,  1000] loss: 0.013
0.001
[8,  1000] loss: 0.013
0.001
[9,  1000] loss: 0.013
0.001
[10,  1000] loss: 0.011
0.001
Finished Training

Saving the Epoch Error

In [11]:
filePath = './epochWiseTrainError/epochError_CNN_MNIST.mat'
scipy.io.savemat(filePath, mdict={'lossInEpochs': lossInEpochs})

Saving the Trained Model

In [12]:
torch.save(net.state_dict(), './trainedModels/model_CNN_MNIST.pth')

Plotting Loss vs. Epoch

In [13]:
plt.plot(xaxis,lossInEpochs)
Out[13]:
[<matplotlib.lines.Line2D at 0x7ffac17904d0>]

Analyzing Performance

In [14]:
# Set model to evaluation mode
net.eval()
Out[14]:
Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2304, out_features=150, bias=True)
  (relu3): ReLU()
  (drop1): Dropout(p=0.5)
  (fc2): Linear(in_features=150, out_features=10, bias=True)
  (sm): LogSoftmax()
)

Dataset-wise Accuracy

In [15]:
correctR = np.array([0], dtype='float')
total = np.array([0], dtype='float')

for data in testloader:
    images, labels = data
    
    outputs = net(images.cuda())
    outputs = outputs.cpu()
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    predicted = predicted.data.cpu().numpy()
    labels = labels.data.numpy()
    correctR += (predicted == labels).sum()

print('correctR, totalAccuracy of the network on the 10000 test images: %.4f%%' % (
    100 * correctR / total))
correctR, totalAccuracy of the network on the 10000 test images: 99.4300%

Class-wise Accuracy

In [16]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
for data in testloader:
    images, labels = data
    
    outputs = net(images.cuda())
    outputs = outputs.cpu()
    _, predicted = torch.max(outputs.data, 1)
    predicted = predicted.data.cpu().numpy()
    labels = labels.data.numpy()
    c = (predicted == labels).squeeze()
    for i in range(4):
        label = labels[i]
        class_correct[label] += c[i]
        class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %.4f%%' % (
        mnistClasses[i], 100 * class_correct[i] / class_total[i]))
Accuracy of  zero : 100.0000%
Accuracy of   one : 98.5185%
Accuracy of   two : 99.2908%
Accuracy of three : 98.3871%
Accuracy of  four : 100.0000%
Accuracy of  five : 97.7528%
Accuracy of   six : 99.2308%
Accuracy of seven : 100.0000%
Accuracy of eight : 99.1150%
Accuracy of  nine : 100.0000%