# Convolutional Neural Networks in R (MXNet)

# Clean workspace
rm(list=ls())
setwd("D:/MyOne/R/R programes/ppts/cse 7219/cnn")  # machine-specific path; adjust as needed

# Install the mxnet library (one-time; two alternative routes)

# Route 1: via the dmlc drat repository
#install.packages("drat", repos="http://cran.rstudio.com")
#drat:::addRepo("dmlc")
#install.packages("mxnet")

# Route 2: via the MXNet S3 repository
cran <- getOption("repos")
cran["dmlc"] <- "https://s3.amazonaws.com/mxnet-r/"
options(repos = cran)
#install.packages("mxnet")
# Load MXNet
library(mxnet)

# Load the train and test datasets.
# Each row is one image: column 1 is the class label and the remaining
# 784 columns are the pixel values of a 28 x 28 grayscale image.
train <- read.csv("train_sample.csv")
test <- read.csv("test_sample.csv")
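
# Quick inspection (optional): confirm the expected 785-column layout
# (1 label column + 784 pixel columns) and check the class balance.
dim(train)
table(train[, 1])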

###########################################
###########################################  Viewing the Images ########################
train_mat <- as.matrix(train)

## Color ramp: low pixel values map to white, high values to black
colors <- c('white','black')
cus_col <- colorRampPalette(colors=colors)

## Plot 12 randomly sampled images, each titled with its class label
par(mfrow=c(4,3),pty='s',mar=c(1,1,1,1),xaxt='n',yaxt='n')
sm <- sample(nrow(train_mat), 12)
for(di in sm)
{
  z <- array(train_mat[di,-1],dim=c(28,28))
  z <- z[,28:1]  # flip columns so the image appears right side up
  z <- matrix(as.numeric(z), 28, 28)
  image(1:28,1:28,z,main=train_mat[di,1],col=cus_col(256))
}
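
# Reset the plotting parameters to R's defaults so later plots are not
# drawn into the 4 x 3 grid above.
par(mfrow = c(1, 1), pty = 'm', xaxt = 's', yaxt = 's')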
##############################################################################
########## Data preprocessing into the format required by the mxnet library ##########
# In our data, the first column is the target attribute and the remaining
# columns are the pixel attributes; each row represents one record/sample/image.
# The mxnet library expects each column to represent a record and each row to
# represent an attribute, so we transpose our data to make it compatible.
# Each row stores an image as a flat vector of 784 values; the convolutional
# layers need it reshaped into a 2D 28 x 28 matrix.
# The first column is the target attribute, so it is removed and the remaining
# 784 cells become the data for each image.
# train_x is the transpose of the train data: in train_x, the columns are the
# images/samples. Similarly for test_x.
##############################################################################

# Set up train and test datasets
train <- data.matrix(train)
train_x <- t(train[, -1])
train_y <- train[, 1]
train_array <- train_x
dim(train_array) <- c(28, 28, 1, ncol(train_x))  # width x height x channels x samples

test <- data.matrix(test)
test_x <- t(test[, -1])
test_y <- test[, 1]
test_array <- test_x
dim(test_array) <- c(28, 28, 1, ncol(test_x))  # width x height x channels x samples
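
# Sanity check (optional): both arrays should now be 28 x 28 x 1 x n_samples.
stopifnot(all(dim(train_array)[1:3] == c(28, 28, 1)),
          dim(train_array)[4] == nrow(train),
          dim(test_array)[4] == nrow(test))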

# Set up the symbolic model
# create the input placeholder 'data' as an mxnet symbol
data <- mx.symbol.Variable('data')

# kernel is the size of the kernel and num_filter is the number of kernels (filters)
# act_type is the activation function
# stride is the step size by which the kernel shifts across the input matrix

# 1st convolutional layer  5x5 kernel and 16 filters
conv_1 <- mx.symbol.Convolution(data = data, kernel = c(5, 5), num_filter = 16)
relu_1 <- mx.symbol.Activation(data = conv_1, act_type = "relu")
pool_1 <- mx.symbol.Pooling(data = relu_1, pool_type = "max", kernel = c(2, 2), 
                            stride = c(2, 2))


# 2nd convolutional layer 5x5 kernel and 32 filters
conv_2 <- mx.symbol.Convolution(data = pool_1, kernel = c(5, 5), num_filter = 32)
relu_2 <- mx.symbol.Activation(data = conv_2, act_type = "relu")
pool_2 <- mx.symbol.Pooling(data=relu_2, pool_type = "max", kernel = c(2, 2), 
                            stride = c(2, 2))
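
# Shape bookkeeping (no padding is used, so every layer shrinks its input):
# input 28x28x1 -> conv_1 (5x5): 24x24x16 -> pool_1 (2x2, stride 2): 12x12x16
# -> conv_2 (5x5): 8x8x32 -> pool_2 (2x2, stride 2): 4x4x32
# Flattening below therefore yields 4 * 4 * 32 = 512 features per image.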


# 1st fully connected layer (Input & Hidden layers)
flatten <- mx.symbol.Flatten(data = pool_2)
fc_1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 128)
relu_3 <- mx.symbol.Activation(data = fc_1, act_type = "relu")


# 2nd fully connected layer (hidden -> output)
# num_hidden here is the number of target class levels (10 classes)
fc_2 <- mx.symbol.FullyConnected(data = relu_3, num_hidden = 10)
# Output layer: softmax converts the class scores into probabilities
NN_model <- mx.symbol.SoftmaxOutput(data = fc_2)
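
# Optional check, assuming your mxnet build exposes mx.symbol.infer.shape:
# it infers each layer's output shape from a given input shape (one image here),
# which should match the bookkeeping comments above.
# shapes <- mx.symbol.infer.shape(NN_model, data = c(28, 28, 1, 1))
# shapes$out.shapes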

# Pre-training set up

# Set seed for reproducibility
mx.set.seed(100)

# Create an mxnet CPU context: the device to train on. Use mx.gpu() instead
# if a GPU-enabled build of mxnet is available.
devices <- mx.cpu()

# Training


# Train the model
model <- mx.model.FeedForward.create(NN_model,
                                     X = train_array,
                                     y = train_y,
                                     ctx = devices,
                                     num.round = 20, # number of epochs
                                     array.batch.size = 40,
                                     learning.rate = 0.01,
                                     momentum = 0.5,
                                     eval.metric = mx.metric.accuracy,
                                     epoch.end.callback = mx.callback.log.train.metric(100))  # log training metric periodically
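
# Optionally, pass the test set as eval.data (a list with 'data' and 'label')
# so validation accuracy is also reported each epoch, assuming your mxnet
# version supports this argument of mx.model.FeedForward.create:
# model <- mx.model.FeedForward.create(NN_model, X = train_array, y = train_y,
#                                      ctx = devices, num.round = 20,
#                                      array.batch.size = 40, learning.rate = 0.01,
#                                      momentum = 0.5, eval.metric = mx.metric.accuracy,
#                                      eval.data = list(data = test_array, label = test_y))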

# Testing

# Predict labels: 'predicted' is a 10 x n matrix of class probabilities,
# one column per test image
predicted <- predict(model, test_array)
# Assign labels: take the most probable class for each image; subtract 1
# because the classes run 0-9 while R indexes columns from 1
predicted_labels <- max.col(t(predicted)) - 1
# Get accuracy: percentage of correctly classified test images
(sum(diag(table(test[, 1], predicted_labels))) / nrow(test)) * 100
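
# A fuller breakdown (optional): confusion matrix and per-class accuracy.
# Fixing the factor levels to 0-9 keeps the table square even if some class
# is never predicted.
conf_mat <- table(actual    = factor(test_y, levels = 0:9),
                  predicted = factor(predicted_labels, levels = 0:9))
conf_mat
round(diag(conf_mat) / rowSums(conf_mat), 3)  # per-class recall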