# Convolutional Neural Networks

# Clean workspace
# Installation of mxnet library
#install.packages("drat", repos="http://cran.rstudio.com")

# Register the dmlc repository (which hosts the mxnet R package) alongside
# whatever repositories are already configured for this session.
cran <- replace(
  getOption("repos"),
  "dmlc",
  "https://s3.amazonaws.com/mxnet-r/"
)
options(repos = cran)
# Load MXNet
library(mxnet)

# Loading data and set up

# Read the training and test samples from CSV files located in the
# current working directory.
train <- read.csv(file = "train_sample.csv")
test <- read.csv(file = "test_sample.csv")

###########################################  Viewing the Images ########################
# Matrix view of the training data. Column 1 is used as the plot title
# (the label); the remaining columns are reshaped below into a 28 x 28 image.
train_mat <- as.matrix(train)

## Colour ramp definition: white for low values, black for high values.
colors <- c("white", "black")
cus_col <- colorRampPalette(colors = colors)

## Plot (up to) 12 randomly sampled images in a 3 x 4 grid.
# Save the previous graphics settings so they can be restored afterwards.
old_par <- par(
  mfrow = c(3, 4), pty = "s", mar = c(1, 1, 2, 1),
  xaxt = "n", yaxt = "n"
)
# Cap the sample size so small data sets do not make sample() fail.
sm <- sample(nrow(train_mat), min(12, nrow(train_mat)))
for (di in sm) {
  # Drop the label column and reshape the remaining values to 28 x 28.
  z <- array(train_mat[di, -1], dim = c(28, 28))
  z <- z[, 28:1] ## right side up
  z <- matrix(as.numeric(z), 28, 28)
  image(1:28, 1:28, z, main = train_mat[di, 1], col = cus_col(256))
}
par(old_par)
###########################################  Data preprocessing into the format required by the mxnet library ###########
# In our data, the first column is the target attribute and the remaining columns are the features;
# each row represents one record/sample/image.
# In the mxnet library, each column represents a record and each row represents an attribute, hence
# we take the transpose of our data to make it compatible with mxnet.
# Each row represents an image in vector format. We need to reshape it into a 2D matrix
# to represent the image: a 28 * 28 matrix, i.e. 784 values per image.
# The first column is the target attribute, so we remove it; the remaining 784 cells become the data for each image.
# train_x is the transpose of the train data (without the label column).
# In train_x, the columns correspond to the images/samples; similarly for test_x.

# Build the model inputs: a label vector plus a 4-D array
# (28 x 28 x 1 x number-of-samples) for each data set.

# -- Training set --
train <- data.matrix(train)
train_y <- train[, 1]        # labels: first column
train_x <- t(train[, -1])    # remaining columns, one sample per column
train_array <- train_x
dim(train_array) <- c(28, 28, 1, ncol(train_x))

# -- Test set --
test_y <- test[, 1]          # labels: first column
test_x <- t(test[, -1])      # remaining columns, one sample per column
test_array <- test_x
dim(test_array) <- c(28, 28, 1, ncol(test_x))

# Symbolic model definition
# Input placeholder symbol, named "data".
data <- mx.symbol.Variable("data")

# Argument glossary for the layers below:
#   kernel     - size of the convolution / pooling window
#   num_filter - number of convolution kernels (filters)
#   act_type   - activation function
#   stride     - step by which the window is shifted across the input

# Convolution block 1: 16 filters of size 5x5 -> ReLU -> 2x2 max pooling
conv_1 <- mx.symbol.Convolution(
  data = data,
  num_filter = 16,
  kernel = c(5, 5)
)
relu_1 <- mx.symbol.Activation(data = conv_1, act_type = "relu")
pool_1 <- mx.symbol.Pooling(
  data = relu_1,
  kernel = c(2, 2),
  stride = c(2, 2),
  pool_type = "max"
)

# Convolution block 2: 32 filters of size 5x5 -> ReLU -> 2x2 max pooling
conv_2 <- mx.symbol.Convolution(
  data = pool_1,
  num_filter = 32,
  kernel = c(5, 5)
)
relu_2 <- mx.symbol.Activation(data = conv_2, act_type = "relu")
pool_2 <- mx.symbol.Pooling(
  data = relu_2,
  kernel = c(2, 2),
  stride = c(2, 2),
  pool_type = "max"
)

# Fully connected layer 1 (input -> hidden): flatten, 128 units, ReLU
flatten <- mx.symbol.Flatten(data = pool_2)
fc_1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 128)
relu_3 <- mx.symbol.Activation(data = fc_1, act_type = "relu")

# Fully connected layer 2 (hidden -> output)
# num_hidden here is the number of target class levels.
fc_2 <- mx.symbol.FullyConnected(data = relu_3, num_hidden = 10)

# Output layer: softmax, so the network yields class probabilities.
NN_model <- mx.symbol.SoftmaxOutput(data = fc_2)

# Pre-training set up

# Set seed for reproducibility.
# mxnet keeps its own random number generators, so the seed is set through
# mx.set.seed() rather than base R's set.seed().
mx.set.seed(100)

# Create a mxnet CPU context - the device used for training (CPU, not GPU).
devices <- mx.cpu()

# Training

# Fit the network on the training array and labels using the context
# created above.
model <- mx.model.FeedForward.create(
  NN_model,
  X = train_array,
  y = train_y,
  ctx = devices,
  array.batch.size = 40,
  num.round = 20, # number of epochs
  momentum = 0.5,
  learning.rate = 0.01,
  eval.metric = mx.metric.accuracy,
  epoch.end.callback = mx.callback.log.train.metric(100)
)

# Testing

# Predict on the test array. The result is transposed below so that each
# row corresponds to one test sample before taking the per-row maximum.
predicted <- predict(model, test_array)

# Assign labels: index of the highest-scoring class per sample, shifted by
# one so labels start at 0. ties.method = "first" makes the choice
# deterministic (the max.col() default breaks ties at random).
predicted_labels <- max.col(t(predicted), ties.method = "first") - 1

# Accuracy in percent: share of test samples whose predicted label equals
# the true label (first column of `test`). Using mean() avoids hard-coding
# the test-set size and stays correct when some class is never predicted.
mean(test[, 1] == predicted_labels) * 100