Convolutional Neural Network (CNN) Prediction
Kyle Brewster
Introduction
Loading packages and data
# Attach the packages used throughout this document
pacman::p_load(
  tidyverse, tidymodels, data.table, patchwork,
  keras, tensorflow
)
# Seed R's random number generator
set.seed(123)
# Read the training and test sets from the working directory
train_df <- fread("train.csv")
test_df <- fread("test.csv")
Visualizing the Data
An example of what a single observation from the data looks like
# Draw one observation (row 2 of the training data) as an image. Its pixel
# columns (everything except "label") are laid over a 28 x 28 grid; y runs
# from 28 down to 1 so the first pixels land on the top row of the plot.
expand_grid(y = 28:1, x = 1:28) %>%
  mutate(value = unlist(train_df[2, -"label"])) %>%
  ggplot(aes(x = x, y = y, fill = value)) +
  geom_raster() +
  coord_equal()
Note the slight differences in how the same digit is represented across observations
# Row indices of the training observations to draw
vec <- c(2, 5, 6, 18, 24, 55, 64, 70, 99, 109, 111, 112, 115, 130, 142, 147)
# Build one bare raster plot (no axes, no legend) for each selected row
gg <- lapply(vec, function(row_idx) {
  pixels <- unlist(train_df[row_idx, -"label"])
  expand_grid(y = 28:1, x = 1:28) %>%
    mutate(value = pixels) %>%
    ggplot(aes(x = x, y = y, fill = value)) +
    geom_raster() +
    coord_equal() +
    theme_void() +
    theme(legend.position = "none")
})
# Arrange twelve of the plots in a 4 x 3 grid with patchwork
# (list elements 4, 8, 12 and 16 are built but not displayed)
(gg[[1]] + gg[[2]] + gg[[3]]) /
  (gg[[5]] + gg[[6]] + gg[[7]]) /
  (gg[[9]] + gg[[10]] + gg[[11]]) /
  (gg[[13]] + gg[[14]] + gg[[15]])
An overview of the different values
# Build one bare raster plot (no axes, no legend) for each of nine
# hand-picked training rows
gg <- lapply(c(1, 17, 8, 4, 9, 22, 7, 11, 12), function(row_idx) {
  pixels <- unlist(train_df[row_idx, -"label"])
  expand_grid(y = 28:1, x = 1:28) %>%
    mutate(value = pixels) %>%
    ggplot(aes(x = x, y = y, fill = value)) +
    geom_raster() +
    coord_equal() +
    theme_void() +
    theme(legend.position = "none")
})
# Show all nine plots as a 3 x 3 grid with patchwork
(gg[[1]] + gg[[2]] + gg[[3]]) /
  (gg[[4]] + gg[[5]] + gg[[6]]) /
  (gg[[7]] + gg[[8]] + gg[[9]])
Modeling
First, we prepare the data for modeling
# Build the feature matrices: one row per image, 784 pixel columns each.
# The label column is dropped by name (not by position) so the result does
# not depend on where "label" sits in the file.
x.train <- array_reshape(
  as.matrix(train_df[, -"label"]), c(nrow(train_df), 784))
x.test <- array_reshape(
  as.matrix(test_df), c(nrow(test_df), 784))
# Scale pixel values by the largest TRAINING value. The same constant is
# applied to both sets so that train and test features share one scale even
# if their maxima differ.
pixel_max <- max(x.train)
x.train <- x.train / pixel_max
x.test <- x.test / pixel_max
# One-hot encode the labels into 10 classes
y.train <- to_categorical(train_df$label, 10)
## Loaded Tensorflow version 2.9.1
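Creating our neural network model with keras (note: the layers used below are fully connected dense layers rather than convolutional layers)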
# Define the network: 784 inputs -> dense(128, relu) -> dropout(0.2) ->
# dense(64, relu) -> dropout(0.01) -> dense(10, softmax).
# NOTE(review): despite the `cnn_mod` name, every trainable layer here is a
# dense layer; no convolutional layers are used.
cnn_mod <- keras_model_sequential() %>%
  layer_dense(units = 128, activation = 'relu', input_shape = c(784)) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 64, activation = 'relu') %>%
  layer_dropout(rate = 0.01) %>%
  layer_dense(units = 10, activation = 'softmax')
# Compile with categorical cross-entropy loss and the Adam optimizer.
# `learning_rate` is the supported argument name; the older `lr` spelling is
# deprecated and is rejected by newer keras releases.
cnn_mod %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_adam(
    learning_rate = 0.01, beta_1 = 0.9, beta_2 = 0.99),
  metrics = c('accuracy', 'categorical_crossentropy'))
# Print the layer-by-layer summary
summary(cnn_mod)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## dense_2 (Dense) (None, 128) 100480
## dropout_1 (Dropout) (None, 128) 0
## dense_1 (Dense) (None, 64) 8256
## dropout (Dropout) (None, 64) 0
## dense (Dense) (None, 10) 650
## ================================================================================
## Total params: 109,386
## Trainable params: 109,386
## Non-trainable params: 0
## ________________________________________________________________________________
Results
Fitting model
# Train for 250 epochs in mini-batches of 128; validation_split = 0.2
# reserves 20% of the training rows for validation during fitting.
fitted <- fit(
  cnn_mod,
  x = x.train,
  y = y.train,
  epochs = 250,
  batch_size = 128,
  validation_split = 0.2
)
# Plot the per-epoch training history, then summarise the history object
plot(fitted)
summary(fitted)
## Length Class Mode
## params 3 -none- list
## metrics 6 -none- list
Model accuracy on the training data
# Compute loss and metrics on the same data the model was trained on
evaluate(cnn_mod, x.train, y.train)
## loss accuracy categorical_crossentropy
## 0.1209369 0.9927381 0.1209369