
203.5.12 Practice: Digit Recognizer

Implementing a Neural Network on digit image data.

In the previous section, we studied Hidden Layers and Their Roles.

As promised in the first post of the series, we will build a Neural Network that reads the image of a digit and correctly identifies the number.

LAB: Digit Recognizer

  • Take an image of a handwritten single digit, and determine what that digit is.
  • The data consist of normalized handwritten digits, automatically scanned from envelopes by the U.S. Postal Service. The original scanned digits are binary and of different sizes and orientations; the images here have been deslanted and size-normalized, resulting in 16 x 16 grayscale images (Le Cun et al., 1990).
  • The data are in two zipped files, and each line consists of the digit id (0-9) followed by the 256 grayscale values.
  • Build a neural network model that can be used as the digit recognizer.
  • Use the test dataset to validate the true classification power of the model.
  • What is the final accuracy of the model?

Code: Digit Recognizer

#Importing test and training data
digits_train <- read.table("C:\\Amrita\\Datavedi\\Digit Recognizer\\USPS\\zip.train.txt", quote="\"", comment.char="")
digits_test <- read.table("C:\\Amrita\\Datavedi\\Digit Recognizer\\USPS\\zip.test.txt", quote="\"", comment.char="")

dim(digits_train)
## [1] 7291  257
col_names <- names(digits_train[,-1])
label_levels<-names(table(digits_train$V1))

#Let's look at a few images.
for(i in 1:10)
{
  data_row <- digits_train[i, -1]
  pixels <- matrix(as.numeric(data_row), 16, 16, byrow = TRUE)
  image(pixels, axes = FALSE)
  title(main = paste("Label is", digits_train[i, 1]), font.main = 4)
}
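Note that R's image() draws the first matrix row along the x-axis with y increasing upwards, so the digits above appear rotated. A minimal sketch to show them upright, assuming the usual zip-data scaling where values near -1 are the white background and values near 1 are ink:

#Flip the rows and transpose so row 1 of the pixel matrix ends up at the top of the plot
for(i in 1:10)
{
  pixels <- matrix(as.numeric(digits_train[i, -1]), 16, 16, byrow = TRUE)
  image(t(pixels[16:1, ]), col = gray(seq(1, 0, length.out = 256)), axes = FALSE)
  title(main = paste("Label is", digits_train[i, 1]), font.main = 4)
}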

#####Creating multiple columns for multiple outputs
#####We need these variables while building the model
digit_labels <- data.frame(label = digits_train[, 1])
for (i in 1:10)
{
  digit_labels <- cbind(digit_labels, digit_labels$label == i - 1)
  names(digit_labels)[i + 1] <- paste("l", i - 1, sep = "")
}

label_names<-names(digit_labels[,-1])
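The loop above builds the ten indicator columns by hand. As an aside, a minimal equivalent sketch using class.ind() from the nnet package (a recommended package that ships with R); it produces the same 0/1 indicator information in one call:

#One-hot encode the digit label in a single call
library(nnet)
indicators <- class.ind(digits_train$V1)              #matrix with columns "0" ... "9"
colnames(indicators) <- paste0("l", colnames(indicators))
head(indicators, 3)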

#Update the training dataset
digits_train1<-cbind(digits_train,digit_labels)
names(digits_train1)
##   [1] "V1"    "V2"    "V3"    "V4"    "V5"    "V6"    "V7"    "V8"   
##   [9] "V9"    "V10"   "V11"   "V12"   "V13"   "V14"   "V15"   "V16"  
##  [17] "V17"   "V18"   "V19"   "V20"   "V21"   "V22"   "V23"   "V24"  
##  [25] "V25"   "V26"   "V27"   "V28"   "V29"   "V30"   "V31"   "V32"  
##  [33] "V33"   "V34"   "V35"   "V36"   "V37"   "V38"   "V39"   "V40"  
##  [41] "V41"   "V42"   "V43"   "V44"   "V45"   "V46"   "V47"   "V48"  
##  [49] "V49"   "V50"   "V51"   "V52"   "V53"   "V54"   "V55"   "V56"  
##  [57] "V57"   "V58"   "V59"   "V60"   "V61"   "V62"   "V63"   "V64"  
##  [65] "V65"   "V66"   "V67"   "V68"   "V69"   "V70"   "V71"   "V72"  
##  [73] "V73"   "V74"   "V75"   "V76"   "V77"   "V78"   "V79"   "V80"  
##  [81] "V81"   "V82"   "V83"   "V84"   "V85"   "V86"   "V87"   "V88"  
##  [89] "V89"   "V90"   "V91"   "V92"   "V93"   "V94"   "V95"   "V96"  
##  [97] "V97"   "V98"   "V99"   "V100"  "V101"  "V102"  "V103"  "V104" 
## [105] "V105"  "V106"  "V107"  "V108"  "V109"  "V110"  "V111"  "V112" 
## [113] "V113"  "V114"  "V115"  "V116"  "V117"  "V118"  "V119"  "V120" 
## [121] "V121"  "V122"  "V123"  "V124"  "V125"  "V126"  "V127"  "V128" 
## [129] "V129"  "V130"  "V131"  "V132"  "V133"  "V134"  "V135"  "V136" 
## [137] "V137"  "V138"  "V139"  "V140"  "V141"  "V142"  "V143"  "V144" 
## [145] "V145"  "V146"  "V147"  "V148"  "V149"  "V150"  "V151"  "V152" 
## [153] "V153"  "V154"  "V155"  "V156"  "V157"  "V158"  "V159"  "V160" 
## [161] "V161"  "V162"  "V163"  "V164"  "V165"  "V166"  "V167"  "V168" 
## [169] "V169"  "V170"  "V171"  "V172"  "V173"  "V174"  "V175"  "V176" 
## [177] "V177"  "V178"  "V179"  "V180"  "V181"  "V182"  "V183"  "V184" 
## [185] "V185"  "V186"  "V187"  "V188"  "V189"  "V190"  "V191"  "V192" 
## [193] "V193"  "V194"  "V195"  "V196"  "V197"  "V198"  "V199"  "V200" 
## [201] "V201"  "V202"  "V203"  "V204"  "V205"  "V206"  "V207"  "V208" 
## [209] "V209"  "V210"  "V211"  "V212"  "V213"  "V214"  "V215"  "V216" 
## [217] "V217"  "V218"  "V219"  "V220"  "V221"  "V222"  "V223"  "V224" 
## [225] "V225"  "V226"  "V227"  "V228"  "V229"  "V230"  "V231"  "V232" 
## [233] "V233"  "V234"  "V235"  "V236"  "V237"  "V238"  "V239"  "V240" 
## [241] "V241"  "V242"  "V243"  "V244"  "V245"  "V246"  "V247"  "V248" 
## [249] "V249"  "V250"  "V251"  "V252"  "V253"  "V254"  "V255"  "V256" 
## [257] "V257"  "label" "l0"    "l1"    "l2"    "l3"    "l4"    "l5"   
## [265] "l6"    "l7"    "l8"    "l9"
#The shorthand formula y ~ . doesn't work in the neuralnet() function, so we spell out all outputs and inputs explicitly
model_form <- as.formula(paste(paste(label_names, collapse = " + "), "~", paste(col_names, collapse = " + ")))
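Since the formula is assembled with paste(), a quick optional peek before training can confirm it has the expected shape, l0 + l1 + ... + l9 ~ V2 + V3 + ... + V257; a small sketch:

#Peek at the start of the generated formula
substr(paste(deparse(model_form), collapse = " "), 1, 60)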

#Let's keep an eye on the runtime
pc <- proc.time()

library(neuralnet)
Digit_model<-neuralnet(model_form, data=digits_train1, hidden=15,linear.output=FALSE)
summary(Digit_model)
##                     Length  Class      Mode    
## call                      5 -none-     call    
## response              72910 -none-     logical 
## covariate           1866496 -none-     numeric 
## model.list                2 -none-     list    
## err.fct                   1 -none-     function
## act.fct                   1 -none-     function
## linear.output             1 -none-     logical 
## data                    268 data.frame list    
## net.result                1 -none-     list    
## weights                   1 -none-     list    
## startweights              1 -none-     list    
## generalized.weights       1 -none-     list    
## result.matrix          4018 -none-     numeric
proc.time() - pc
##    user  system elapsed 
##  133.61    0.80  138.75
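With hidden = 15 the network has 256 input units, one hidden layer of 15 units and 10 output units. A small hedged check of the parameter count against the fitted object, assuming neuralnet's result.matrix stores the error, the reached threshold and the step count on top of the weights:

#Sanity check: weights = (inputs + bias) * hidden + (hidden + bias) * outputs
(256 + 1) * 15 + (15 + 1) * 10              # 4015 trainable weights
nrow(Digit_model$result.matrix)             # 4018 = 4015 weights + error, reached.threshold, steps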
#######Prediction on the holdout data
test_predicted<-data.frame(compute(Digit_model,digits_test[,-1])$net.result)

########Collating the ten output columns into a single predicted label
pred_label <- 0
for(i in 1:nrow(test_predicted))
{
  #Pick the output column with the highest activation; subtract 1 to map positions 1-10 back to digits 0-9
  pred_label[i] <- which.max(apply(test_predicted[i, ], MARGIN = 2, min)) - 1
}
test_predicted$pred_label <- pred_label
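The loop works, but the which.max/apply construct is roundabout. A minimal equivalent sketch using base R's max.col(), with ties.method = "first" so it matches which.max:

#Vectorised alternative: index of the most active output unit per row, shifted to digits 0-9
pred_label_vec <- max.col(as.matrix(test_predicted[, 1:10]), ties.method = "first") - 1
all(pred_label_vec == test_predicted$pred_label)   # should be TRUE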

###Confusion Matrix and Accuracy
library(caret)
## Loading required package: lattice
#Recent versions of caret require both arguments to be factors with the same levels
confuse<-confusionMatrix(factor(test_predicted$pred_label, levels = 0:9), factor(digits_test$V1, levels = 0:9))
confuse
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1   2   3   4   5   6   7   8   9
##          0 345   0   5   6   3   6   0   0   3   0
##          1   0 248   0   0   1   0   0   0   1   0
##          2   5   2 170   8   9   5   4   1   3   0
##          3   1   2   1 129   0   7   0   0   3   0
##          4   2   5  12   1 166   3   4   5   5   5
##          5   1   1   0  16   2 132   5   0   5   2
##          6   3   5   3   0   2   1 156   0   0   0
##          7   0   1   1   0   3   0   0 133   2   4
##          8   1   0   6   5   3   4   1   2 143   2
##          9   1   0   0   1  11   2   0   6   1 164
## 
## Overall Statistics
##                                                   
##                Accuracy : 0.8898854               
##                  95% CI : (0.8753677, 0.9032526)  
##     No Information Rate : 0.1788739               
##     P-Value [Acc > NIR] : < 0.00000000000000022204
##                                                   
##                   Kappa : 0.8763343               
##  Mcnemar's Test P-Value : NA                      
## 
## Statistics by Class:
## 
##                       Class: 0  Class: 1   Class: 2   Class: 3   Class: 4
## Sensitivity          0.9610028 0.9393939 0.85858586 0.77710843 0.83000000
## Specificity          0.9860437 0.9988526 0.97954671 0.99239544 0.97675706
## Pos Pred Value       0.9375000 0.9920000 0.82125604 0.90209790 0.79807692
## Neg Pred Value       0.9914582 0.9908936 0.98444444 0.98015021 0.98110061
## Prevalence           0.1788739 0.1315396 0.09865471 0.08271051 0.09965122
## Detection Rate       0.1718984 0.1235675 0.08470354 0.06427504 0.08271051
## Detection Prevalence 0.1833582 0.1245640 0.10313901 0.07125062 0.10363727
## Balanced Accuracy    0.9735232 0.9691232 0.91906628 0.88475194 0.90337853
##                        Class: 5   Class: 6   Class: 7   Class: 8
## Sensitivity          0.82500000 0.91764706 0.90476190 0.86144578
## Specificity          0.98267461 0.99237888 0.99408602 0.98696361
## Pos Pred Value       0.80487805 0.91764706 0.92361111 0.85628743
## Neg Pred Value       0.98480738 0.99237888 0.99248524 0.98750000
## Prevalence           0.07972098 0.08470354 0.07324365 0.08271051
## Detection Rate       0.06576981 0.07772795 0.06626806 0.07125062
## Detection Prevalence 0.08171400 0.08470354 0.07174888 0.08320877
## Balanced Accuracy    0.90383730 0.95501297 0.94942396 0.92420469
##                        Class: 9
## Sensitivity          0.92655367
## Specificity          0.98797814
## Pos Pred Value       0.88172043
## Neg Pred Value       0.99286107
## Prevalence           0.08819133
## Detection Rate       0.08171400
## Detection Prevalence 0.09267564
## Balanced Accuracy    0.95726591
confuse$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.8898854011   0.8763342842   0.8753676969   0.9032525591   0.1788739412 
## AccuracyPValue  McnemarPValue 
##   0.0000000000            NaN
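So the final accuracy of the model on the test data is about 0.89 (kappa 0.876), which answers the last LAB question. As a quick cross-check that does not rely on caret, the same number can be read off a plain contingency table:

#Fraction of test digits whose predicted label matches the true label
conf_table <- table(predicted = test_predicted$pred_label, actual = digits_test$V1)
sum(diag(conf_table)) / sum(conf_table)   # ~0.89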
