• No products in the cart.

203.5.12 Practice : Digit Recognizer

In the previous section, we studied hidden layers and their roles.

As promised in the first post of the series, we will build a neural network that will read the image of a digit and correctly identify the number.

LAB: Digit Recognizer

  • Take an image of a handwritten single digit, and determine what that digit is.
  • Normalized handwritten digits, automatically scanned from envelopes by the U.S. Postal Service. The original scanned digits are binary and of different sizes and orientations; the images here have been de-slanted and size-normalized, resulting in 16 x 16 grayscale images (Le Cun et al., 1990).
  • The data are in two zipped files, and each line consists of the digit id (0-9) followed by the 256 grayscale values.
  • Build a neural network model that can be used as the digit recognizer.
  • Use the test dataset to validate the true classification power of the model.
  • What is the final accuracy of the model?

Code: Digit Recognizer

#Importing test and training data
# USPS zip-code digit data: each row is one image — column 1 (V1) is the
# digit label (0-9), columns 2-257 (V2..V257) are the 256 grayscale pixel
# values of a 16 x 16 image.
# NOTE(review): absolute Windows paths — point these at your local copy.
digits_train <- read.table("C:\\Amrita\\Datavedi\\Digit Recognizer\\USPS\\zip.train.txt", quote="\"", comment.char="")
digits_test <- read.table("C:\\Amrita\\Datavedi\\Digit Recognizer\\USPS\\zip.test.txt", quote="\"", comment.char="")

dim(digits_train)
## [1] 7291  257
# Predictor column names (V2..V257) and the distinct label values ("0".."9");
# both are reused below when building the model formula and the factors.
col_names <- names(digits_train[,-1])
label_levels<-names(table(digits_train$V1))

# Visual sanity check: render the first ten training digits as 16 x 16
# grayscale images, titled with their true labels.
for (row_idx in seq_len(10)) {
  pixel_values <- as.numeric(digits_train[row_idx, -1])
  pixel_grid <- matrix(pixel_values, nrow = 16, ncol = 16, byrow = TRUE)
  image(pixel_grid, axes = FALSE)
  title(main = paste("Label is", digits_train[row_idx, 1]), font.main = 4)
}

#####Creating multiple columns for multiple outputs
#####We need these variables while building the model
# One-hot encode the digit label: ten logical indicator columns l0..l9,
# one per digit, as required by neuralnet's multi-response formula.
# (Replaces the original cbind-in-a-loop growth and paste(sep="") with
# direct per-digit column assignment and paste0.)
digit_labels <- data.frame(label = digits_train[, 1])
for (digit in 0:9) {
  # e.g. column "l3" is TRUE exactly where the label equals 3
  digit_labels[[paste0("l", digit)]] <- digit_labels$label == digit
}

# Indicator column names (l0..l9) — these form the LHS of the model formula
label_names <- names(digit_labels[, -1])

#Update the training dataset
# Append the ten indicator columns (plus the duplicate "label" column) to the
# 257 original columns so neuralnet sees predictors and responses in one frame.
digits_train1<-cbind(digits_train,digit_labels)
names(digits_train1)
##   [1] "V1"    "V2"    "V3"    "V4"    "V5"    "V6"    "V7"    "V8"   
##   [9] "V9"    "V10"   "V11"   "V12"   "V13"   "V14"   "V15"   "V16"  
##  [17] "V17"   "V18"   "V19"   "V20"   "V21"   "V22"   "V23"   "V24"  
##  [25] "V25"   "V26"   "V27"   "V28"   "V29"   "V30"   "V31"   "V32"  
##  [33] "V33"   "V34"   "V35"   "V36"   "V37"   "V38"   "V39"   "V40"  
##  [41] "V41"   "V42"   "V43"   "V44"   "V45"   "V46"   "V47"   "V48"  
##  [49] "V49"   "V50"   "V51"   "V52"   "V53"   "V54"   "V55"   "V56"  
##  [57] "V57"   "V58"   "V59"   "V60"   "V61"   "V62"   "V63"   "V64"  
##  [65] "V65"   "V66"   "V67"   "V68"   "V69"   "V70"   "V71"   "V72"  
##  [73] "V73"   "V74"   "V75"   "V76"   "V77"   "V78"   "V79"   "V80"  
##  [81] "V81"   "V82"   "V83"   "V84"   "V85"   "V86"   "V87"   "V88"  
##  [89] "V89"   "V90"   "V91"   "V92"   "V93"   "V94"   "V95"   "V96"  
##  [97] "V97"   "V98"   "V99"   "V100"  "V101"  "V102"  "V103"  "V104" 
## [105] "V105"  "V106"  "V107"  "V108"  "V109"  "V110"  "V111"  "V112" 
## [113] "V113"  "V114"  "V115"  "V116"  "V117"  "V118"  "V119"  "V120" 
## [121] "V121"  "V122"  "V123"  "V124"  "V125"  "V126"  "V127"  "V128" 
## [129] "V129"  "V130"  "V131"  "V132"  "V133"  "V134"  "V135"  "V136" 
## [137] "V137"  "V138"  "V139"  "V140"  "V141"  "V142"  "V143"  "V144" 
## [145] "V145"  "V146"  "V147"  "V148"  "V149"  "V150"  "V151"  "V152" 
## [153] "V153"  "V154"  "V155"  "V156"  "V157"  "V158"  "V159"  "V160" 
## [161] "V161"  "V162"  "V163"  "V164"  "V165"  "V166"  "V167"  "V168" 
## [169] "V169"  "V170"  "V171"  "V172"  "V173"  "V174"  "V175"  "V176" 
## [177] "V177"  "V178"  "V179"  "V180"  "V181"  "V182"  "V183"  "V184" 
## [185] "V185"  "V186"  "V187"  "V188"  "V189"  "V190"  "V191"  "V192" 
## [193] "V193"  "V194"  "V195"  "V196"  "V197"  "V198"  "V199"  "V200" 
## [201] "V201"  "V202"  "V203"  "V204"  "V205"  "V206"  "V207"  "V208" 
## [209] "V209"  "V210"  "V211"  "V212"  "V213"  "V214"  "V215"  "V216" 
## [217] "V217"  "V218"  "V219"  "V220"  "V221"  "V222"  "V223"  "V224" 
## [225] "V225"  "V226"  "V227"  "V228"  "V229"  "V230"  "V231"  "V232" 
## [233] "V233"  "V234"  "V235"  "V236"  "V237"  "V238"  "V239"  "V240" 
## [241] "V241"  "V242"  "V243"  "V244"  "V245"  "V246"  "V247"  "V248" 
## [249] "V249"  "V250"  "V251"  "V252"  "V253"  "V254"  "V255"  "V256" 
## [257] "V257"  "label" "l0"    "l1"    "l2"    "l3"    "l4"    "l5"   
## [265] "l6"    "l7"    "l8"    "l9"
#formula y~. doesn't work in neuralnet function
# Build "l0 + l1 + ... + l9 ~ V2 + V3 + ... + V257" explicitly, because
# neuralnet() does not expand the "." shorthand for predictors.
model_form <- as.formula(paste(paste(label_names, collapse = " + "), "~", paste(col_names, collapse = " + ")))

#Lets keep an eye on runtime
# Snapshot CPU/elapsed time so we can report the training cost below.
pc <- proc.time()

library(neuralnet)
# Single hidden layer of 15 neurons. linear.output=FALSE applies the
# (logistic) activation on the output layer too, so each of the ten output
# units produces a class score in (0, 1).
# NOTE(review): training is slow (~2 minutes per the echoed timing below);
# no seed is set, so weights — and the final accuracy — vary run to run.
Digit_model<-neuralnet(model_form, data=digits_train1, hidden=15,linear.output=FALSE)
summary(Digit_model)
##                     Length  Class      Mode    
## call                      5 -none-     call    
## response              72910 -none-     logical 
## covariate           1866496 -none-     numeric 
## model.list                2 -none-     list    
## err.fct                   1 -none-     function
## act.fct                   1 -none-     function
## linear.output             1 -none-     logical 
## data                    268 data.frame list    
## net.result                1 -none-     list    
## weights                   1 -none-     list    
## startweights              1 -none-     list    
## generalized.weights       1 -none-     list    
## result.matrix          4018 -none-     numeric
proc.time() - pc
##    user  system elapsed 
##  133.61    0.80  138.75
#######Prediction  on holdout data
# Score the test images; compute() returns the raw network outputs — one
# column of class scores per indicator variable (l0..l9), one row per image.
test_predicted<-data.frame(compute(Digit_model,digits_test[,-1])$net.result)

########Collating all labels into a single column
# Predicted digit = position of the highest-scoring output column, shifted
# to the 0-9 range. This vectorized form replaces the original loop, which
# grew pred_label element-by-element and took which.max of a per-column
# min() over a single row — equivalent to which.max of the row, but obscure
# and O(n^2) due to the growing vector.
pred_label <- apply(test_predicted, 1, which.max) - 1
test_predicted$pred_label<-pred_label

###Confusion Matrix and Accuracy
library(caret)
## Loading required package: lattice
# Recent versions of caret::confusionMatrix() require `data` and `reference`
# to be factors with identical levels; passing raw numeric vectors errors.
# Coerce both to factors over the full digit range (label_levels = "0".."9")
# so the table keeps all ten classes even if a digit is never predicted.
confuse<-confusionMatrix(factor(test_predicted$pred_label, levels = label_levels),
                         factor(digits_test$V1, levels = label_levels))
confuse
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1   2   3   4   5   6   7   8   9
##          0 345   0   5   6   3   6   0   0   3   0
##          1   0 248   0   0   1   0   0   0   1   0
##          2   5   2 170   8   9   5   4   1   3   0
##          3   1   2   1 129   0   7   0   0   3   0
##          4   2   5  12   1 166   3   4   5   5   5
##          5   1   1   0  16   2 132   5   0   5   2
##          6   3   5   3   0   2   1 156   0   0   0
##          7   0   1   1   0   3   0   0 133   2   4
##          8   1   0   6   5   3   4   1   2 143   2
##          9   1   0   0   1  11   2   0   6   1 164
## 
## Overall Statistics
##                                                   
##                Accuracy : 0.8898854               
##                  95% CI : (0.8753677, 0.9032526)  
##     No Information Rate : 0.1788739               
##     P-Value [Acc > NIR] : < 0.00000000000000022204
##                                                   
##                   Kappa : 0.8763343               
##  Mcnemar's Test P-Value : NA                      
## 
## Statistics by Class:
## 
##                       Class: 0  Class: 1   Class: 2   Class: 3   Class: 4
## Sensitivity          0.9610028 0.9393939 0.85858586 0.77710843 0.83000000
## Specificity          0.9860437 0.9988526 0.97954671 0.99239544 0.97675706
## Pos Pred Value       0.9375000 0.9920000 0.82125604 0.90209790 0.79807692
## Neg Pred Value       0.9914582 0.9908936 0.98444444 0.98015021 0.98110061
## Prevalence           0.1788739 0.1315396 0.09865471 0.08271051 0.09965122
## Detection Rate       0.1718984 0.1235675 0.08470354 0.06427504 0.08271051
## Detection Prevalence 0.1833582 0.1245640 0.10313901 0.07125062 0.10363727
## Balanced Accuracy    0.9735232 0.9691232 0.91906628 0.88475194 0.90337853
##                        Class: 5   Class: 6   Class: 7   Class: 8
## Sensitivity          0.82500000 0.91764706 0.90476190 0.86144578
## Specificity          0.98267461 0.99237888 0.99408602 0.98696361
## Pos Pred Value       0.80487805 0.91764706 0.92361111 0.85628743
## Neg Pred Value       0.98480738 0.99237888 0.99248524 0.98750000
## Prevalence           0.07972098 0.08470354 0.07324365 0.08271051
## Detection Rate       0.06576981 0.07772795 0.06626806 0.07125062
## Detection Prevalence 0.08171400 0.08470354 0.07174888 0.08320877
## Balanced Accuracy    0.90383730 0.95501297 0.94942396 0.92420469
##                        Class: 9
## Sensitivity          0.92655367
## Specificity          0.98797814
## Pos Pred Value       0.88172043
## Neg Pred Value       0.99286107
## Prevalence           0.08819133
## Detection Rate       0.08171400
## Detection Prevalence 0.09267564
## Balanced Accuracy    0.95726591
# Overall metrics as a named numeric vector — the "Accuracy" entry (~0.89
# here) answers the lab question about the model's final accuracy.
confuse$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.8898854011   0.8763342842   0.8753676969   0.9032525591   0.1788739412 
## AccuracyPValue  McnemarPValue 
##   0.0000000000            NaN
0 responses on "203.5.12 Practice : Digit Recognizer"

Leave a Message

Statinfer

Statinfer, derived from "statistical inference", is a company that focuses on data science training and R&D. We offer training on Machine Learning, Deep Learning, and Artificial Intelligence using tools like R, Python, and TensorFlow.

Contact Us

We Accept

Our Social Links

How to Become a Data Scientist?

top
© 2020. All Rights Reserved.