203.6.6 Practice : Kernel – Non Linear Classifier

In the previous section, we studied the non-linear decision boundary.

In this session we will practice the non-linear kernels of SVM in R.

LAB: Kernel – Non linear classifier

Dataset : Software users/sw_user_profile.csv
How many variables are there in software user profile data?
Plot the active users against age and check whether the relation between age and “Active” status is linear or non-linear
Build an SVM model(model-1), make sure that there is no kernel or the kernel is linear
For model-1, create the confusion matrix and find out the accuracy
Create a new variable by using the polynomial kernel
Build an SVM model (model-2) with the new data mapped onto higher dimensions. Keep the kernel as linear (R's default kernel is radial, so set it explicitly)
For model-2, create the confusion matrix and find out the accuracy
Plot the SVM with results.
With the original data, re-create the model (model-3) and let R choose the default kernel function.
What is the accuracy of model-3?

Solution

# Load the software user profile data and preview its first rows.
sw_user_profile <- read.csv(
  file = "C:\\Amrita\\Datavedi\\Software Users\\sw_user_profile.csv"
)
head(x = sw_user_profile)

##   Id       Age Active
## 1  1  9.438867      0
## 2  2  8.614807      0
## 3  3  5.817555      0
## 4  4 10.329219      0
## 5  5  6.527926      0
## 6  6  8.231147      0

# How many variables are there in the software user profile data?
# Listing the column names shows every variable in the data frame.
colnames(sw_user_profile)

## [1] "Id"     "Age"    "Active"

# Plot Age against Id, coloured by Active status, to check whether the
# relation between age and "Active" status is linear or non-linear.
plot(
  x = sw_user_profile$Age,
  y = sw_user_profile$Id,
  col = as.integer(sw_user_profile$Active + 1)
)

# Build an SVM model (model-1). No kernel is specified in this first attempt,
# so svm() uses its default; the summary reports which kernel was chosen.
library(e1071)
svm_model_nl <- svm(Active ~ Age, data = sw_user_profile, type = "C")
summary(svm_model_nl)

## 
## Call:
## svm(formula = Active ~ Age, data = sw_user_profile, type = "C")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  1 
## 
## Number of Support Vectors:  16
## 
##  ( 8 8 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1

# Refit model-1 with the kernel explicitly set to linear.
svm_model_nl <- svm(
  Active ~ Age,
  data = sw_user_profile,
  type = "C",
  kernel = "linear"
)
summary(svm_model_nl)

## 
## Call:
## svm(formula = Active ~ Age, data = sw_user_profile, type = "C", 
##     kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  1 
## 
## Number of Support Vectors:  347
## 
##  ( 174 173 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1

#For model-1, create the confusion matrix and find out the accuracy
library(caret)

## Loading required package: lattice

# Predict with model-1 (no new data is passed to predict()) and compare the
# predictions against the observed Active labels.
Age_predicted <- predict(svm_model_nl)
# Current caret releases require both arguments of confusionMatrix() to be
# factors with the same levels, so the 0/1 Active column is converted to a
# factor using the levels of the predictions.
confusionMatrix(
  Age_predicted,
  factor(sw_user_profile$Active, levels = levels(Age_predicted))
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 317 173
##          1   0   0
##                                           
##                Accuracy : 0.6469          
##                  95% CI : (0.6028, 0.6893)
##     No Information Rate : 0.6469          
##     P-Value [Acc > NIR] : 0.5207          
##                                           
##                   Kappa : 0               
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.6469          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.6469          
##          Detection Rate : 0.6469          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : 0               
##

# Create a new variable that mimics a polynomial feature mapping.

### Standardize Age (subtract the mean, divide by the standard deviation)
### so the results are easier to visualize.
sw_user_profile$age_nor <- with(
  sw_user_profile,
  (Age - mean(Age)) / sd(Age)
)
plot(
  x = sw_user_profile$age_nor,
  y = sw_user_profile$Id,
  col = as.integer(sw_user_profile$Active + 1)
)

# Create the new variable: the square of the standardized age.
sw_user_profile$new <- sw_user_profile$age_nor^2
plot(
  x = sw_user_profile$Age,
  y = sw_user_profile$new,
  col = as.integer(sw_user_profile$Active + 1)
)

# Build model-2 on Age plus the squared feature, with the kernel set to linear.
svm_model_2 <- svm(
  Active ~ Age + new,
  data = sw_user_profile,
  type = "C",
  kernel = "linear"
)
summary(svm_model_2)

## 
## Call:
## svm(formula = Active ~ Age + new, data = sw_user_profile, type = "C", 
##     kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.5 
## 
## Number of Support Vectors:  15
## 
##  ( 8 7 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1

# For model-2, create the confusion matrix and find out the accuracy.
library(caret)
# Predict with model-2 (no new data is passed to predict()).
Age_predicted <- predict(svm_model_2)
# Current caret releases require both arguments of confusionMatrix() to be
# factors with the same levels, so the 0/1 Active column is converted to a
# factor using the levels of the predictions.
confusionMatrix(
  Age_predicted,
  factor(sw_user_profile$Active, levels = levels(Age_predicted))
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 317   0
##          1   0 173
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9925, 1)
##     No Information Rate : 0.6469     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.6469     
##          Detection Rate : 0.6469     
##    Detection Prevalence : 0.6469     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : 0          
##

# Plot the fitted model-2 together with the data, using new versus Age.
plot(svm_model_2, data = sw_user_profile, formula = new ~ Age)

# Re-create the model (model-3) on the original data without specifying a
# kernel, so svm() uses its default kernel function.
library(e1071)
svm_model_3 <- svm(Active ~ Age, data = sw_user_profile, type = "C")
summary(svm_model_3)

## 
## Call:
## svm(formula = Active ~ Age, data = sw_user_profile, type = "C")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  1 
## 
## Number of Support Vectors:  16
## 
##  ( 8 8 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1

# What is the accuracy of model-3?
library(caret)
# Predict with model-3 (no new data is passed to predict()).
Age_predicted <- predict(svm_model_3)
# Current caret releases require both arguments of confusionMatrix() to be
# factors with the same levels, so the 0/1 Active column is converted to a
# factor using the levels of the predictions.
confusionMatrix(
  Age_predicted,
  factor(sw_user_profile$Active, levels = levels(Age_predicted))
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 317   0
##          1   0 173
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9925, 1)
##     No Information Rate : 0.6469     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.6469     
##          Detection Rate : 0.6469     
##    Detection Prevalence : 0.6469     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : 0

The next post is about Soft Margin Classification Noisy Data and Validation.

21st June 2017

203.6.6 Practice : Kernel – Non Linear Classifier

Putting Kernels into practice.

LAB: Kernel – Non linear classifier

Solution