This example shows feature interaction graphs and their application. The target dataset is Vehicle (from the mlbench package), which has 846 observations on 19 variables: 18 numeric features plus the class label. The goal is to classify a given silhouette as one of four types of vehicle, using a set of features extracted from the silhouette.
library(mlbench)
library(caret)
## Warning: package 'caret' was built under R version 3.6.1
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.1
library(C50)
## Warning: package 'C50' was built under R version 3.6.1
# Load the feature-interaction helpers. The functions f.impact(),
# fi.grid.chart(), fi.matrix() and fi.total.plot() used below are presumably
# defined in this file -- confirm.
# NOTE(review): this is an absolute path on drive d:, so the script only runs
# as-is on a machine that has the file at that location.
source("d:/f_interact_lib.R")
# Load the Vehicle data set and show its column types.
data(Vehicle)
str(Vehicle)
## 'data.frame': 846 obs. of 19 variables:
## $ Comp : num 95 91 104 93 85 107 97 90 86 93 ...
## $ Circ : num 48 41 50 41 44 57 43 43 34 44 ...
## $ D.Circ : num 83 84 106 82 70 106 73 66 62 98 ...
## $ Rad.Ra : num 178 141 209 159 205 172 173 157 140 197 ...
## $ Pr.Axis.Ra : num 72 57 66 63 103 50 65 65 61 62 ...
## $ Max.L.Ra : num 10 9 10 9 52 6 6 9 7 11 ...
## $ Scat.Ra : num 162 149 207 144 149 255 153 137 122 183 ...
## $ Elong : num 42 45 32 46 45 26 42 48 54 36 ...
## $ Pr.Axis.Rect: num 20 19 23 19 19 28 19 18 17 22 ...
## $ Max.L.Rect : num 159 143 158 143 144 169 143 146 127 146 ...
## $ Sc.Var.Maxis: num 176 170 223 160 241 280 176 162 141 202 ...
## $ Sc.Var.maxis: num 379 330 635 309 325 957 361 281 223 505 ...
## $ Ra.Gyr : num 184 158 220 127 188 264 172 164 112 152 ...
## $ Skew.Maxis : num 70 72 73 63 127 85 66 67 64 64 ...
## $ Skew.maxis : num 6 9 14 6 9 5 13 3 2 4 ...
## $ Kurt.maxis : num 16 14 9 10 11 9 1 3 14 14 ...
## $ Kurt.Maxis : num 187 189 188 199 180 181 200 193 200 195 ...
## $ Holl.Ra : num 197 199 196 207 183 183 204 202 208 204 ...
## $ Class : Factor w/ 4 levels "bus","opel","saab",..: 4 4 3 4 1 1 1 4 4 3 ...
# Preview the first rows of the data.
head(Vehicle)
# Count the observations in each of the four vehicle classes.
table(Vehicle$Class) # class info
##
## bus opel saab van
## 218 212 217 199
## Prepare data
# Split Vehicle into the target factor (the Class column, column 19) and the
# feature set (the remaining 18 columns, in their original order).
cl <- Vehicle[["Class"]]
ds <- Vehicle[, setdiff(names(Vehicle), "Class")]
## Fit a model (knn)
# Fix the RNG seed so the resampling is reproducible.
set.seed(1234)
# Resampling scheme: "repeatedcv" with number = 5 and repeats = 2, keeping
# the results of every resample.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 2,
  returnResamp = "all"
)
# Train a k-nearest-neighbour classifier on all features.
model <- train(x = ds, y = cl, method = "knn", trControl = fitControl)
# Predict on the same rows the model was trained on, then measure the share
# of correct predictions.
pred <- predict(model, newdata = ds)
acc.total <- mean(pred == cl)
acc.total # training accuracy of a whole model
## [1] 0.7576832
#
## Print feature impact of each feature
# Print one line per feature: its name, a tab, and the value returned by
# f.impact(model, i) for the feature at column position i of ds.
# seq_len() is used instead of 1:ncol(ds) so the loop body is skipped
# (rather than run for i = 1 and i = 0) if ds ever has no columns.
for (i in seq_len(ncol(ds))) {
  cat(names(ds)[i], "\t", f.impact(model, i), "\n")
}
## Comp 0.01548463
## Circ 0.004137116
## D.Circ 0.07328605
## Rad.Ra 0.1297872
## Pr.Axis.Ra 0.01903073
## Max.L.Ra 0.004728132
## Scat.Ra 0.004846336
## Elong -0.00248227
## Pr.Axis.Rect 0.002955083
## Max.L.Rect 0.05023641
## Sc.Var.Maxis 0.04787234
## Sc.Var.maxis 0.43026
## Ra.Gyr 0.09468085
## Skew.Maxis 0.006264775
## Skew.maxis 0.02009456
## Kurt.maxis 0.002009456
## Kurt.Maxis 0.0107565
## Holl.Ra 0.01312057
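A feature with a negative impact decreases model performance. In the output shown above, Elong is the only feature with a negative impact (-0.00248227). Circ, Skew.Maxis, Skew.maxis, and Holl.Ra, which this text originally listed as negative, all have small positive values in this output, so the sign of these near-zero impacts appears to change from run to run.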
## Grid chart of two-way feature interaction
# Draw the two-way feature interaction grid chart for `model`. The second
# argument "Vehicle" is presumably a title/label for the chart -- confirm in
# f_interact_lib.R. The call is slow: it logs progress over 18 steps.
fi.grid.chart(model, "Vehicle")
## [1] "This function may take long time ..."
## 1 / 18 end..
## 2 / 18 end..
## 3 / 18 end..
## 4 / 18 end..
## 5 / 18 end..
## 6 / 18 end..
## 7 / 18 end..
## 8 / 18 end..
## 9 / 18 end..
## 10 / 18 end..
## 11 / 18 end..
## 12 / 18 end..
## 13 / 18 end..
## 14 / 18 end..
## 15 / 18 end..
## 16 / 18 end..
## 17 / 18 end..
## 18 / 18 end..
## Create a feature interaction matrix
# Compute the feature interaction matrix for `model` and keep it for the
# steps below. The call is slow: it logs progress over 18 steps.
f.matrix <- fi.matrix(model)
## [1] "This function may take long time ..."
## 1 / 18 end..
## 2 / 18 end..
## 3 / 18 end..
## 4 / 18 end..
## 5 / 18 end..
## 6 / 18 end..
## 7 / 18 end..
## 8 / 18 end..
## 9 / 18 end..
## 10 / 18 end..
## 11 / 18 end..
## 12 / 18 end..
## 13 / 18 end..
## 14 / 18 end..
## 15 / 18 end..
## 16 / 18 end..
## 17 / 18 end..
## 18 / 18 end..
# Show the full matrix: one row and one column per feature (18 x 18). The
# console wraps it into groups of columns.
print(f.matrix)
## Comp Circ D.Circ Rad.Ra
## Comp 0.0083924350 8.156028e-03 -0.0060283688 0.003191489
## Circ 0.0081560284 5.910165e-04 0.0020094563 -0.004137116
## D.Circ -0.0060283688 2.009456e-03 0.0815602837 0.021867612
## Rad.Ra 0.0031914894 -4.137116e-03 0.0218676123 0.132151300
## Pr.Axis.Ra -0.0023640662 -3.546099e-03 0.0039007092 0.004137116
## Max.L.Ra -0.0082742317 3.546099e-04 -0.0043735225 -0.002955083
## Scat.Ra -0.0078014184 7.328605e-03 -0.0164302600 0.001654846
## Elong 0.0004728132 -1.182033e-03 -0.0106382979 -0.003664303
## Pr.Axis.Rect 0.0030732861 -6.737589e-03 0.0087470449 0.009101655
## Max.L.Rect -0.0034278960 -2.245863e-03 0.0113475177 0.015721040
## Sc.Var.Maxis 0.0083924350 -7.446809e-03 0.0151300236 0.036052009
## Sc.Var.maxis 0.0089834515 9.929078e-03 0.0547281324 0.098699764
## Ra.Gyr -0.0014184397 3.191489e-03 0.0020094563 0.048108747
## Skew.Maxis 0.0135933806 -9.456265e-04 0.0000000000 0.004137116
## Skew.maxis 0.0107565012 -4.964539e-03 -0.0028368794 0.003900709
## Kurt.maxis -0.0015366430 4.373522e-03 -0.0003546099 -0.001654846
## Kurt.Maxis 0.0069739953 2.836879e-03 0.0096926714 0.001063830
## Holl.Ra -0.0022458629 -1.110223e-16 -0.0003546099 0.002836879
## Pr.Axis.Ra Max.L.Ra Scat.Ra Elong
## Comp -0.0023640662 -0.0082742317 -0.0078014184 0.0004728132
## Circ -0.0035460993 0.0003546099 0.0073286052 -0.0011820331
## D.Circ 0.0039007092 -0.0043735225 -0.0164302600 -0.0106382979
## Rad.Ra 0.0041371158 -0.0029550827 0.0016548463 -0.0036643026
## Pr.Axis.Ra 0.0095744681 0.0007092199 0.0078014184 0.0033096927
## Max.L.Ra 0.0007092199 -0.0047281324 -0.0003546099 0.0029550827
## Scat.Ra 0.0078014184 -0.0003546099 0.0048463357 -0.0010638298
## Elong 0.0033096927 0.0029550827 -0.0010638298 -0.0013002364
## Pr.Axis.Rect -0.0009456265 0.0014184397 0.0059101655 0.0008274232
## Max.L.Rect 0.0001182033 -0.0060283688 -0.0037825059 0.0066193853
## Sc.Var.Maxis 0.0111111111 -0.0046099291 -0.0096926714 -0.0061465721
## Sc.Var.maxis 0.0081560284 -0.0010638298 -0.0030732861 0.0001182033
## Ra.Gyr 0.0031914894 0.0034278960 -0.0160756501 0.0018912530
## Skew.Maxis -0.0057919622 -0.0101654846 -0.0053191489 0.0043735225
## Skew.maxis 0.0065011820 0.0055555556 0.0021276596 0.0062647754
## Kurt.maxis 0.0079196217 -0.0021276596 0.0024822695 0.0013002364
## Kurt.Maxis 0.0004728132 -0.0021276596 0.0062647754 0.0030732861
## Holl.Ra 0.0000000000 0.0047281324 0.0017730496 0.0042553191
## Pr.Axis.Rect Max.L.Rect Sc.Var.Maxis Sc.Var.maxis
## Comp 0.0030732861 -0.0034278960 0.0083924350 0.0089834515
## Circ -0.0067375887 -0.0022458629 -0.0074468085 0.0099290780
## D.Circ 0.0087470449 0.0113475177 0.0151300236 0.0547281324
## Rad.Ra 0.0091016548 0.0157210402 0.0360520095 0.0986997636
## Pr.Axis.Ra -0.0009456265 0.0001182033 0.0111111111 0.0081560284
## Max.L.Ra 0.0014184397 -0.0060283688 -0.0046099291 -0.0010638298
## Scat.Ra 0.0059101655 -0.0037825059 -0.0096926714 -0.0030732861
## Elong 0.0008274232 0.0066193853 -0.0061465721 0.0001182033
## Pr.Axis.Rect -0.0041371158 0.0010638298 0.0054373522 -0.0016548463
## Max.L.Rect 0.0010638298 0.0466903073 0.0080378251 0.0393617021
## Sc.Var.Maxis 0.0054373522 0.0080378251 0.0549645390 0.0257683215
## Sc.Var.maxis -0.0016548463 0.0393617021 0.0257683215 0.4160756501
## Ra.Gyr 0.0047281324 0.0119385343 0.0054373522 0.0802600473
## Skew.Maxis 0.0030732861 0.0013002364 0.0111111111 -0.0022458629
## Skew.maxis -0.0004728132 0.0124113475 0.0005910165 0.0074468085
## Kurt.maxis 0.0007092199 0.0057919622 -0.0007092199 0.0098108747
## Kurt.Maxis 0.0112293144 0.0039007092 0.0034278960 0.0122931442
## Holl.Ra -0.0042553191 -0.0028368794 0.0022458629 0.0075650118
## Ra.Gyr Skew.Maxis Skew.maxis Kurt.maxis
## Comp -0.001418440 0.0135933806 0.0107565012 -0.0015366430
## Circ 0.003191489 -0.0009456265 -0.0049645390 0.0043735225
## D.Circ 0.002009456 0.0000000000 -0.0028368794 -0.0003546099
## Rad.Ra 0.048108747 0.0041371158 0.0039007092 -0.0016548463
## Pr.Axis.Ra 0.003191489 -0.0057919622 0.0065011820 0.0079196217
## Max.L.Ra 0.003427896 -0.0101654846 0.0055555556 -0.0021276596
## Scat.Ra -0.016075650 -0.0053191489 0.0021276596 0.0024822695
## Elong 0.001891253 0.0043735225 0.0062647754 0.0013002364
## Pr.Axis.Rect 0.004728132 0.0030732861 -0.0004728132 0.0007092199
## Max.L.Rect 0.011938534 0.0013002364 0.0124113475 0.0057919622
## Sc.Var.Maxis 0.005437352 0.0111111111 0.0005910165 -0.0007092199
## Sc.Var.maxis 0.080260047 -0.0022458629 0.0074468085 0.0098108747
## Ra.Gyr 0.101773050 -0.0085106383 -0.0023640662 0.0043735225
## Skew.Maxis -0.008510638 0.0062647754 0.0049645390 0.0072104019
## Skew.maxis -0.002364066 0.0049645390 0.0047281324 0.0208037825
## Kurt.maxis 0.004373522 0.0072104019 0.0208037825 0.0020094563
## Kurt.Maxis 0.006028369 0.0190307329 0.0022458629 0.0016548463
## Holl.Ra 0.004255319 -0.0027186761 -0.0027186761 -0.0007092199
## Kurt.Maxis Holl.Ra
## Comp 0.0069739953 -2.245863e-03
## Circ 0.0028368794 -1.110223e-16
## D.Circ 0.0096926714 -3.546099e-04
## Rad.Ra 0.0010638298 2.836879e-03
## Pr.Axis.Ra 0.0004728132 0.000000e+00
## Max.L.Ra -0.0021276596 4.728132e-03
## Scat.Ra 0.0062647754 1.773050e-03
## Elong 0.0030732861 4.255319e-03
## Pr.Axis.Rect 0.0112293144 -4.255319e-03
## Max.L.Rect 0.0039007092 -2.836879e-03
## Sc.Var.Maxis 0.0034278960 2.245863e-03
## Sc.Var.maxis 0.0122931442 7.565012e-03
## Ra.Gyr 0.0060283688 4.255319e-03
## Skew.Maxis 0.0190307329 -2.718676e-03
## Skew.maxis 0.0022458629 -2.718676e-03
## Kurt.maxis 0.0016548463 -7.092199e-04
## Kurt.Maxis 0.0036643026 1.193853e-02
## Holl.Ra 0.0119385343 1.312057e-02
## Total degree of interaction of each feature
# Plot a per-feature total derived from f.matrix. How the total is computed
# is defined in f_interact_lib.R -- presumably a sum over each feature's row;
# confirm.
fi.total.plot(f.matrix)
Skew.maxis and Skew.Maxis have negative predictive power and negative interactions with the other features, so they are candidates for removal from the model.
## Remove 'Skew.maxis' and 'Skew.Maxis', and rebuild the model
## Prepare data
# Drop Skew.Maxis and Skew.maxis (columns 14 and 15 of ds). Selecting by name
# keeps the code correct if the column order ever changes and makes the
# removed features explicit.
ds2 <- ds[, !(names(ds) %in% c("Skew.Maxis", "Skew.maxis"))]
## Fit a model (knn)
# Reset the RNG to the same seed used for the first model.
set.seed(1234)
# Resampling scheme: "repeatedcv" with number = 5 and repeats = 2, keeping
# the results of every resample.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 2,
  returnResamp = "all"
)
# Train a k-nearest-neighbour classifier on the reduced feature set.
model2 <- train(x = ds2, y = cl, method = "knn", trControl = fitControl)
# Predict on the training rows themselves and measure the share of correct
# predictions.
pred2 <- predict(model2, newdata = ds2)
acc.new <- mean(pred2 == cl)
acc.new # training accuracy of a whole model
## [1] 0.7931442
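The training accuracy improves from 0.7576832 to 0.7931442 after removing the two features. Both figures are measured on the training data itself, so the gain should be confirmed with the cross-validated accuracy before drawing conclusions.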