########################################################################
## Install the required packages if they are not already installed
#install.packages("mlbench")
#install.packages("caret")
#install.packages("ggplot2")
#install.packages("forcats")
#install.packages("RColorBrewer")
# We assume all related files are in "D:\rworks"
# NOTE(review): the working directory is hard-coded, so the script only runs
# unchanged on a machine that has this folder; the relative path passed to
# source() below is resolved against it.
setwd("D:/rworks")
# Load the project helper script. The f.* / fi.* / print.fi functions called
# later are not defined in this file -- presumably they come from here; confirm.
source("f_interact_lib.R")
# mlbench supplies the BostonHousing data set; caret supplies
# trainControl(), train() and RMSE() used below.
library(mlbench)
library(caret)
########################################################################
## Regression
## Prepare data
## Split the data set into predictors (everything but column 14) and the
## response (column 14).
data("BostonHousing")
ds <- BostonHousing[-14]   # predictor columns, still a data frame
cl <- BostonHousing[[14]]  # price of a house
## Fit a model
## Linear regression evaluated with 5-fold cross-validation repeated twice;
## the results of every resample are kept (returnResamp = "all").
fitControl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 2,
  returnResamp = "all"
)
model.lm <- train(x = ds, y = cl, method = "lm", trControl = fitControl)
## Predict on the same data the model was trained on and measure the error
pred <- predict(model.lm, newdata = ds)
rmse <- RMSE(pred = pred, obs = cl)
rmse # rmse of a whole model
#> [1] 4.679191
## Print the impact of each feature
## Writes one line per predictor column: the column name, a tab, and the
## value returned by f.impact() for that column index. f.impact() is not
## defined in this file -- presumably it comes from f_interact_lib.R.
## seq_along() replaces 1:ncol(ds) so that the loop body is skipped, instead
## of running with i = 1 and i = 0, if ds ever has no columns.
for (i in seq_along(ds)) {
  cat(names(ds)[i], "\t", f.impact(model.lm, i), "\n")
}
#> crim 0.1825532
#> zn 0.2331253
#> indus 0.003786494
#> chas 0.09189613
#> nox 0.8244577
#> rm 1.374748
#> age -2.37516e-05
#> dis 1.769364
#> rad 1.292213
#> tax 0.8304573
#> ptratio 0.8302797
#> b 0.1488995
#> lstat 2.423039
## Degree of feature interaction between two features
## Evaluates the feature pair "crim" / "zn" on the fitted model and prints the
## result through print.fi(). Neither helper is defined in this file --
## presumably both come from f_interact_lib.R; confirm.
result <- f.interact(model.lm, "crim", "zn")
print.fi(result)
#> ** feature interaction between [ crim ],[ zn ]
#> Task type : Regression
#> Performance of whole dataset : 4.679191
#> Reduced performance by F1 : 0.1825532
#> Reduced performance by F2 : 0.256769
#> Reduced performance by {F1,F2} : 0.4663873
#> Interaction between {F1,F2} : 0.02706504
## Create a feature interaction matrix
## Stores the value returned by fi.matrix() for the fitted model in f.matrix,
## which the print, plot and fi.best calls further down reuse.
## NOTE(review): fi.matrix() is not defined in this file -- presumably it
## comes from f_interact_lib.R; it announces that it may take a long time.
f.matrix <- fi.matrix(model.lm)
#> [1] "This function may take long time ..."
#> 1 / 13 end..
#> 2 / 13 end..
#> 3 / 13 end..
#> 4 / 13 end..
#> 5 / 13 end..
#> 6 / 13 end..
#> 7 / 13 end..
#> 8 / 13 end..
#> 9 / 13 end..
#> 10 / 13 end..
#> 11 / 13 end..
#> 12 / 13 end..
#> 13 / 13 end..
## Show the whole feature interaction matrix on the console
print(f.matrix)
#> crim zn indus chas nox rm age
#> crim 0.182553229 0.027065039 -0.0106608895 0.0080191720 0.107880215 0.040389283 -0.0013082538
#> zn 0.027065039 0.233125343 -0.0165526752 -0.0090711658 0.169439028 0.110001871 -0.0025209938
#> indus -0.010660889 -0.016552675 0.0037864936 0.0002806107 -0.038999021 -0.026816095 0.0003823493
#> chas 0.008019172 -0.009071166 0.0002806107 0.0918961293 -0.043323665 0.008267674 0.0003119213
#> nox 0.107880215 0.169439028 -0.0389990212 -0.0433236647 0.824457684 0.074450738 -0.0052279108
#> rm 0.040389283 0.110001871 -0.0268160945 0.0082676736 0.074450738 1.374748235 -0.0021911376
#> age -0.001308254 -0.002520994 0.0003823493 0.0003119213 -0.005227911 -0.002191138 -0.0000237516
#> dis -0.223775842 -0.427364623 0.0463930828 0.0178351928 -0.927368681 -0.568433136 0.0069948220
#> rad -0.287422682 -0.212615358 0.0366221687 -0.0155821499 -0.673049673 -0.509543333 0.0042759030
#> tax 0.158414111 0.102333506 -0.0381552502 -0.0146905949 0.322154196 0.092302857 -0.0039008408
#> ptratio 0.065261967 0.128630311 -0.0213886716 0.0178508862 0.015984175 0.142753218 -0.0019165943
#> b 0.053293555 0.019912453 -0.0080007178 0.0050594436 0.083352024 0.017453131 -0.0008910354
#> lstat 0.147046326 0.162318568 -0.0471308146 -0.0198857623 0.309456961 0.384614795 -0.0061582880
#> dis rad tax ptratio b lstat
#> crim -0.223775842 -0.287422682 0.158414111 0.065261967 0.0532935550 0.147046326
#> zn -0.427364623 -0.212615358 0.102333506 0.128630311 0.0199124527 0.162318568
#> indus 0.046393083 0.036622169 -0.038155250 -0.021388672 -0.0080007178 -0.047130815
#> chas 0.017835193 -0.015582150 -0.014690595 0.017850886 0.0050594436 -0.019885762
#> nox -0.927368681 -0.673049673 0.322154196 0.015984175 0.0833520238 0.309456961
#> rm -0.568433136 -0.509543333 0.092302857 0.142753218 0.0174531310 0.384614795
#> age 0.006994822 0.004275903 -0.003900841 -0.001916594 -0.0008910354 -0.006158288
#> dis 1.769364421 0.262119308 -0.733352922 -0.443354780 -0.1551927970 -1.238700599
#> rad 0.262119308 1.292213260 -0.941227759 -0.541265550 -0.1851499833 -1.010397838
#> tax -0.733352922 -0.941227759 0.830457340 0.188359832 0.0998758032 0.273803614
#> ptratio -0.443354780 -0.541265550 0.188359832 0.830279659 0.0231617239 0.138010924
#> b -0.155192797 -0.185149983 0.099875803 0.023161724 0.1488995070 0.116944685
#> lstat -1.238700599 -1.010397838 0.273803614 0.138010924 0.1169446846 2.423039283
## Grid chart of two-way feature interaction
## NOTE(review): fi.grid.chart() is not defined in this file, and the role of
## its second argument ("BostonHousing") is not visible here -- presumably a
## chart title or data set label; confirm in f_interact_lib.R.
fi.grid.chart(model.lm, "BostonHousing")
#> [1] "This function may take long time ..."
#> 1 / 13 end..
#> 2 / 13 end..
#> 3 / 13 end..
#> 4 / 13 end..
#> 5 / 13 end..
#> 6 / 13 end..
#> 7 / 13 end..
#> 8 / 13 end..
#> 9 / 13 end..
#> 10 / 13 end..
#> 11 / 13 end..
#> 12 / 13 end..
#> 13 / 13 end..
## Total degree of interaction of each feature
## All three calls below reuse the interaction matrix stored in f.matrix.
## NOTE(review): fi.total.plot() and fi.best() are not defined in this file --
## presumably they come from f_interact_lib.R; confirm.
fi.total.plot(f.matrix)
## Best pair of features (positive interaction)
fi.best(f.matrix, type="pos")
## Best pair of features (negative interaction)
fi.best(f.matrix, type="neg")