### 10.2.1 Basic principles of the ID3 algorithm ###

# Build a categorical working copy of four mtcars columns:
#   am, vs    -> unordered factors with readable labels
#   cyl, gear -> ordered factors
mtcars2 <- within(mtcars[, c("cyl", "vs", "am", "gear")], {
  am <- factor(am, labels = c("automatic", "manual"))
  vs <- factor(vs, labels = c("V", "S"))
  cyl <- ordered(cyl)
  gear <- ordered(gear)
})

table(mtcars2$am) # class counts of the dependent variable

# Information (Shannon) entropy of the dependent variable, in bits:
#   I = -sum(p_k * log2(p_k)), with p_k the relative frequency of class k.
# The proportions are taken from the data rather than typed in by hand; this
# replaces the hand-written -19/32*log2(19/32) - 13/32*log2(13/32) and yields
# the same value. local() keeps the helper variable out of the workspace.
I_am <- local({
  class_prob <- prop.table(table(mtcars2$am))
  -sum(class_prob * log2(class_prob))
})
I_am
-
-
- # 自定义函数计算信息熵、信息增益
- information_gain <- function(x,y){
- m1 <- matrix(table(y))
- entropy_y <- sum(-(m1/sum(m1))*log2(m1/sum(m1)))
- t <- table(x,y)
- m <- matrix(t,length(unique(x)),length(unique(y)),
- dimnames = list(levels(x),levels(y)))
- freq <- -rowSums((m/rowSums(m))*log2(m/rowSums(m)))
- entropy <- sum(rowSums(