Sunday, February 28, 2016
Tree based imputation Part 2 of 2
Conditional tree based imputation
Note the use of as.formula() when building the model formula: it casts the pasted string to a formula object.
Generally, ctree performs better than rpart
Note that GBM can handle missing values, so GBM could perhaps also be used for imputation. Refer here.
# Start from an empty workspace and load partykit, which provides ctree().
rm(list = ls())
library("partykit")

# Read the data set from the course working directory.
setwd('C:\\Users\\Ted\\Documents\\MSPA\\2016 Winter\\Pred 411\\Unit_03')
train <- read.csv('wine.csv')

# Quick look at the structure and class of the loaded data.
str(train)
class(train)

# Give the first column a fixed name so it can be excluded from the models.
names(train)[1] <- 'INDEX'

# Number of missing values per column, before imputation.
vapply(train, function(column) sum(is.na(column)), integer(1))

# Names of the columns that contain at least one missing value.
missCol <- names(train)[colSums(is.na(train)) > 0]
# Impute every column named in missCol with a conditional inference tree
# (ctree) fitted on the rows where that column is observed.
# For each imputed column <var>, an indicator column <var>_MIFLAG is added:
# 1 where the value was originally missing, 0 otherwise.
# Columns are processed in order, so a later model sees the values imputed
# (and the _MIFLAG columns created) in earlier iterations as predictors.
# Iterating over missCol directly makes the loop a no-op when it is empty,
# whereas 1:length(missCol) would iterate over c(1, 0).
for (var in missCol) {
  MIFlag <- paste0(var, '_MIFLAG')
  formula <- as.formula(paste0(var, '~.'))
  isMissing <- is.na(train[[var]])

  # Fix the model columns before this variable's flag is added, so the tree
  # is fitted and applied on exactly the same set of columns.
  modelCols <- setdiff(names(train), c('INDEX', 'TARGET_FLAG', 'TARGET_AMT'))

  model.ctree <- ctree(formula, data = train[!isMissing, modelCols])
  #assign(paste0('model.ctree.',var), model.ctree)

  train[[MIFlag]] <- ifelse(isMissing, 1, 0)

  # predict() without newdata returns fitted values for the training rows
  # only (the rows where var is observed); recycling that vector over all
  # rows would fill the gaps with predictions belonging to other rows.
  # Predict explicitly for the rows that need imputation and assign the
  # result to those rows alone.
  train[[var]][isMissing] <- unname(
    predict(model.ctree, newdata = train[isMissing, modelCols])
  )

  rm(model.ctree, formula, MIFlag, isMissing, modelCols, var)
}

# Number of missing values per column after imputation (expected: all zero).
colSums(is.na(train))
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment