# Inspect the column names of the training data.
# NOTE(review): the rest of this section presumes the first two columns of
# `train` are the identifier and the response (referred to as INDEX and
# TARGET further down) -- confirm against the data.
names(train)
# Principal component analysis on every column except the first two, with the
# variables rescaled before the decomposition. The argument is spelled
# `scale.` in full (its formal name in prcomp) instead of relying on partial
# argument matching of `scale`.
pca <- prcomp(train[, -c(1, 2)], scale. = TRUE)
#names(pca); pca$sdev; pca$rotation; pca$center; pca$scale; pca$x
#summary(pca); biplot(pca, scale=0); head(pca$x)
###############################################
### Select number of PCA
###############################################
# Keep the first two columns of `train` plus the scores on the leading
# principal components (at most 10). Capping at the number of available
# components avoids a subscript error on narrow data; drop = FALSE keeps the
# PC column names even if only one component exists.
n.pc <- min(10, ncol(pca$x))
cluster <- cbind(train[, c(1, 2)], pca$x[, seq_len(n.pc), drop = FALSE]) #head(cluster)
# Fit k-means for k = 1, ..., y and collect the sums of squares of each fit
# (e.g. to draw an elbow curve).
#
# Args:
#   x: data frame. Columns named 'INDEX' and 'TARGET' are dropped; all
#      remaining columns are passed to kmeans().
#   y: largest number of clusters to try.
#
# Returns:
#   A data frame with one row per k and the columns
#     i               - number of clusters,
#     km.tot.withinss - total within-cluster sum of squares,
#     km.totss        - total sum of squares.
#   A zero-row data frame with the same columns is returned when y is 0.
#
# Side effect: resets the global RNG seed to 12345 so that the random starts
# of kmeans() are reproducible from call to call.
fn.cv.kmean <- function(x, y) {
  set.seed(12345)
  # Select the feature columns once; drop = FALSE keeps a data frame even
  # when a single feature column remains.
  features <- x[, !names(x) %in% c('INDEX', 'TARGET'), drop = FALSE]
  # seq_len() yields an empty sequence for y = 0 (1:y would give c(1, 0)).
  ks <- seq_len(y)
  # Preallocate the results instead of growing a data frame with rbind().
  withinss <- numeric(length(ks))
  totss <- numeric(length(ks))
  for (k in ks) {
    km <- kmeans(features, k, iter.max = 1e6, nstart = 50, algorithm = 'Lloyd')
    withinss[k] <- km$tot.withinss
    totss[k] <- km$totss
    #table(km$cluster, x$TARGET)
    #plot(x[,3], x[,4], col=km$cluster)
  }
  # Column names match what data.frame(i, km$tot.withinss, km$totss) produced,
  # so existing callers that read cv$km.tot.withinss keep working.
  data.frame(i = ks, km.tot.withinss = withinss, km.totss = totss)
}
# Elbow curve: total within-cluster sum of squares for k = 1..12.
cv <- fn.cv.kmean(cluster, 12)
plot(cv$i, cv$km.tot.withinss, type = 'l')
# Final model: 6 clusters fitted on every column except INDEX and TARGET,
# with a fixed seed so the assignment is reproducible.
set.seed(12345)
km <- kmeans(cluster[, !names(cluster) %in% c('INDEX', 'TARGET')], 6,
             iter.max = 1e6, nstart = 50, algorithm = 'Lloyd')
# Store the cluster id of every row next to the PCA scores.
cluster$kmean <- km$cluster
# Cross-tabulate cluster membership against the response.
table(km$cluster, cluster$TARGET)
#summary(km); head(cluster)
#names(km)
plot(table(km$cluster, cluster$TARGET))
plot(km$cluster, cluster$TARGET)
library(scatterplot3d)
# The column is referenced by its full name `kmean` below; the previous
# `cluster$km` only resolved through `$` partial matching on the data frame.
# NOTE(review): scatterplot3d may ignore `color` when highlight.3d = TRUE --
# confirm whether the 'green' setting has any effect in the next two plots.
scatterplot3d(cluster$PC1, cluster$PC2, cluster$kmean, main = '',
              highlight.3d = TRUE, color = 'green', col.grid = 'lightblue',
              col.axis = 'blue')
scatterplot3d(cluster$PC1, cluster$PC2, cluster$TARGET,
              highlight.3d = TRUE, color = 'green', col.grid = 'lightblue',
              col.axis = 'blue')
# NOTE(review): TARGET is used directly as a colour index here; if it contains
# 0, those points are drawn in the background colour -- verify.
scatterplot3d(cluster$PC1, cluster$PC2, cluster$kmean, pch = 1,
              color = cluster$TARGET, col.grid = 'lightblue', col.axis = 'blue')
scatterplot3d(cluster$PC1, cluster$PC2, cluster$TARGET,
              color = cluster$kmean, col.grid = 'lightblue', col.axis = 'blue')
# Mean absolute difference between the cluster id and the response.
# NOTE(review): k-means ids are arbitrary labels in 1..6, so this is only a
# meaningful error measure if TARGET is on a comparable scale -- confirm intent.
error <- abs(cluster$kmean - cluster$TARGET)
mean(error)
# (Figure caption placeholder left over from the original blog post.)
############################
### Hierarchical clustering
############################
# Features for the distance matrix: everything except the first two columns.
# The `kmean` column (cluster ids written back by the k-means step) is a
# label, not a feature, so it is excluded as well when present.
hc.features <- cluster[, -c(1, 2), drop = FALSE]
hc.features <- hc.features[, names(hc.features) != 'kmean', drop = FALSE]
# Compute the pairwise distances once and reuse them for every linkage.
hc.dist <- dist(hc.features)
# hc.complete was plotted below but never created; fit it alongside the
# average- and single-linkage trees.
hc.complete <- hclust(hc.dist, method = 'complete')
hc.average <- hclust(hc.dist, method = 'average')
hc.single <- hclust(hc.dist, method = 'single')
#summary(hc.average); names(hc.average)
# Dendrograms for the three linkage methods.
plot(hc.complete)
plot(hc.average)
plot(hc.single)
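# (Blog footer residue, not part of the script: "No comments:")
# (Blog footer residue, not part of the script: "Post a Comment")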