# Random forest MDS plots of the iris data, supervised and unsupervised.
# The iris example from the randomForest help file is used as the base.
#
# Produces, in order:
#   1. the default MDS plot of the supervised forest's proximities,
#   2. a styled version of the same plot with a legend,
#   3. an MDS plot of an unsupervised forest, marked by PAM cluster (k = 3),
#   4. the plot() output for the PAM fit,
#   5. PAM silhouette widths over a range of k (needs pamFunctions.R).

library(randomForest) # randomForest(), MDSplot()
library(cluster)      # pam()

## Classification:
## data(iris)
set.seed(71)
iris.rf <- randomForest(
  Species ~ .,
  data = iris,
  importance = TRUE,
  proximity = TRUE
)

# Default MDSplot
MDSplot(iris.rf, iris$Species)

# Figure: supervised RF.
# One filled symbol (pch 21-23) and one fill colour per species; the symbol
# outline stays black via `palette`.
species_levels <- levels(iris$Species)
species_pch <- 21:23
species_bg <- rainbow(nlevels(iris$Species))
species_idx <- as.integer(iris$Species)

# `pch` and `bg` are given as per-observation vectors (indexed by species) so
# that the plotted symbols agree with the legend below, the same way the
# unsupervised figure indexes them by cluster.
MDSplot(
  iris.rf,
  iris$Species,
  bg = species_bg[species_idx],
  pch = species_pch[species_idx],
  palette = rep("black", length(species_levels))
)
legend(
  "topleft",
  species_levels,
  pch = species_pch,
  col = rep("black", length(species_levels)),
  pt.bg = species_bg,
  cex = 0.8
)
title(main = "Supervised Random Forest MDS Plot of Iris Data")

# Now unsupervised RF!
## The `unsupervised' case:
set.seed(17)
iris.urf <- randomForest(iris[, -5])

# Let's pretend we KNOW there are 3 groups!
# Figure: unsupervised RF with PAM groupings.
# PAM is run on the forest dissimilarity, 1 - proximity.
pamData <- pam(1 - iris.urf$proximity, k = 3, diss = TRUE)

# Cluster labels from pam() are used directly as indices into the symbol and
# colour vectors.
cluster_idx <- pamData$clustering
cluster_bg <- rainbow(length(levels(as.factor(cluster_idx))))

MDSplot(
  iris.urf,
  cluster_idx,
  bg = cluster_bg[cluster_idx],
  pch = c(21:23)[cluster_idx],
  palette = rep("black", 3)
)
legend(
  "bottomleft",
  c("Group 1", "Group 2", "Group 3"),
  pch = c(21:23),
  col = rep("black", 3),
  pt.bg = rainbow(3),
  cex = 0.8
)
title(main = "Unsupervised Random Forest MDS Plot of Iris Data \n PAM Groupings, k=3")

# Let's look at the silhouette widths and the average silhouette width to
# decide on the number of groups!
# We could examine the silhouette plots for k = 2, 3, ...
# Let's use our k = 3 from above.
plot(pamData)

# OR even better, let's plot the average over a range of groups!
# pamSilhouetteWidth.fun() is found in pamFunctions.R, which has to be
# sourced before this line runs.
pamSilhouetteWidth.fun(1 - iris.urf$proximity)
title(main = "PAM Silhouette Width, Unsupervised Random Forest Dissimilarity")
# 3 or 4 groups, it's pretty close!