# install.packages("rpart")
# If the rpart package is not installed yet, run the command above first.
library(rpart)

# Fit, inspect and prune a CART regression tree for log.mid.price.
#
# Steps:
#   1. read the tab-separated cars4 data,
#   2. fit a regression tree on a random subsample of (at most) 50 cars,
#   3. look at the R-squared / cross-validation diagnostics,
#   4. prune with the one-standard-error rule and plot the selected tree.
#
# The subsample is random and no seed is set here, so each run can give a
# different tree; call set.seed() beforehand if reproducibility is needed.

# Load the cars4 data; the first row of the file holds the variable names.
cars4 <- read.table("cars4.txt", header = TRUE, sep = "\t")
print(names(cars4))

# Sample 50 observations at random (all of them if fewer than 50 exist).
n_obs <- nrow(cars4)
stopifnot(n_obs > 0)
ii <- sample(seq_len(n_obs), min(50, n_obs))

# Fit a CART model to the sampled data.
tree1 <- rpart(
  log.mid.price ~ city.gpm + hw.gpm + rpm.at.max + engrev.high + fueltank +
    length + width + uturn + luggage,
  data = cars4[ii, ]
)

# Plot the tree model.
plot(tree1, branch = .2, compress = TRUE, uniform = TRUE, margin = .1)
text(tree1, all = TRUE, use.n = TRUE, fancy = TRUE)

# A text description of the tree model.
print(tree1)

# Pause: wait for a click on the plot before drawing the next figure.
p <- locator()

# Top panel    - R-squared as a function of tree size (number of questions);
#                the dashed curve is the cross-validation R-squared.
# Bottom panel - relative error (1 - R-squared).
par(mfrow = c(2, 1))
rsq.rpart(tree1)

p <- locator()

# Plot the mean CV pMSE (across 10 folds) as dots, with standard deviation
# bands. The horizontal line is min(pMSE) + standard deviation.
# We select the smallest model that is still within one standard deviation
# of the best pMSE.
par(mfrow = c(1, 1))
plotcp(tree1)
printcp(tree1)

p <- locator()

# One-standard-error rule on the complexity table.
# Columns are addressed by name rather than by position (1, 4, 5).
xerror <- tree1$cptable[, "xerror"]
xstd <- tree1$cptable[, "xstd"]
nspl <- seq_len(nrow(tree1$cptable))

# which.min() returns a single row even when the minimum CV error is tied;
# comparing against min() could return several rows and turn the threshold
# below into a vector that is silently recycled in the comparison.
rowmin <- which.min(xerror)
minmin <- xerror[rowmin] + xstd[rowmin]

# Rows of cptable are ordered from the smallest tree to the largest, so the
# first row inside the band is the smallest acceptable tree.
pickmin <- min(nspl[xerror <= minmin])
pickcp <- tree1$cptable[pickmin, "CP"]

# Prune to the selected complexity parameter and plot the selected tree.
ptree1 <- prune(tree1, cp = pickcp)
plot(ptree1, branch = .2, compress = TRUE, uniform = TRUE, margin = .1)
text(ptree1, all = TRUE, use.n = TRUE, fancy = TRUE)