123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257 |
- # Supplementary routines for Boruta.
- #' Extract attribute statistics
- #'
- #' \code{attStats} shows a summary of a Boruta run in an attribute-centred way.
- #' It produces a data frame containing some importance stats as well as the number of hits that attribute scored and the decision it was given.
- #' @param x an object of a class Boruta, from which attribute stats should be extracted.
- #' @return A data frame containing, for each attribute that was originally in information system, mean, median, maximal and minimal importance, number of hits normalised to number of importance source runs performed and the decision copied from \code{finalDecision}.
- #' @note When using a Boruta object generated by a \code{\link{TentativeRoughFix}}, the resulting data frame will consist a rough-fixed decision.
- #' @note \code{x} has to be made with \code{holdHistory} set to \code{TRUE} for this code to run.
- #' @export
- #' @examples
- #' \dontrun{
- #' library(mlbench); data(Sonar)
- #' #Takes some time, so be patient
- #' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
- #' print(Bor.son)
- #' stats<-attStats(Bor.son)
- #' print(stats)
- #' plot(normHits~meanImp,col=stats$decision,data=stats)
- #' }
- attStats<-function(x){
- if(class(x)!='Boruta')
- stop('This function needs Boruta object as an argument.')
- if(is.null(x$ImpHistory))
- stop('Importance history was not stored during the Boruta run.')
- lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
- colnames(x$ImpHistory)->names(lz)
- mr<-lz$shadowMax; lz[1:(length(lz)-3)]->lz
- t(sapply(lz,function(x) c(mean(x),stats::median(x),min(x),max(x),sum(mr[1:length(x)]<x)/length(mr))))->st
- st<-data.frame(st,x$finalDecision)
- names(st)<-c("meanImp","medianImp","minImp","maxImp","normHits","decision")
- return(st)
- }
- #' Extract names of the selected attributes
- #'
- #' \code{getSelectedAttributes} returns a vector of names of attributes selected during a Boruta run.
- #' @param x an object of a class Boruta, from which relevant attributes names should be extracted.
- #' @param withTentative if set to \code{TRUE}, Tentative attributes will be also returned.
- #' @return A character vector with names of the relevant attributes.
- #' @export
- #' @examples
- #' \dontrun{
- #' data(iris)
- #' #Takes some time, so be patient
- #' Boruta(Species~.,data=iris,doTrace=2)->Bor.iris
- #' print(Bor.iris)
- #' print(getSelectedAttributes(Bor.iris))
- #' }
- getSelectedAttributes<-function(x,withTentative=FALSE){
- if(class(x)!='Boruta') stop('This function needs Boruta object as an argument.')
- names(x$finalDecision)[
- x$finalDecision%in%(if(!withTentative) "Confirmed" else c("Confirmed","Tentative"))
- ]
- }
- #' Rough fix of Tentative attributes
- #'
- #' In some circumstances (too short Boruta run, unfortunate mixing of shadow attributes, tricky dataset\ldots), Boruta can leave some attributes Tentative.
- #' \code{TentativeRoughFix} performs a simplified, weaker test for judging such attributes.
- #' @param x an object of a class Boruta.
- #' @param averageOver Either number of last importance source runs to
- #' average over or Inf for averaging over the whole Boruta run.
- #' @return A Boruta class object with modified \code{finalDecision} element.
- #' Such object has few additional elements:
- #' \item{originalDecision}{Original \code{finalDecision}.}
- #' \item{averageOver}{Copy of \code{averageOver} parameter.}
- #' @details Function claims as Confirmed those attributes that
- #' have median importance higher than the median importance of
- #' maximal shadow attribute, and the rest as Rejected.
- #' Depending of the user choice, medians for the test
- #' are count over last round, all rounds or N last
- #' importance source runs.
- #' @note This function should be used only when strict decision is
- #' highly desired, because this test is much weaker than Boruta
- #' and can lower the confidence of the final result.
- #' @note \code{x} has to be made with \code{holdHistory} set to
- #' \code{TRUE} for this code to run.
- #' @export
- TentativeRoughFix<-function(x,averageOver=Inf){
- if(!inherits(x,'Boruta'))
- stop('This function needs Boruta object as an argument.')
- if(is.null(x$ImpHistory))
- stop('Importance history was not stored during the Boruta run.')
- if(!is.numeric(averageOver))
- stop('averageOver should be a numeric vector.')
- if(length(averageOver)!=1)
- stop('averageOver should be a one-element vector.')
- if(averageOver<1)
- stop('averageOver should be positive.')
- tentIdx<-which(x$finalDecision=='Tentative')
- if(length(tentIdx)==0){
- warning('There are no Tentative attributes! Returning original object.')
- return(x)
- }
- nRuns<-dim(x$ImpHistory)[1]
- if(averageOver>nRuns)
- averageOver<-nRuns
- impHistorySubset<-x$ImpHistory[(nRuns-averageOver+1):nRuns,]
- medianTentImp<-sapply(impHistorySubset[,tentIdx],stats::median)
- medianShaMaxImp<-stats::median(impHistorySubset[,'shadowMax'])
- medianTentImp>medianShaMaxImp->toOrdain
- ans<-x
- ans$roughfixed<-TRUE
- ans$averageOver<-averageOver
- ans$originalDecision<-x$finalDecision
- ans$finalDecision[tentIdx[toOrdain]]<-'Confirmed'
- ans$finalDecision[tentIdx[!toOrdain]]<-'Rejected'
- return(ans)
- }
- ##generateCol is internally used by plot.Boruta and plotImpHistory
- generateCol<-function(x,colCode,col,numShadow){
- #Checking arguments
- if(is.null(col) & length(colCode)!=4)
- stop('colCode should have 4 elements.')
- #Generating col
- if(is.null(col)){
- rep(colCode[4],length(x$finalDecision)+numShadow)->cc
- cc[c(x$finalDecision=='Confirmed',rep(FALSE,numShadow))]<-colCode[1]
- cc[c(x$finalDecision=='Tentative',rep(FALSE,numShadow))]<-colCode[2]
- cc[c(x$finalDecision=='Rejected',rep(FALSE,numShadow))]<-colCode[3]
- col=cc
- }
- return(col)
- }
- #' Plot Boruta object
- #'
- #' Default plot method for Boruta objects, showing boxplots of attribute importances over run.
- #' @method plot Boruta
- #' @param x an object of a class Boruta.
- #' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
- #' @param sort controls whether boxplots should be ordered, or left in original order.
- #' @param whichShadow a logical vector controlling which shadows should be drawn; switches respectively max shadow, mean shadow and min shadow.
- #' @param col standard \code{col} attribute. If given, suppresses effects of \code{colCode}.
- #' @param xlab X axis label that will be passed to \code{\link{boxplot}}.
- #' @param ylab Y axis label that will be passed to \code{\link{boxplot}}.
- #' @param ... additional graphical parameter that will be passed to \code{\link{boxplot}}.
- #' @note If \code{col} is given and \code{sort} is \code{TRUE}, the \code{col} will be permuted, so that its order corresponds to attribute order in \code{ImpHistory}.
- #' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
- #' @return Invisible copy of \code{x}.
- #' @examples
- #' \dontrun{
- #' library(mlbench); data(HouseVotes84)
- #' na.omit(HouseVotes84)->hvo
- #' #Takes some time, so be patient
- #' Boruta(Class~.,data=hvo,doTrace=2)->Bor.hvo
- #' print(Bor.hvo)
- #' plot(Bor.hvo)
- #' }
- #' @export
- plot.Boruta<-function(x,colCode=c('green','yellow','red','blue'),sort=TRUE,whichShadow=c(TRUE,TRUE,TRUE),
- col=NULL,xlab='Attributes',ylab='Importance',...){
- #Checking arguments
- if(class(x)!='Boruta')
- stop('This function needs Boruta object as an argument.')
- if(is.null(x$ImpHistory))
- stop('Importance history was not stored during the Boruta run.')
- #Removal of -Infs and conversion to a list
- lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
- colnames(x$ImpHistory)->names(lz)
- #Selection of shadow meta-attributes
- numShadow<-sum(whichShadow)
- lz[c(rep(TRUE,length(x$finalDecision)),whichShadow)]->lz
- #Generating color vector
- col<-generateCol(x,colCode,col,numShadow)
- #Ordering boxes due to attribute median importance
- if(sort){
- ii<-order(sapply(lz,stats::median))
- lz[ii]->lz; col<-col[ii]
- }
- #Final plotting
- graphics::boxplot(lz,xlab=xlab,ylab=ylab,col=col,...)
- invisible(x)
- }
- #' Plot Boruta object as importance history
- #'
- #' Alternative plot method for Boruta objects, showing matplot of attribute importances over run.
- #' @param x an object of a class Boruta.
- #' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
- #' @param col standard \code{col} attribute, passed to \code{\link{matplot}}. If given, suppresses effects of \code{colCode}.
- #' @param type Plot type that will be passed to \code{\link{matplot}}.
- #' @param lty Line type that will be passed to \code{\link{matplot}}.
- #' @param pch Point mark type that will be passed to \code{\link{matplot}}.
- #' @param xlab X axis label that will be passed to \code{\link{matplot}}.
- #' @param ylab Y axis label that will be passed to \code{\link{matplot}}.
- #' @param ... additional graphical parameter that will be passed to \code{\link{matplot}}.
- #' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
- #' @return Invisible copy of \code{x}.
- #' @examples
- #' \dontrun{
- #' library(mlbench); data(Sonar)
- #' #Takes some time, so be patient
- #' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
- #' print(Bor.son)
- #' plotImpHistory(Bor.son)
- #' }
- #' @export
- plotImpHistory<-function(x,colCode=c('green','yellow','red','blue'),col=NULL,type="l",lty=1,pch=0,
- xlab='Classifier run',ylab='Importance',...){
- #Checking arguments
- if(class(x)!='Boruta')
- stop('This function needs Boruta object as an argument.')
- if(is.null(x$ImpHistory))
- stop('Importance history was not stored during the Boruta run.')
- col<-generateCol(x,colCode,col,3)
- #Final plotting
- graphics::matplot(0:(nrow(x$ImpHistory)-1),x$ImpHistory,xlab=xlab,ylab=ylab,col=col,type=type,lty=lty,pch=pch,...)
- invisible(x)
- }
- #' Export Boruta result as a formula
- #'
- #' Functions which convert the Boruta selection into a formula, so that it could be passed further to other functions.
- #' @param x an object of a class Boruta, made using a formula interface.
- #' @return Formula, corresponding to the Boruta results.
- #' \code{getConfirmedFormula} returns only Confirmed attributes, \code{getNonRejectedFormula} also adds Tentative ones.
- #' @note This operation is possible only when Boruta selection was invoked using a formula interface.
- #' @rdname getFormulae
- #' @export
- getConfirmedFormula<-function(x){
- if(!inherits(x,'Boruta'))
- stop('This function needs Boruta object as an argument.')
- if(is.null(x$call[["formula"]]))
- stop('The model for this Boruta run was not a formula.')
- deparse(x$call[["formula"]][[2]])->dec
- preds<-paste(names(x$finalDecision)[x$finalDecision=='Confirmed'],collapse="+")
- return(stats::as.formula(sprintf('%s~%s',dec,preds)))
- }
- #' @rdname getFormulae
- #' @export
- getNonRejectedFormula<-function(x){
- if(!inherits(x,'Boruta'))
- stop('This function needs Boruta object as an argument.')
- if(is.null(x$call[["formula"]]))
- stop('The model for this Boruta run was not a formula.')
- deparse(x$call[["formula"]][[2]])->dec
- preds<-paste(names(x$finalDecision)[x$finalDecision!='Rejected'],collapse="+")
- return(stats::as.formula(sprintf('%s~%s',dec,preds)))
- }
|