tools.R 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. # Supplementary routines for Boruta.
  2. #' Extract attribute statistics
  3. #'
  4. #' \code{attStats} shows a summary of a Boruta run in an attribute-centred way.
  5. #' It produces a data frame containing some importance stats as well as the number of hits that attribute scored and the decision it was given.
  6. #' @param x an object of a class Boruta, from which attribute stats should be extracted.
  7. #' @return A data frame containing, for each attribute that was originally in information system, mean, median, maximal and minimal importance, number of hits normalised to number of importance source runs performed and the decision copied from \code{finalDecision}.
  8. #' @note When using a Boruta object generated by a \code{\link{TentativeRoughFix}}, the resulting data frame will consist a rough-fixed decision.
  9. #' @note \code{x} has to be made with \code{holdHistory} set to \code{TRUE} for this code to run.
  10. #' @export
  11. #' @examples
  12. #' \dontrun{
  13. #' library(mlbench); data(Sonar)
  14. #' #Takes some time, so be patient
  15. #' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
  16. #' print(Bor.son)
  17. #' stats<-attStats(Bor.son)
  18. #' print(stats)
  19. #' plot(normHits~meanImp,col=stats$decision,data=stats)
  20. #' }
  21. attStats<-function(x){
  22. if(class(x)!='Boruta')
  23. stop('This function needs Boruta object as an argument.')
  24. if(is.null(x$ImpHistory))
  25. stop('Importance history was not stored during the Boruta run.')
  26. lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
  27. colnames(x$ImpHistory)->names(lz)
  28. mr<-lz$shadowMax; lz[1:(length(lz)-3)]->lz
  29. t(sapply(lz,function(x) c(mean(x),stats::median(x),min(x),max(x),sum(mr[1:length(x)]<x)/length(mr))))->st
  30. st<-data.frame(st,x$finalDecision)
  31. names(st)<-c("meanImp","medianImp","minImp","maxImp","normHits","decision")
  32. return(st)
  33. }
  34. #' Extract names of the selected attributes
  35. #'
  36. #' \code{getSelectedAttributes} returns a vector of names of attributes selected during a Boruta run.
  37. #' @param x an object of a class Boruta, from which relevant attributes names should be extracted.
  38. #' @param withTentative if set to \code{TRUE}, Tentative attributes will be also returned.
  39. #' @return A character vector with names of the relevant attributes.
  40. #' @export
  41. #' @examples
  42. #' \dontrun{
  43. #' data(iris)
  44. #' #Takes some time, so be patient
  45. #' Boruta(Species~.,data=iris,doTrace=2)->Bor.iris
  46. #' print(Bor.iris)
  47. #' print(getSelectedAttributes(Bor.iris))
  48. #' }
  49. getSelectedAttributes<-function(x,withTentative=FALSE){
  50. if(class(x)!='Boruta') stop('This function needs Boruta object as an argument.')
  51. names(x$finalDecision)[
  52. x$finalDecision%in%(if(!withTentative) "Confirmed" else c("Confirmed","Tentative"))
  53. ]
  54. }
  55. #' Rough fix of Tentative attributes
  56. #'
  57. #' In some circumstances (too short Boruta run, unfortunate mixing of shadow attributes, tricky dataset\ldots), Boruta can leave some attributes Tentative.
  58. #' \code{TentativeRoughFix} performs a simplified, weaker test for judging such attributes.
  59. #' @param x an object of a class Boruta.
  60. #' @param averageOver Either number of last importance source runs to
  61. #' average over or Inf for averaging over the whole Boruta run.
  62. #' @return A Boruta class object with modified \code{finalDecision} element.
  63. #' Such object has few additional elements:
  64. #' \item{originalDecision}{Original \code{finalDecision}.}
  65. #' \item{averageOver}{Copy of \code{averageOver} parameter.}
  66. #' @details Function claims as Confirmed those attributes that
  67. #' have median importance higher than the median importance of
  68. #' maximal shadow attribute, and the rest as Rejected.
  69. #' Depending of the user choice, medians for the test
  70. #' are count over last round, all rounds or N last
  71. #' importance source runs.
  72. #' @note This function should be used only when strict decision is
  73. #' highly desired, because this test is much weaker than Boruta
  74. #' and can lower the confidence of the final result.
  75. #' @note \code{x} has to be made with \code{holdHistory} set to
  76. #' \code{TRUE} for this code to run.
  77. #' @export
  78. TentativeRoughFix<-function(x,averageOver=Inf){
  79. if(!inherits(x,'Boruta'))
  80. stop('This function needs Boruta object as an argument.')
  81. if(is.null(x$ImpHistory))
  82. stop('Importance history was not stored during the Boruta run.')
  83. if(!is.numeric(averageOver))
  84. stop('averageOver should be a numeric vector.')
  85. if(length(averageOver)!=1)
  86. stop('averageOver should be a one-element vector.')
  87. if(averageOver<1)
  88. stop('averageOver should be positive.')
  89. tentIdx<-which(x$finalDecision=='Tentative')
  90. if(length(tentIdx)==0){
  91. warning('There are no Tentative attributes! Returning original object.')
  92. return(x)
  93. }
  94. nRuns<-dim(x$ImpHistory)[1]
  95. if(averageOver>nRuns)
  96. averageOver<-nRuns
  97. impHistorySubset<-x$ImpHistory[(nRuns-averageOver+1):nRuns,]
  98. medianTentImp<-sapply(impHistorySubset[,tentIdx],stats::median)
  99. medianShaMaxImp<-stats::median(impHistorySubset[,'shadowMax'])
  100. medianTentImp>medianShaMaxImp->toOrdain
  101. ans<-x
  102. ans$roughfixed<-TRUE
  103. ans$averageOver<-averageOver
  104. ans$originalDecision<-x$finalDecision
  105. ans$finalDecision[tentIdx[toOrdain]]<-'Confirmed'
  106. ans$finalDecision[tentIdx[!toOrdain]]<-'Rejected'
  107. return(ans)
  108. }
  109. ##generateCol is internally used by plot.Boruta and plotImpHistory
  110. generateCol<-function(x,colCode,col,numShadow){
  111. #Checking arguments
  112. if(is.null(col) & length(colCode)!=4)
  113. stop('colCode should have 4 elements.')
  114. #Generating col
  115. if(is.null(col)){
  116. rep(colCode[4],length(x$finalDecision)+numShadow)->cc
  117. cc[c(x$finalDecision=='Confirmed',rep(FALSE,numShadow))]<-colCode[1]
  118. cc[c(x$finalDecision=='Tentative',rep(FALSE,numShadow))]<-colCode[2]
  119. cc[c(x$finalDecision=='Rejected',rep(FALSE,numShadow))]<-colCode[3]
  120. col=cc
  121. }
  122. return(col)
  123. }
  124. #' Plot Boruta object
  125. #'
  126. #' Default plot method for Boruta objects, showing boxplots of attribute importances over run.
  127. #' @method plot Boruta
  128. #' @param x an object of a class Boruta.
  129. #' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
  130. #' @param sort controls whether boxplots should be ordered, or left in original order.
  131. #' @param whichShadow a logical vector controlling which shadows should be drawn; switches respectively max shadow, mean shadow and min shadow.
  132. #' @param col standard \code{col} attribute. If given, suppresses effects of \code{colCode}.
  133. #' @param xlab X axis label that will be passed to \code{\link{boxplot}}.
  134. #' @param ylab Y axis label that will be passed to \code{\link{boxplot}}.
  135. #' @param ... additional graphical parameter that will be passed to \code{\link{boxplot}}.
  136. #' @note If \code{col} is given and \code{sort} is \code{TRUE}, the \code{col} will be permuted, so that its order corresponds to attribute order in \code{ImpHistory}.
  137. #' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
  138. #' @return Invisible copy of \code{x}.
  139. #' @examples
  140. #' \dontrun{
  141. #' library(mlbench); data(HouseVotes84)
  142. #' na.omit(HouseVotes84)->hvo
  143. #' #Takes some time, so be patient
  144. #' Boruta(Class~.,data=hvo,doTrace=2)->Bor.hvo
  145. #' print(Bor.hvo)
  146. #' plot(Bor.hvo)
  147. #' }
  148. #' @export
  149. plot.Boruta<-function(x,colCode=c('green','yellow','red','blue'),sort=TRUE,whichShadow=c(TRUE,TRUE,TRUE),
  150. col=NULL,xlab='Attributes',ylab='Importance',...){
  151. #Checking arguments
  152. if(class(x)!='Boruta')
  153. stop('This function needs Boruta object as an argument.')
  154. if(is.null(x$ImpHistory))
  155. stop('Importance history was not stored during the Boruta run.')
  156. #Removal of -Infs and conversion to a list
  157. lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
  158. colnames(x$ImpHistory)->names(lz)
  159. #Selection of shadow meta-attributes
  160. numShadow<-sum(whichShadow)
  161. lz[c(rep(TRUE,length(x$finalDecision)),whichShadow)]->lz
  162. #Generating color vector
  163. col<-generateCol(x,colCode,col,numShadow)
  164. #Ordering boxes due to attribute median importance
  165. if(sort){
  166. ii<-order(sapply(lz,stats::median))
  167. lz[ii]->lz; col<-col[ii]
  168. }
  169. #Final plotting
  170. graphics::boxplot(lz,xlab=xlab,ylab=ylab,col=col,...)
  171. invisible(x)
  172. }
  173. #' Plot Boruta object as importance history
  174. #'
  175. #' Alternative plot method for Boruta objects, showing matplot of attribute importances over run.
  176. #' @param x an object of a class Boruta.
  177. #' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
  178. #' @param col standard \code{col} attribute, passed to \code{\link{matplot}}. If given, suppresses effects of \code{colCode}.
  179. #' @param type Plot type that will be passed to \code{\link{matplot}}.
  180. #' @param lty Line type that will be passed to \code{\link{matplot}}.
  181. #' @param pch Point mark type that will be passed to \code{\link{matplot}}.
  182. #' @param xlab X axis label that will be passed to \code{\link{matplot}}.
  183. #' @param ylab Y axis label that will be passed to \code{\link{matplot}}.
  184. #' @param ... additional graphical parameter that will be passed to \code{\link{matplot}}.
  185. #' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
  186. #' @return Invisible copy of \code{x}.
  187. #' @examples
  188. #' \dontrun{
  189. #' library(mlbench); data(Sonar)
  190. #' #Takes some time, so be patient
  191. #' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
  192. #' print(Bor.son)
  193. #' plotImpHistory(Bor.son)
  194. #' }
  195. #' @export
  196. plotImpHistory<-function(x,colCode=c('green','yellow','red','blue'),col=NULL,type="l",lty=1,pch=0,
  197. xlab='Classifier run',ylab='Importance',...){
  198. #Checking arguments
  199. if(class(x)!='Boruta')
  200. stop('This function needs Boruta object as an argument.')
  201. if(is.null(x$ImpHistory))
  202. stop('Importance history was not stored during the Boruta run.')
  203. col<-generateCol(x,colCode,col,3)
  204. #Final plotting
  205. graphics::matplot(0:(nrow(x$ImpHistory)-1),x$ImpHistory,xlab=xlab,ylab=ylab,col=col,type=type,lty=lty,pch=pch,...)
  206. invisible(x)
  207. }
  208. #' Export Boruta result as a formula
  209. #'
  210. #' Functions which convert the Boruta selection into a formula, so that it could be passed further to other functions.
  211. #' @param x an object of a class Boruta, made using a formula interface.
  212. #' @return Formula, corresponding to the Boruta results.
  213. #' \code{getConfirmedFormula} returns only Confirmed attributes, \code{getNonRejectedFormula} also adds Tentative ones.
  214. #' @note This operation is possible only when Boruta selection was invoked using a formula interface.
  215. #' @rdname getFormulae
  216. #' @export
  217. getConfirmedFormula<-function(x){
  218. if(!inherits(x,'Boruta'))
  219. stop('This function needs Boruta object as an argument.')
  220. if(is.null(x$call[["formula"]]))
  221. stop('The model for this Boruta run was not a formula.')
  222. deparse(x$call[["formula"]][[2]])->dec
  223. preds<-paste(names(x$finalDecision)[x$finalDecision=='Confirmed'],collapse="+")
  224. return(stats::as.formula(sprintf('%s~%s',dec,preds)))
  225. }
  226. #' @rdname getFormulae
  227. #' @export
  228. getNonRejectedFormula<-function(x){
  229. if(!inherits(x,'Boruta'))
  230. stop('This function needs Boruta object as an argument.')
  231. if(is.null(x$call[["formula"]]))
  232. stop('The model for this Boruta run was not a formula.')
  233. deparse(x$call[["formula"]][[2]])->dec
  234. preds<-paste(names(x$finalDecision)[x$finalDecision!='Rejected'],collapse="+")
  235. return(stats::as.formula(sprintf('%s~%s',dec,preds)))
  236. }