ferns.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. /* R front-end to C code
  2. Copyright 2011-2018 Miron B. Kursa
  3. This file is part of rFerns R package.
  4. rFerns is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  5. rFerns is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  6. You should have received a copy of the GNU General Public License along with rFerns. If not, see http://www.gnu.org/licenses/.
  7. */
  8. #include <R.h>
  9. #include <Rdefines.h>
  10. #include <Rinternals.h>
  11. #include <R_ext/Utils.h>
  12. #include <R_ext/Rdynload.h>
  13. #include <R_ext/Visibility.h>
  14. #define PRINT Rprintf
  15. #define IN_R 7
  16. #include "tools.h"
  17. #include "fern.h"
  18. #include "forest.h"
  19. void loadAttributes(SEXP sAttributes,struct attribute **X,uint *nAtt,uint *nObj){
  20. //We assume sAttributes is a data.frame, so a list of attributes
  21. nAtt[0]=length(sAttributes);
  22. nObj[0]=length(VECTOR_ELT(sAttributes,0));
  23. X[0]=(struct attribute*)R_alloc(sizeof(struct attribute),nAtt[0]);
  24. for(uint e=0;e<nAtt[0];e++){
  25. SEXP xe=VECTOR_ELT(sAttributes,e);
  26. switch(TYPEOF(xe)){
  27. case REALSXP:
  28. X[0][e].numCat=0;
  29. X[0][e].x=(void*)REAL(xe);
  30. break;
  31. case INTSXP:
  32. X[0][e].numCat=length(getAttrib(xe,R_LevelsSymbol));
  33. if(X[0][e].numCat==0) X[0][e].numCat=-1;
  34. X[0][e].x=(void*)INTEGER(xe);
  35. break;
  36. default:
  37. error("Bad input in predictors!");
  38. }
  39. }
  40. }
  41. SEXP random_ferns(SEXP sAttributes,SEXP sDecision,SEXP sD,SEXP sNumFerns,SEXP sCalcImp,SEXP sHoldForest,SEXP sMultilabel,SEXP sConsSeed,SEXP sThreads){
  42. struct attribute *X;
  43. uint nAtt,nObj,nClass,*Y;
  44. uint multi=INTEGER(sMultilabel)[0];
  45. loadAttributes(sAttributes,&X,&nAtt,&nObj);
  46. if(!multi){
  47. nClass=length(getAttrib(sDecision,R_LevelsSymbol));
  48. //Sadly, we need to copy
  49. Y=(uint*)R_alloc(sizeof(uint),nObj);
  50. for(uint e=0;e<nObj;e++)
  51. Y[e]=INTEGER(sDecision)[e]-1;
  52. }else{
  53. nClass=length(sDecision)/nObj;
  54. Y=(uint*)R_alloc(sizeof(uint),nObj*nClass);
  55. for(uint e=0;e<nObj*nClass;e++)
  56. Y[e]=LOGICAL(sDecision)[e];
  57. }
  58. //Now, let's make the RNG and seed from R's RNG
  59. EMERGE_R_FROM_R;
  60. //Parse Threads and consult with OMP
  61. if(isInteger(sThreads) && length(sThreads)!=1) error("Invalid threads argument");
  62. int nt=INTEGER(sThreads)[0];
  63. if(nt<0) error("Invalid threads argument");
  64. int mt=omp_get_max_threads();
  65. if(nt==0) nt=mt;
  66. if(nt>mt) warning("Thread count capped to %d",mt);
  67. nt=nt>mt?mt:nt;
  68. //Data loaded, time to load parameters
  69. params Q;
  70. Q.numClasses=nClass;
  71. Q.D=INTEGER(sD)[0];
  72. Q.twoToD=1<<(Q.D);
  73. Q.numFerns=INTEGER(sNumFerns)[0];
  74. Q.calcImp=INTEGER(sCalcImp)[0]; //0->none, 1->msl, 2->msl+sha
  75. Q.holdForest=INTEGER(sHoldForest)[0];
  76. Q.multilabel=multi;
  77. Q.threads=nt;
  78. if(Q.calcImp==2){
  79. Q.consSeed=((uint32_t*)INTEGER(sConsSeed))[0];
  80. }else{
  81. Q.consSeed=0;
  82. }
  83. //Start composing answer
  84. SEXP sAns; PROTECT(sAns=allocVector(VECSXP,4));
  85. //Allocating fern forest; the whole space is controlled by R
  86. ferns ferns;
  87. SEXP sfSplitAtts=R_NilValue;
  88. SEXP sfScores=R_NilValue;
  89. if(Q.holdForest){
  90. //To store the forest, we allocate vectors which will contain it
  91. // and build ferns out of their buffers. The rest is in saving forest.
  92. PROTECT(sfSplitAtts=allocVector(INTSXP,(Q.D)*(Q.numFerns)));
  93. ferns.splitAtts=INTEGER(sfSplitAtts);
  94. ferns.thresholds=(thresh*)R_alloc(sizeof(thresh),(Q.D)*(Q.numFerns));
  95. PROTECT(sfScores=allocVector(REALSXP,(Q.twoToD)*(Q.numClasses)*(Q.numFerns)));
  96. ferns.scores=(score_t*)REAL(sfScores);
  97. }else{
  98. //In the opposite case, we allocate a chunk for 1-fern forest on GC heap
  99. ferns.splitAtts=(int*)R_alloc(Q.D*nt,sizeof(int));
  100. ferns.thresholds=(thresh*)R_alloc(Q.D*nt,sizeof(thresh));
  101. ferns.scores=(double*)R_alloc((Q.numClasses)*(Q.twoToD)*nt,sizeof(score_t));
  102. }
  103. //Fire the code
  104. model *M=makeModel(X,nAtt,Y,nObj,&ferns,&Q,_R);
  105. //Saving forest
  106. if(Q.holdForest){
  107. SEXP sfThreReal; PROTECT(sfThreReal=allocVector(REALSXP,(Q.D)*(Q.numFerns)));
  108. SEXP sfThreInt; PROTECT(sfThreInt=allocVector(INTSXP,(Q.D)*(Q.numFerns)));
  109. for(uint e=0;e<(Q.D)*(Q.numFerns);e++){
  110. if(X[ferns.splitAtts[e]].numCat!=0){
  111. INTEGER(sfThreInt)[e]=ferns.thresholds[e].selection;
  112. REAL(sfThreReal)[e]=NAN;
  113. }else{
  114. INTEGER(sfThreInt)[e]=-1;
  115. REAL(sfThreReal)[e]=ferns.thresholds[e].value;
  116. }
  117. }
  118. SEXP sModel; PROTECT(sModel=allocVector(VECSXP,4));
  119. SET_VECTOR_ELT(sModel,0,sfSplitAtts);
  120. SET_VECTOR_ELT(sModel,1,sfThreReal);
  121. SET_VECTOR_ELT(sModel,2,sfThreInt);
  122. SET_VECTOR_ELT(sModel,3,sfScores);
  123. SEXP sModelNames; PROTECT(sModelNames=NEW_CHARACTER(4));
  124. SET_STRING_ELT(sModelNames,0,mkChar("splitAttIdxs"));
  125. SET_STRING_ELT(sModelNames,1,mkChar("threReal"));
  126. SET_STRING_ELT(sModelNames,2,mkChar("threInteger"));
  127. SET_STRING_ELT(sModelNames,3,mkChar("scores"));
  128. setAttrib(sModel,R_NamesSymbol,sModelNames);
  129. SET_VECTOR_ELT(sAns,0,sModel);
  130. UNPROTECT(6);
  131. //UPs: sModelNames, sModel, sfThreInt, sfThreReal, sfSplitAtts, sfScores
  132. //Left: sAns
  133. }else{
  134. SET_VECTOR_ELT(sAns,0,R_NilValue);
  135. }
  136. //Currently it always happens
  137. if(M->oobPreds){
  138. //Build score matrix for R, with NAs for object which were never OOB
  139. SEXP sOobScores; PROTECT(sOobScores=allocVector(REALSXP,(Q.numClasses)*nObj));
  140. SEXP sOobDim; PROTECT(sOobDim=allocVector(INTSXP,2));
  141. INTEGER(sOobDim)[0]=Q.numClasses;
  142. INTEGER(sOobDim)[1]=nObj;
  143. double *tmp=REAL(sOobScores);
  144. for(uint e=0;e<nObj;e++)
  145. if(M->oobOutOfBagC[e])
  146. for(uint ee=0;ee<Q.numClasses;ee++)
  147. tmp[e*Q.numClasses+ee]=M->oobPreds[e*Q.numClasses+ee];
  148. else
  149. for(uint ee=0;ee<Q.numClasses;ee++)
  150. tmp[e*Q.numClasses+ee]=NA_REAL;
  151. setAttrib(sOobScores,R_DimSymbol,sOobDim);
  152. SET_VECTOR_ELT(sAns,1,sOobScores);
  153. UNPROTECT(2);
  154. //UPs: sOobScores, sOobDim
  155. //Left: sAns
  156. if(!multi){
  157. //Do actual voting on this matrix; push NA for never-in-OOBs and
  158. //pseudo-random-of-max for ties.
  159. SEXP sOobPreds; PROTECT(sOobPreds=allocVector(INTSXP,nObj));
  160. sint *winningClass=INTEGER(sOobPreds);
  161. for(uint e=0;e<nObj;e++)
  162. if(M->oobOutOfBagC[e]){
  163. winningClass[e]=whichMaxTieAware(&(M->oobPreds[e*Q.numClasses]),Q.numClasses,e);
  164. } else winningClass[e]=NA_INTEGER;
  165. SET_VECTOR_ELT(sAns,3,sOobPreds);
  166. UNPROTECT(1);
  167. //UPs: sOobPreds
  168. //Left: sAns
  169. }else{
  170. SET_VECTOR_ELT(sAns,3,R_NilValue);
  171. }
  172. }else{
  173. SET_VECTOR_ELT(sAns,1,R_NilValue);
  174. SET_VECTOR_ELT(sAns,3,R_NilValue);
  175. }
  176. if(M->imp){
  177. SEXP sImp;
  178. if(Q.calcImp==1){
  179. PROTECT(sImp=allocVector(REALSXP,nAtt*2));
  180. double *tmp=REAL(sImp);
  181. for(uint e=0;e<nAtt;e++)
  182. tmp[e]=M->imp[e];
  183. for(uint e=0;e<nAtt;e++)
  184. tmp[e+nAtt]=M->try[e];
  185. }else{
  186. PROTECT(sImp=allocVector(REALSXP,nAtt*3));
  187. double *tmp=REAL(sImp);
  188. for(uint e=0;e<nAtt;e++)
  189. tmp[e]=M->imp[e];
  190. for(uint e=0;e<nAtt;e++)
  191. tmp[e+nAtt]=M->shimp[e];
  192. for(uint e=0;e<nAtt;e++)
  193. tmp[e+nAtt*2]=M->try[e];
  194. }
  195. SET_VECTOR_ELT(sAns,2,sImp);
  196. UNPROTECT(1);
  197. //UPs: sImp, one or another
  198. //Left: sAns
  199. }else{
  200. SET_VECTOR_ELT(sAns,2,R_NilValue);
  201. }
  202. //Set names
  203. SEXP sAnsNames;
  204. PROTECT(sAnsNames=NEW_CHARACTER(4));
  205. SET_STRING_ELT(sAnsNames,0,mkChar("model"));
  206. SET_STRING_ELT(sAnsNames,1,mkChar("oobScores"));
  207. SET_STRING_ELT(sAnsNames,2,mkChar("importance"));
  208. SET_STRING_ELT(sAnsNames,3,mkChar("oobPreds"));
  209. setAttrib(sAns,R_NamesSymbol,sAnsNames);
  210. UNPROTECT(2);
  211. //UPs: sAnsNames, sAns
  212. //Left: nothing
  213. killModel(M);
  214. return(sAns);
  215. }
  216. SEXP random_ferns_predict(SEXP sAttributes,SEXP sModel,SEXP sD,SEXP sNumFerns,SEXP sNumClasses,SEXP sMode,SEXP sMultilabel){
  217. struct attribute *X;
  218. uint nAtt,nObj;
  219. loadAttributes(sAttributes,&X,&nAtt,&nObj);
  220. //Data loaded, time to load parameters
  221. params Q;
  222. uint nClass=INTEGER(sNumClasses)[0];
  223. uint multi=INTEGER(sMultilabel)[0];
  224. Q.numClasses=nClass;
  225. Q.D=INTEGER(sD)[0];
  226. Q.twoToD=1<<(Q.D);
  227. Q.numFerns=INTEGER(sNumFerns)[0];
  228. Q.multilabel=multi;
  229. //Deciphering model -- WARNING, order of Model list is SIGNIFICANT!
  230. ferns ferns;
  231. ferns.splitAtts=INTEGER(VECTOR_ELT(sModel,0));
  232. ferns.scores=(score_t*)REAL(VECTOR_ELT(sModel,3));
  233. sint *tI=INTEGER(VECTOR_ELT(sModel,2));
  234. double *tR=REAL(VECTOR_ELT(sModel,1));
  235. ferns.thresholds=(thresh*)R_alloc(sizeof(thresh),(Q.D)*(Q.numFerns));
  236. for(uint e=0;e<(Q.D)*(Q.numFerns);e++)
  237. if(!ISNAN(tR[e]))
  238. ferns.thresholds[e].value=tR[e];
  239. else
  240. ferns.thresholds[e].selection=tI[e];
  241. if(INTEGER(sMode)[0]==0 && !multi){
  242. EMERGE_R_FROM_R;
  243. SEXP sAns; PROTECT(sAns=allocVector(INTSXP,nObj));
  244. sint *yp=INTEGER(sAns);
  245. double *buf_sans=(double*)R_alloc(sizeof(double),(Q.numClasses)*nObj);
  246. predictWithModelSimple(X,nAtt,nObj,&ferns,(uint*)yp,_SIMPPQ(Q),buf_sans,_R);
  247. UNPROTECT(1);
  248. return(sAns);
  249. }else{
  250. SEXP sAns; PROTECT(sAns=allocVector(REALSXP,nObj*(Q.numClasses)));
  251. double *yp=REAL(sAns);
  252. uint *buf_idx=(uint*)R_alloc(sizeof(double),nObj);
  253. predictWithModelScores(X,nAtt,nObj,&ferns,(double*)yp,_SIMPPQ(Q),buf_idx);
  254. UNPROTECT(1);
  255. return(sAns);
  256. }
  257. }
  258. #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n}
  259. static const R_CallMethodDef R_CallDef[]={
  260. CALLDEF(random_ferns,9),
  261. CALLDEF(random_ferns_predict,7),
  262. {NULL,NULL,0}
  263. };
  264. void attribute_visible R_init_rFerns(DllInfo *dll){
  265. R_registerRoutines(dll,NULL,R_CallDef,NULL,NULL);
  266. R_useDynamicSymbols(dll,FALSE);
  267. R_forceSymbols(dll,TRUE);
  268. }