Lcp.cpp 40 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645
  1. /*
  2. ===========================================================================
  3. Doom 3 GPL Source Code
  4. Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 GPL Source Code (?Doom 3 Source Code?).
  6. Doom 3 Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #include "../precompiled.h"
  21. #pragma hdrstop
  22. static idCVar lcp_showFailures( "lcp_showFailures", "0", CVAR_SYSTEM | CVAR_BOOL, "show LCP solver failures" );
  23. const float LCP_BOUND_EPSILON = 1e-5f;
  24. const float LCP_ACCEL_EPSILON = 1e-5f;
  25. const float LCP_DELTA_ACCEL_EPSILON = 1e-9f;
  26. const float LCP_DELTA_FORCE_EPSILON = 1e-9f;
  27. #define IGNORE_UNSATISFIABLE_VARIABLES
  28. //===============================================================
  29. // M
  30. // idLCP_Square MrE
  31. // E
  32. //===============================================================
  33. class idLCP_Square : public idLCP {
  34. public:
  35. virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
  36. private:
  37. idMatX m; // original matrix
  38. idVecX b; // right hand side
  39. idVecX lo, hi; // low and high bounds
  40. idVecX f, a; // force and acceleration
  41. idVecX delta_f, delta_a; // delta force and delta acceleration
  42. idMatX clamped; // LU factored sub matrix for clamped variables
  43. idVecX diagonal; // reciprocal of diagonal of U of the LU factored sub matrix for clamped variables
  44. int numUnbounded; // number of unbounded variables
  45. int numClamped; // number of clamped variables
  46. float ** rowPtrs; // pointers to the rows of m
  47. int * boxIndex; // box index
  48. int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
  49. int * permuted; // index to keep track of the permutation
  50. bool padded; // set to true if the rows of the initial matrix are 16 byte padded
  51. private:
  52. bool FactorClamped( void );
  53. void SolveClamped( idVecX &x, const float *b );
  54. void Swap( int i, int j );
  55. void AddClamped( int r );
  56. void RemoveClamped( int r );
  57. void CalcForceDelta( int d, float dir );
  58. void CalcAccelDelta( int d );
  59. void ChangeForce( int d, float step );
  60. void ChangeAccel( int d, float step );
  61. void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
  62. };
  63. /*
  64. ============
  65. idLCP_Square::FactorClamped
  66. ============
  67. */
  68. bool idLCP_Square::FactorClamped( void ) {
  69. int i, j, k;
  70. float s, d;
  71. for ( i = 0; i < numClamped; i++ ) {
  72. memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
  73. }
  74. for ( i = 0; i < numClamped; i++ ) {
  75. s = idMath::Fabs( clamped[i][i] );
  76. if ( s == 0.0f ) {
  77. return false;
  78. }
  79. diagonal[i] = d = 1.0f / clamped[i][i];
  80. for ( j = i + 1; j < numClamped; j++ ) {
  81. clamped[j][i] *= d;
  82. }
  83. for ( j = i + 1; j < numClamped; j++ ) {
  84. d = clamped[j][i];
  85. for ( k = i + 1; k < numClamped; k++ ) {
  86. clamped[j][k] -= d * clamped[i][k];
  87. }
  88. }
  89. }
  90. return true;
  91. }
  92. /*
  93. ============
  94. idLCP_Square::SolveClamped
  95. ============
  96. */
  97. void idLCP_Square::SolveClamped( idVecX &x, const float *b ) {
  98. int i, j;
  99. float sum;
  100. // solve L
  101. for ( i = 0; i < numClamped; i++ ) {
  102. sum = b[i];
  103. for ( j = 0; j < i; j++ ) {
  104. sum -= clamped[i][j] * x[j];
  105. }
  106. x[i] = sum;
  107. }
  108. // solve U
  109. for ( i = numClamped - 1; i >= 0; i-- ) {
  110. sum = x[i];
  111. for ( j = i + 1; j < numClamped; j++ ) {
  112. sum -= clamped[i][j] * x[j];
  113. }
  114. x[i] = sum * diagonal[i];
  115. }
  116. }
  117. /*
  118. ============
  119. idLCP_Square::Swap
  120. ============
  121. */
  122. void idLCP_Square::Swap( int i, int j ) {
  123. if ( i == j ) {
  124. return;
  125. }
  126. idSwap( rowPtrs[i], rowPtrs[j] );
  127. m.SwapColumns( i, j );
  128. b.SwapElements( i, j );
  129. lo.SwapElements( i, j );
  130. hi.SwapElements( i, j );
  131. a.SwapElements( i, j );
  132. f.SwapElements( i, j );
  133. if ( boxIndex ) {
  134. idSwap( boxIndex[i], boxIndex[j] );
  135. }
  136. idSwap( side[i], side[j] );
  137. idSwap( permuted[i], permuted[j] );
  138. }
  139. /*
  140. ============
  141. idLCP_Square::AddClamped
  142. ============
  143. */
  144. void idLCP_Square::AddClamped( int r ) {
  145. int i, j;
  146. float sum;
  147. assert( r >= numClamped );
  148. // add a row at the bottom and a column at the right of the factored
  149. // matrix for the clamped variables
  150. Swap( numClamped, r );
  151. // add row to L
  152. for ( i = 0; i < numClamped; i++ ) {
  153. sum = rowPtrs[numClamped][i];
  154. for ( j = 0; j < i; j++ ) {
  155. sum -= clamped[numClamped][j] * clamped[j][i];
  156. }
  157. clamped[numClamped][i] = sum * diagonal[i];
  158. }
  159. // add column to U
  160. for ( i = 0; i <= numClamped; i++ ) {
  161. sum = rowPtrs[i][numClamped];
  162. for ( j = 0; j < i; j++ ) {
  163. sum -= clamped[i][j] * clamped[j][numClamped];
  164. }
  165. clamped[i][numClamped] = sum;
  166. }
  167. diagonal[numClamped] = 1.0f / clamped[numClamped][numClamped];
  168. numClamped++;
  169. }
  170. /*
  171. ============
  172. idLCP_Square::RemoveClamped
  173. ============
  174. */
  175. void idLCP_Square::RemoveClamped( int r ) {
  176. int i, j;
  177. float *y0, *y1, *z0, *z1;
  178. double diag, beta0, beta1, p0, p1, q0, q1, d;
  179. assert( r < numClamped );
  180. numClamped--;
  181. // no need to swap and update the factored matrix when the last row and column are removed
  182. if ( r == numClamped ) {
  183. return;
  184. }
  185. y0 = (float *) _alloca16( numClamped * sizeof( float ) );
  186. z0 = (float *) _alloca16( numClamped * sizeof( float ) );
  187. y1 = (float *) _alloca16( numClamped * sizeof( float ) );
  188. z1 = (float *) _alloca16( numClamped * sizeof( float ) );
  189. // the row/column need to be subtracted from the factorization
  190. for ( i = 0; i < numClamped; i++ ) {
  191. y0[i] = -rowPtrs[i][r];
  192. }
  193. memset( y1, 0, numClamped * sizeof( float ) );
  194. y1[r] = 1.0f;
  195. memset( z0, 0, numClamped * sizeof( float ) );
  196. z0[r] = 1.0f;
  197. for ( i = 0; i < numClamped; i++ ) {
  198. z1[i] = -rowPtrs[r][i];
  199. }
  200. // swap the to be removed row/column with the last row/column
  201. Swap( r, numClamped );
  202. // the swapped last row/column need to be added to the factorization
  203. for ( i = 0; i < numClamped; i++ ) {
  204. y0[i] += rowPtrs[i][r];
  205. }
  206. for ( i = 0; i < numClamped; i++ ) {
  207. z1[i] += rowPtrs[r][i];
  208. }
  209. z1[r] = 0.0f;
  210. // update the beginning of the to be updated row and column
  211. for ( i = 0; i < r; i++ ) {
  212. p0 = y0[i];
  213. beta1 = z1[i] * diagonal[i];
  214. clamped[i][r] += p0;
  215. for ( j = i+1; j < numClamped; j++ ) {
  216. z1[j] -= beta1 * clamped[i][j];
  217. }
  218. for ( j = i+1; j < numClamped; j++ ) {
  219. y0[j] -= p0 * clamped[j][i];
  220. }
  221. clamped[r][i] += beta1;
  222. }
  223. // update the lower right corner starting at r,r
  224. for ( i = r; i < numClamped; i++ ) {
  225. diag = clamped[i][i];
  226. p0 = y0[i];
  227. p1 = z0[i];
  228. diag += p0 * p1;
  229. if ( diag == 0.0f ) {
  230. idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
  231. return;
  232. }
  233. beta0 = p1 / diag;
  234. q0 = y1[i];
  235. q1 = z1[i];
  236. diag += q0 * q1;
  237. if ( diag == 0.0f ) {
  238. idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
  239. return;
  240. }
  241. d = 1.0f / diag;
  242. beta1 = q1 * d;
  243. clamped[i][i] = diag;
  244. diagonal[i] = d;
  245. for ( j = i+1; j < numClamped; j++ ) {
  246. d = clamped[i][j];
  247. d += p0 * z0[j];
  248. z0[j] -= beta0 * d;
  249. d += q0 * z1[j];
  250. z1[j] -= beta1 * d;
  251. clamped[i][j] = d;
  252. }
  253. for ( j = i+1; j < numClamped; j++ ) {
  254. d = clamped[j][i];
  255. y0[j] -= p0 * d;
  256. d += beta0 * y0[j];
  257. y1[j] -= q0 * d;
  258. d += beta1 * y1[j];
  259. clamped[j][i] = d;
  260. }
  261. }
  262. return;
  263. }
  264. /*
  265. ============
  266. idLCP_Square::CalcForceDelta
  267. modifies this->delta_f
  268. ============
  269. */
  270. ID_INLINE void idLCP_Square::CalcForceDelta( int d, float dir ) {
  271. int i;
  272. float *ptr;
  273. delta_f[d] = dir;
  274. if ( numClamped == 0 ) {
  275. return;
  276. }
  277. // get column d of matrix
  278. ptr = (float *) _alloca16( numClamped * sizeof( float ) );
  279. for ( i = 0; i < numClamped; i++ ) {
  280. ptr[i] = rowPtrs[i][d];
  281. }
  282. // solve force delta
  283. SolveClamped( delta_f, ptr );
  284. // flip force delta based on direction
  285. if ( dir > 0.0f ) {
  286. ptr = delta_f.ToFloatPtr();
  287. for ( i = 0; i < numClamped; i++ ) {
  288. ptr[i] = - ptr[i];
  289. }
  290. }
  291. }
  292. /*
  293. ============
  294. idLCP_Square::CalcAccelDelta
  295. modifies this->delta_a and uses this->delta_f
  296. ============
  297. */
  298. ID_INLINE void idLCP_Square::CalcAccelDelta( int d ) {
  299. int j;
  300. float dot;
  301. // only the not clamped variables, including the current variable, can have a change in acceleration
  302. for ( j = numClamped; j <= d; j++ ) {
  303. // only the clamped variables and the current variable have a force delta unequal zero
  304. SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
  305. delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
  306. }
  307. }
  308. /*
  309. ============
  310. idLCP_Square::ChangeForce
  311. modifies this->f and uses this->delta_f
  312. ============
  313. */
  314. ID_INLINE void idLCP_Square::ChangeForce( int d, float step ) {
  315. // only the clamped variables and current variable have a force delta unequal zero
  316. SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
  317. f[d] += step * delta_f[d];
  318. }
  319. /*
  320. ============
  321. idLCP_Square::ChangeAccel
  322. modifies this->a and uses this->delta_a
  323. ============
  324. */
  325. ID_INLINE void idLCP_Square::ChangeAccel( int d, float step ) {
  326. // only the not clamped variables, including the current variable, can have an acceleration unequal zero
  327. SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
  328. }
  329. /*
  330. ============
  331. idLCP_Square::GetMaxStep
  332. ============
  333. */
  334. void idLCP_Square::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
  335. int i;
  336. float s;
  337. // default to a full step for the current variable
  338. if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
  339. maxStep = -a[d] / delta_a[d];
  340. } else {
  341. maxStep = 0.0f;
  342. }
  343. limit = d;
  344. limitSide = 0;
  345. // test the current variable
  346. if ( dir < 0.0f ) {
  347. if ( lo[d] != -idMath::INFINITY ) {
  348. s = ( lo[d] - f[d] ) / dir;
  349. if ( s < maxStep ) {
  350. maxStep = s;
  351. limitSide = -1;
  352. }
  353. }
  354. } else {
  355. if ( hi[d] != idMath::INFINITY ) {
  356. s = ( hi[d] - f[d] ) / dir;
  357. if ( s < maxStep ) {
  358. maxStep = s;
  359. limitSide = 1;
  360. }
  361. }
  362. }
  363. // test the clamped bounded variables
  364. for ( i = numUnbounded; i < numClamped; i++ ) {
  365. if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
  366. // if there is a low boundary
  367. if ( lo[i] != -idMath::INFINITY ) {
  368. s = ( lo[i] - f[i] ) / delta_f[i];
  369. if ( s < maxStep ) {
  370. maxStep = s;
  371. limit = i;
  372. limitSide = -1;
  373. }
  374. }
  375. } else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
  376. // if there is a high boundary
  377. if ( hi[i] != idMath::INFINITY ) {
  378. s = ( hi[i] - f[i] ) / delta_f[i];
  379. if ( s < maxStep ) {
  380. maxStep = s;
  381. limit = i;
  382. limitSide = 1;
  383. }
  384. }
  385. }
  386. }
  387. // test the not clamped bounded variables
  388. for ( i = numClamped; i < d; i++ ) {
  389. if ( side[i] == -1 ) {
  390. if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
  391. continue;
  392. }
  393. } else if ( side[i] == 1 ) {
  394. if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
  395. continue;
  396. }
  397. } else {
  398. continue;
  399. }
  400. // ignore variables for which the force is not allowed to take any substantial value
  401. if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
  402. continue;
  403. }
  404. s = -a[i] / delta_a[i];
  405. if ( s < maxStep ) {
  406. maxStep = s;
  407. limit = i;
  408. limitSide = 0;
  409. }
  410. }
  411. }
  412. /*
  413. ============
  414. idLCP_Square::Solve
  415. ============
  416. */
  417. bool idLCP_Square::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
  418. int i, j, n, limit, limitSide, boxStartIndex;
  419. float dir, maxStep, dot, s;
  420. char *failed;
  421. // true when the matrix rows are 16 byte padded
  422. padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
  423. assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
  424. assert( o_x.GetSize() == o_m.GetNumRows() );
  425. assert( o_b.GetSize() == o_m.GetNumRows() );
  426. assert( o_lo.GetSize() == o_m.GetNumRows() );
  427. assert( o_hi.GetSize() == o_m.GetNumRows() );
  428. // allocate memory for permuted input
  429. f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
  430. a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
  431. b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
  432. lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
  433. hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
  434. if ( o_boxIndex ) {
  435. boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
  436. memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
  437. } else {
  438. boxIndex = NULL;
  439. }
  440. // we override the const on o_m here but on exit the matrix is unchanged
  441. m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
  442. f.Zero();
  443. a.Zero();
  444. b = o_b;
  445. lo = o_lo;
  446. hi = o_hi;
  447. // pointers to the rows of m
  448. rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
  449. for ( i = 0; i < m.GetNumRows(); i++ ) {
  450. rowPtrs[i] = m[i];
  451. }
  452. // tells if a variable is at the low boundary, high boundary or inbetween
  453. side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
  454. // index to keep track of the permutation
  455. permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
  456. for ( i = 0; i < m.GetNumRows(); i++ ) {
  457. permuted[i] = i;
  458. }
  459. // permute input so all unbounded variables come first
  460. numUnbounded = 0;
  461. for ( i = 0; i < m.GetNumRows(); i++ ) {
  462. if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
  463. if ( numUnbounded != i ) {
  464. Swap( numUnbounded, i );
  465. }
  466. numUnbounded++;
  467. }
  468. }
  469. // permute input so all variables using the boxIndex come last
  470. boxStartIndex = m.GetNumRows();
  471. if ( boxIndex ) {
  472. for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
  473. if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
  474. boxStartIndex--;
  475. if ( boxStartIndex != i ) {
  476. Swap( boxStartIndex, i );
  477. }
  478. }
  479. }
  480. }
  481. // sub matrix for factorization
  482. clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
  483. diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  484. // all unbounded variables are clamped
  485. numClamped = numUnbounded;
  486. // if there are unbounded variables
  487. if ( numUnbounded ) {
  488. // factor and solve for unbounded variables
  489. if ( !FactorClamped() ) {
  490. idLib::common->Printf( "idLCP_Square::Solve: unbounded factorization failed\n" );
  491. return false;
  492. }
  493. SolveClamped( f, b.ToFloatPtr() );
  494. // if there are no bounded variables we are done
  495. if ( numUnbounded == m.GetNumRows() ) {
  496. o_x = f; // the vector is not permuted
  497. return true;
  498. }
  499. }
  500. #ifdef IGNORE_UNSATISFIABLE_VARIABLES
  501. int numIgnored = 0;
  502. #endif
  503. // allocate for delta force and delta acceleration
  504. delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  505. delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  506. // solve for bounded variables
  507. failed = NULL;
  508. for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
  509. // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
  510. if ( i == boxStartIndex ) {
  511. for ( j = 0; j < boxStartIndex; j++ ) {
  512. o_x[permuted[j]] = f[j];
  513. }
  514. for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
  515. s = o_x[boxIndex[j]];
  516. if ( lo[j] != -idMath::INFINITY ) {
  517. lo[j] = - idMath::Fabs( lo[j] * s );
  518. }
  519. if ( hi[j] != idMath::INFINITY ) {
  520. hi[j] = idMath::Fabs( hi[j] * s );
  521. }
  522. }
  523. }
  524. // calculate acceleration for current variable
  525. SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
  526. a[i] = dot - b[i];
  527. // if already at the low boundary
  528. if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
  529. side[i] = -1;
  530. continue;
  531. }
  532. // if already at the high boundary
  533. if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
  534. side[i] = 1;
  535. continue;
  536. }
  537. // if inside the clamped region
  538. if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
  539. side[i] = 0;
  540. AddClamped( i );
  541. continue;
  542. }
  543. // drive the current variable into a valid region
  544. for ( n = 0; n < maxIterations; n++ ) {
  545. // direction to move
  546. if ( a[i] <= 0.0f ) {
  547. dir = 1.0f;
  548. } else {
  549. dir = -1.0f;
  550. }
  551. // calculate force delta
  552. CalcForceDelta( i, dir );
  553. // calculate acceleration delta: delta_a = m * delta_f;
  554. CalcAccelDelta( i );
  555. // maximum step we can take
  556. GetMaxStep( i, dir, maxStep, limit, limitSide );
  557. if ( maxStep <= 0.0f ) {
  558. #ifdef IGNORE_UNSATISFIABLE_VARIABLES
  559. // ignore the current variable completely
  560. lo[i] = hi[i] = 0.0f;
  561. f[i] = 0.0f;
  562. side[i] = -1;
  563. numIgnored++;
  564. #else
  565. failed = va( "invalid step size %.4f", maxStep );
  566. #endif
  567. break;
  568. }
  569. // change force
  570. ChangeForce( i, maxStep );
  571. // change acceleration
  572. ChangeAccel( i, maxStep );
  573. // clamp/unclamp the variable that limited this step
  574. side[limit] = limitSide;
  575. switch( limitSide ) {
  576. case 0: {
  577. a[limit] = 0.0f;
  578. AddClamped( limit );
  579. break;
  580. }
  581. case -1: {
  582. f[limit] = lo[limit];
  583. if ( limit != i ) {
  584. RemoveClamped( limit );
  585. }
  586. break;
  587. }
  588. case 1: {
  589. f[limit] = hi[limit];
  590. if ( limit != i ) {
  591. RemoveClamped( limit );
  592. }
  593. break;
  594. }
  595. }
  596. // if the current variable limited the step we can continue with the next variable
  597. if ( limit == i ) {
  598. break;
  599. }
  600. }
  601. if ( n >= maxIterations ) {
  602. failed = va( "max iterations %d", maxIterations );
  603. break;
  604. }
  605. if ( failed ) {
  606. break;
  607. }
  608. }
  609. #ifdef IGNORE_UNSATISFIABLE_VARIABLES
  610. if ( numIgnored ) {
  611. if ( lcp_showFailures.GetBool() ) {
  612. idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
  613. }
  614. }
  615. #endif
  616. // if failed clear remaining forces
  617. if ( failed ) {
  618. if ( lcp_showFailures.GetBool() ) {
  619. idLib::common->Printf( "idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
  620. }
  621. for ( j = i; j < m.GetNumRows(); j++ ) {
  622. f[j] = 0.0f;
  623. }
  624. }
  625. #if defined(_DEBUG) && 0
  626. if ( !failed ) {
  627. // test whether or not the solution satisfies the complementarity conditions
  628. for ( i = 0; i < m.GetNumRows(); i++ ) {
  629. a[i] = -b[i];
  630. for ( j = 0; j < m.GetNumRows(); j++ ) {
  631. a[i] += rowPtrs[i][j] * f[j];
  632. }
  633. if ( f[i] == lo[i] ) {
  634. if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
  635. int bah1 = 1;
  636. }
  637. } else if ( f[i] == hi[i] ) {
  638. if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
  639. int bah2 = 1;
  640. }
  641. } else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
  642. int bah3 = 1;
  643. }
  644. }
  645. }
  646. #endif
  647. // unpermute result
  648. for ( i = 0; i < f.GetSize(); i++ ) {
  649. o_x[permuted[i]] = f[i];
  650. }
  651. // unpermute original matrix
  652. for ( i = 0; i < m.GetNumRows(); i++ ) {
  653. for ( j = 0; j < m.GetNumRows(); j++ ) {
  654. if ( permuted[j] == i ) {
  655. break;
  656. }
  657. }
  658. if ( i != j ) {
  659. m.SwapColumns( i, j );
  660. idSwap( permuted[i], permuted[j] );
  661. }
  662. }
  663. return true;
  664. }
  665. //===============================================================
  666. // M
  667. // idLCP_Symmetric MrE
  668. // E
  669. //===============================================================
  670. class idLCP_Symmetric : public idLCP {
  671. public:
  672. virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
  673. private:
  674. idMatX m; // original matrix
  675. idVecX b; // right hand side
  676. idVecX lo, hi; // low and high bounds
  677. idVecX f, a; // force and acceleration
  678. idVecX delta_f, delta_a; // delta force and delta acceleration
  679. idMatX clamped; // LDLt factored sub matrix for clamped variables
  680. idVecX diagonal; // reciprocal of diagonal of LDLt factored sub matrix for clamped variables
  681. idVecX solveCache1; // intermediate result cached in SolveClamped
  682. idVecX solveCache2; // "
  683. int numUnbounded; // number of unbounded variables
  684. int numClamped; // number of clamped variables
  685. int clampedChangeStart; // lowest row/column changed in the clamped matrix during an iteration
  686. float ** rowPtrs; // pointers to the rows of m
  687. int * boxIndex; // box index
  688. int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
  689. int * permuted; // index to keep track of the permutation
  690. bool padded; // set to true if the rows of the initial matrix are 16 byte padded
  691. private:
  692. bool FactorClamped( void );
  693. void SolveClamped( idVecX &x, const float *b );
  694. void Swap( int i, int j );
  695. void AddClamped( int r, bool useSolveCache );
  696. void RemoveClamped( int r );
  697. void CalcForceDelta( int d, float dir );
  698. void CalcAccelDelta( int d );
  699. void ChangeForce( int d, float step );
  700. void ChangeAccel( int d, float step );
  701. void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
  702. };
  703. /*
  704. ============
  705. idLCP_Symmetric::FactorClamped
  706. ============
  707. */
  708. bool idLCP_Symmetric::FactorClamped( void ) {
  709. clampedChangeStart = 0;
  710. for ( int i = 0; i < numClamped; i++ ) {
  711. memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
  712. }
  713. return SIMDProcessor->MatX_LDLTFactor( clamped, diagonal, numClamped );
  714. }
  715. /*
  716. ============
  717. idLCP_Symmetric::SolveClamped
  718. ============
  719. */
  720. void idLCP_Symmetric::SolveClamped( idVecX &x, const float *b ) {
  721. // solve L
  722. SIMDProcessor->MatX_LowerTriangularSolve( clamped, solveCache1.ToFloatPtr(), b, numClamped, clampedChangeStart );
  723. // solve D
  724. SIMDProcessor->Mul( solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), diagonal.ToFloatPtr(), numClamped );
  725. // solve Lt
  726. SIMDProcessor->MatX_LowerTriangularSolveTranspose( clamped, x.ToFloatPtr(), solveCache2.ToFloatPtr(), numClamped );
  727. clampedChangeStart = numClamped;
  728. }
  729. /*
  730. ============
  731. idLCP_Symmetric::Swap
  732. ============
  733. */
  734. void idLCP_Symmetric::Swap( int i, int j ) {
  735. if ( i == j ) {
  736. return;
  737. }
  738. idSwap( rowPtrs[i], rowPtrs[j] );
  739. m.SwapColumns( i, j );
  740. b.SwapElements( i, j );
  741. lo.SwapElements( i, j );
  742. hi.SwapElements( i, j );
  743. a.SwapElements( i, j );
  744. f.SwapElements( i, j );
  745. if ( boxIndex ) {
  746. idSwap( boxIndex[i], boxIndex[j] );
  747. }
  748. idSwap( side[i], side[j] );
  749. idSwap( permuted[i], permuted[j] );
  750. }
  751. /*
  752. ============
  753. idLCP_Symmetric::AddClamped
  754. ============
  755. */
  756. void idLCP_Symmetric::AddClamped( int r, bool useSolveCache ) {
  757. float d, dot;
  758. assert( r >= numClamped );
  759. if ( numClamped < clampedChangeStart ) {
  760. clampedChangeStart = numClamped;
  761. }
  762. // add a row at the bottom and a column at the right of the factored
  763. // matrix for the clamped variables
  764. Swap( numClamped, r );
  765. // solve for v in L * v = rowPtr[numClamped]
  766. if ( useSolveCache ) {
  767. // the lower triangular solve was cached in SolveClamped called by CalcForceDelta
  768. memcpy( clamped[numClamped], solveCache2.ToFloatPtr(), numClamped * sizeof( float ) );
  769. // calculate row dot product
  770. SIMDProcessor->Dot( dot, solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), numClamped );
  771. } else {
  772. float *v = (float *) _alloca16( numClamped * sizeof( float ) );
  773. SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[numClamped], numClamped );
  774. // add bottom row to L
  775. SIMDProcessor->Mul( clamped[numClamped], v, diagonal.ToFloatPtr(), numClamped );
  776. // calculate row dot product
  777. SIMDProcessor->Dot( dot, clamped[numClamped], v, numClamped );
  778. }
  779. // update diagonal[numClamped]
  780. d = rowPtrs[numClamped][numClamped] - dot;
  781. if ( d == 0.0f ) {
  782. idLib::common->Printf( "idLCP_Symmetric::AddClamped: updating factorization failed\n" );
  783. numClamped++;
  784. return;
  785. }
  786. clamped[numClamped][numClamped] = d;
  787. diagonal[numClamped] = 1.0f / d;
  788. numClamped++;
  789. }
  790. /*
  791. ============
  792. idLCP_Symmetric::RemoveClamped
  793. ============
  794. */
  795. void idLCP_Symmetric::RemoveClamped( int r ) {
  796. int i, j, n;
  797. float *addSub, *original, *v, *ptr, *v1, *v2, dot;
  798. double sum, diag, newDiag, invNewDiag, p1, p2, alpha1, alpha2, beta1, beta2;
  799. assert( r < numClamped );
  800. if ( r < clampedChangeStart ) {
  801. clampedChangeStart = r;
  802. }
  803. numClamped--;
  804. // no need to swap and update the factored matrix when the last row and column are removed
  805. if ( r == numClamped ) {
  806. return;
  807. }
  808. // swap the to be removed row/column with the last row/column
  809. Swap( r, numClamped );
  810. // update the factored matrix
  811. addSub = (float *) _alloca16( numClamped * sizeof( float ) );
  812. if ( r == 0 ) {
  813. if ( numClamped == 1 ) {
  814. diag = rowPtrs[0][0];
  815. if ( diag == 0.0f ) {
  816. idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
  817. return;
  818. }
  819. clamped[0][0] = diag;
  820. diagonal[0] = 1.0f / diag;
  821. return;
  822. }
  823. // calculate the row/column to be added to the lower right sub matrix starting at (r, r)
  824. original = rowPtrs[numClamped];
  825. ptr = rowPtrs[r];
  826. addSub[0] = ptr[0] - original[numClamped];
  827. for ( i = 1; i < numClamped; i++ ) {
  828. addSub[i] = ptr[i] - original[i];
  829. }
  830. } else {
  831. v = (float *) _alloca16( numClamped * sizeof( float ) );
  832. // solve for v in L * v = rowPtr[r]
  833. SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[r], r );
  834. // update removed row
  835. SIMDProcessor->Mul( clamped[r], v, diagonal.ToFloatPtr(), r );
  836. // if the last row/column of the matrix is updated
  837. if ( r == numClamped - 1 ) {
  838. // only calculate new diagonal
  839. SIMDProcessor->Dot( dot, clamped[r], v, r );
  840. diag = rowPtrs[r][r] - dot;
  841. if ( diag == 0.0f ) {
  842. idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
  843. return;
  844. }
  845. clamped[r][r] = diag;
  846. diagonal[r] = 1.0f / diag;
  847. return;
  848. }
  849. // calculate the row/column to be added to the lower right sub matrix starting at (r, r)
  850. for ( i = 0; i < r; i++ ) {
  851. v[i] = clamped[r][i] * clamped[i][i];
  852. }
  853. for ( i = r; i < numClamped; i++ ) {
  854. if ( i == r ) {
  855. sum = clamped[r][r];
  856. } else {
  857. sum = clamped[r][r] * clamped[i][r];
  858. }
  859. ptr = clamped[i];
  860. for ( j = 0; j < r; j++ ) {
  861. sum += ptr[j] * v[j];
  862. }
  863. addSub[i] = rowPtrs[r][i] - sum;
  864. }
  865. }
  866. // add row/column to the lower right sub matrix starting at (r, r)
  867. v1 = (float *) _alloca16( numClamped * sizeof( float ) );
  868. v2 = (float *) _alloca16( numClamped * sizeof( float ) );
  869. diag = idMath::SQRT_1OVER2;
  870. v1[r] = ( 0.5f * addSub[r] + 1.0f ) * diag;
  871. v2[r] = ( 0.5f * addSub[r] - 1.0f ) * diag;
  872. for ( i = r+1; i < numClamped; i++ ) {
  873. v1[i] = v2[i] = addSub[i] * diag;
  874. }
  875. alpha1 = 1.0f;
  876. alpha2 = -1.0f;
  877. // simultaneous update/downdate of the sub matrix starting at (r, r)
  878. n = clamped.GetNumColumns();
  879. for ( i = r; i < numClamped; i++ ) {
  880. diag = clamped[i][i];
  881. p1 = v1[i];
  882. newDiag = diag + alpha1 * p1 * p1;
  883. if ( newDiag == 0.0f ) {
  884. idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
  885. return;
  886. }
  887. alpha1 /= newDiag;
  888. beta1 = p1 * alpha1;
  889. alpha1 *= diag;
  890. diag = newDiag;
  891. p2 = v2[i];
  892. newDiag = diag + alpha2 * p2 * p2;
  893. if ( newDiag == 0.0f ) {
  894. idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
  895. return;
  896. }
  897. clamped[i][i] = newDiag;
  898. diagonal[i] = invNewDiag = 1.0f / newDiag;
  899. alpha2 *= invNewDiag;
  900. beta2 = p2 * alpha2;
  901. alpha2 *= diag;
  902. // update column below diagonal (i,i)
  903. ptr = clamped.ToFloatPtr() + i;
  904. for ( j = i+1; j < numClamped - 1; j += 2 ) {
  905. float sum0 = ptr[(j+0)*n];
  906. float sum1 = ptr[(j+1)*n];
  907. v1[j+0] -= p1 * sum0;
  908. v1[j+1] -= p1 * sum1;
  909. sum0 += beta1 * v1[j+0];
  910. sum1 += beta1 * v1[j+1];
  911. v2[j+0] -= p2 * sum0;
  912. v2[j+1] -= p2 * sum1;
  913. sum0 += beta2 * v2[j+0];
  914. sum1 += beta2 * v2[j+1];
  915. ptr[(j+0)*n] = sum0;
  916. ptr[(j+1)*n] = sum1;
  917. }
  918. for ( ; j < numClamped; j++ ) {
  919. sum = ptr[j*n];
  920. v1[j] -= p1 * sum;
  921. sum += beta1 * v1[j];
  922. v2[j] -= p2 * sum;
  923. sum += beta2 * v2[j];
  924. ptr[j*n] = sum;
  925. }
  926. }
  927. }
  928. /*
  929. ============
  930. idLCP_Symmetric::CalcForceDelta
  931. modifies this->delta_f
  932. ============
  933. */
  934. ID_INLINE void idLCP_Symmetric::CalcForceDelta( int d, float dir ) {
  935. int i;
  936. float *ptr;
  937. delta_f[d] = dir;
  938. if ( numClamped == 0 ) {
  939. return;
  940. }
  941. // solve force delta
  942. SolveClamped( delta_f, rowPtrs[d] );
  943. // flip force delta based on direction
  944. if ( dir > 0.0f ) {
  945. ptr = delta_f.ToFloatPtr();
  946. for ( i = 0; i < numClamped; i++ ) {
  947. ptr[i] = - ptr[i];
  948. }
  949. }
  950. }
  951. /*
  952. ============
  953. idLCP_Symmetric::CalcAccelDelta
  954. modifies this->delta_a and uses this->delta_f
  955. ============
  956. */
  957. ID_INLINE void idLCP_Symmetric::CalcAccelDelta( int d ) {
  958. int j;
  959. float dot;
  960. // only the not clamped variables, including the current variable, can have a change in acceleration
  961. for ( j = numClamped; j <= d; j++ ) {
  962. // only the clamped variables and the current variable have a force delta unequal zero
  963. SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
  964. delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
  965. }
  966. }
  967. /*
  968. ============
  969. idLCP_Symmetric::ChangeForce
  970. modifies this->f and uses this->delta_f
  971. ============
  972. */
  973. ID_INLINE void idLCP_Symmetric::ChangeForce( int d, float step ) {
  974. // only the clamped variables and current variable have a force delta unequal zero
  975. SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
  976. f[d] += step * delta_f[d];
  977. }
  978. /*
  979. ============
  980. idLCP_Symmetric::ChangeAccel
  981. modifies this->a and uses this->delta_a
  982. ============
  983. */
  984. ID_INLINE void idLCP_Symmetric::ChangeAccel( int d, float step ) {
  985. // only the not clamped variables, including the current variable, can have an acceleration unequal zero
  986. SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
  987. }
  988. /*
  989. ============
  990. idLCP_Symmetric::GetMaxStep
  991. ============
  992. */
  993. void idLCP_Symmetric::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
  994. int i;
  995. float s;
  996. // default to a full step for the current variable
  997. if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
  998. maxStep = -a[d] / delta_a[d];
  999. } else {
  1000. maxStep = 0.0f;
  1001. }
  1002. limit = d;
  1003. limitSide = 0;
  1004. // test the current variable
  1005. if ( dir < 0.0f ) {
  1006. if ( lo[d] != -idMath::INFINITY ) {
  1007. s = ( lo[d] - f[d] ) / dir;
  1008. if ( s < maxStep ) {
  1009. maxStep = s;
  1010. limitSide = -1;
  1011. }
  1012. }
  1013. } else {
  1014. if ( hi[d] != idMath::INFINITY ) {
  1015. s = ( hi[d] - f[d] ) / dir;
  1016. if ( s < maxStep ) {
  1017. maxStep = s;
  1018. limitSide = 1;
  1019. }
  1020. }
  1021. }
  1022. // test the clamped bounded variables
  1023. for ( i = numUnbounded; i < numClamped; i++ ) {
  1024. if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
  1025. // if there is a low boundary
  1026. if ( lo[i] != -idMath::INFINITY ) {
  1027. s = ( lo[i] - f[i] ) / delta_f[i];
  1028. if ( s < maxStep ) {
  1029. maxStep = s;
  1030. limit = i;
  1031. limitSide = -1;
  1032. }
  1033. }
  1034. } else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
  1035. // if there is a high boundary
  1036. if ( hi[i] != idMath::INFINITY ) {
  1037. s = ( hi[i] - f[i] ) / delta_f[i];
  1038. if ( s < maxStep ) {
  1039. maxStep = s;
  1040. limit = i;
  1041. limitSide = 1;
  1042. }
  1043. }
  1044. }
  1045. }
  1046. // test the not clamped bounded variables
  1047. for ( i = numClamped; i < d; i++ ) {
  1048. if ( side[i] == -1 ) {
  1049. if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
  1050. continue;
  1051. }
  1052. } else if ( side[i] == 1 ) {
  1053. if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
  1054. continue;
  1055. }
  1056. } else {
  1057. continue;
  1058. }
  1059. // ignore variables for which the force is not allowed to take any substantial value
  1060. if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
  1061. continue;
  1062. }
  1063. s = -a[i] / delta_a[i];
  1064. if ( s < maxStep ) {
  1065. maxStep = s;
  1066. limit = i;
  1067. limitSide = 0;
  1068. }
  1069. }
  1070. }
  1071. /*
  1072. ============
  1073. idLCP_Symmetric::Solve
  1074. ============
  1075. */
  1076. bool idLCP_Symmetric::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
  1077. int i, j, n, limit, limitSide, boxStartIndex;
  1078. float dir, maxStep, dot, s;
  1079. char *failed;
  1080. // true when the matrix rows are 16 byte padded
  1081. padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
  1082. assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
  1083. assert( o_x.GetSize() == o_m.GetNumRows() );
  1084. assert( o_b.GetSize() == o_m.GetNumRows() );
  1085. assert( o_lo.GetSize() == o_m.GetNumRows() );
  1086. assert( o_hi.GetSize() == o_m.GetNumRows() );
  1087. // allocate memory for permuted input
  1088. f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
  1089. a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
  1090. b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
  1091. lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
  1092. hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
  1093. if ( o_boxIndex ) {
  1094. boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
  1095. memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
  1096. } else {
  1097. boxIndex = NULL;
  1098. }
  1099. // we override the const on o_m here but on exit the matrix is unchanged
  1100. m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
  1101. f.Zero();
  1102. a.Zero();
  1103. b = o_b;
  1104. lo = o_lo;
  1105. hi = o_hi;
  1106. // pointers to the rows of m
  1107. rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
  1108. for ( i = 0; i < m.GetNumRows(); i++ ) {
  1109. rowPtrs[i] = m[i];
  1110. }
  1111. // tells if a variable is at the low boundary, high boundary or inbetween
  1112. side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
  1113. // index to keep track of the permutation
  1114. permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
  1115. for ( i = 0; i < m.GetNumRows(); i++ ) {
  1116. permuted[i] = i;
  1117. }
  1118. // permute input so all unbounded variables come first
  1119. numUnbounded = 0;
  1120. for ( i = 0; i < m.GetNumRows(); i++ ) {
  1121. if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
  1122. if ( numUnbounded != i ) {
  1123. Swap( numUnbounded, i );
  1124. }
  1125. numUnbounded++;
  1126. }
  1127. }
  1128. // permute input so all variables using the boxIndex come last
  1129. boxStartIndex = m.GetNumRows();
  1130. if ( boxIndex ) {
  1131. for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
  1132. if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
  1133. boxStartIndex--;
  1134. if ( boxStartIndex != i ) {
  1135. Swap( boxStartIndex, i );
  1136. }
  1137. }
  1138. }
  1139. }
  1140. // sub matrix for factorization
  1141. clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
  1142. diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  1143. solveCache1.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  1144. solveCache2.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  1145. // all unbounded variables are clamped
  1146. numClamped = numUnbounded;
  1147. // if there are unbounded variables
  1148. if ( numUnbounded ) {
  1149. // factor and solve for unbounded variables
  1150. if ( !FactorClamped() ) {
  1151. idLib::common->Printf( "idLCP_Symmetric::Solve: unbounded factorization failed\n" );
  1152. return false;
  1153. }
  1154. SolveClamped( f, b.ToFloatPtr() );
  1155. // if there are no bounded variables we are done
  1156. if ( numUnbounded == m.GetNumRows() ) {
  1157. o_x = f; // the vector is not permuted
  1158. return true;
  1159. }
  1160. }
  1161. #ifdef IGNORE_UNSATISFIABLE_VARIABLES
  1162. int numIgnored = 0;
  1163. #endif
  1164. // allocate for delta force and delta acceleration
  1165. delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  1166. delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
  1167. // solve for bounded variables
  1168. failed = NULL;
  1169. for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
  1170. clampedChangeStart = 0;
  1171. // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
  1172. if ( i == boxStartIndex ) {
  1173. for ( j = 0; j < boxStartIndex; j++ ) {
  1174. o_x[permuted[j]] = f[j];
  1175. }
  1176. for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
  1177. s = o_x[boxIndex[j]];
  1178. if ( lo[j] != -idMath::INFINITY ) {
  1179. lo[j] = - idMath::Fabs( lo[j] * s );
  1180. }
  1181. if ( hi[j] != idMath::INFINITY ) {
  1182. hi[j] = idMath::Fabs( hi[j] * s );
  1183. }
  1184. }
  1185. }
  1186. // calculate acceleration for current variable
  1187. SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
  1188. a[i] = dot - b[i];
  1189. // if already at the low boundary
  1190. if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
  1191. side[i] = -1;
  1192. continue;
  1193. }
  1194. // if already at the high boundary
  1195. if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
  1196. side[i] = 1;
  1197. continue;
  1198. }
  1199. // if inside the clamped region
  1200. if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
  1201. side[i] = 0;
  1202. AddClamped( i, false );
  1203. continue;
  1204. }
  1205. // drive the current variable into a valid region
  1206. for ( n = 0; n < maxIterations; n++ ) {
  1207. // direction to move
  1208. if ( a[i] <= 0.0f ) {
  1209. dir = 1.0f;
  1210. } else {
  1211. dir = -1.0f;
  1212. }
  1213. // calculate force delta
  1214. CalcForceDelta( i, dir );
  1215. // calculate acceleration delta: delta_a = m * delta_f;
  1216. CalcAccelDelta( i );
  1217. // maximum step we can take
  1218. GetMaxStep( i, dir, maxStep, limit, limitSide );
  1219. if ( maxStep <= 0.0f ) {
  1220. #ifdef IGNORE_UNSATISFIABLE_VARIABLES
  1221. // ignore the current variable completely
  1222. lo[i] = hi[i] = 0.0f;
  1223. f[i] = 0.0f;
  1224. side[i] = -1;
  1225. numIgnored++;
  1226. #else
  1227. failed = va( "invalid step size %.4f", maxStep );
  1228. #endif
  1229. break;
  1230. }
  1231. // change force
  1232. ChangeForce( i, maxStep );
  1233. // change acceleration
  1234. ChangeAccel( i, maxStep );
  1235. // clamp/unclamp the variable that limited this step
  1236. side[limit] = limitSide;
  1237. switch( limitSide ) {
  1238. case 0: {
  1239. a[limit] = 0.0f;
  1240. AddClamped( limit, ( limit == i ) );
  1241. break;
  1242. }
  1243. case -1: {
  1244. f[limit] = lo[limit];
  1245. if ( limit != i ) {
  1246. RemoveClamped( limit );
  1247. }
  1248. break;
  1249. }
  1250. case 1: {
  1251. f[limit] = hi[limit];
  1252. if ( limit != i ) {
  1253. RemoveClamped( limit );
  1254. }
  1255. break;
  1256. }
  1257. }
  1258. // if the current variable limited the step we can continue with the next variable
  1259. if ( limit == i ) {
  1260. break;
  1261. }
  1262. }
  1263. if ( n >= maxIterations ) {
  1264. failed = va( "max iterations %d", maxIterations );
  1265. break;
  1266. }
  1267. if ( failed ) {
  1268. break;
  1269. }
  1270. }
  1271. #ifdef IGNORE_UNSATISFIABLE_VARIABLES
  1272. if ( numIgnored ) {
  1273. if ( lcp_showFailures.GetBool() ) {
  1274. idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
  1275. }
  1276. }
  1277. #endif
  1278. // if failed clear remaining forces
  1279. if ( failed ) {
  1280. if ( lcp_showFailures.GetBool() ) {
  1281. idLib::common->Printf( "idLCP_Symmetric::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
  1282. }
  1283. for ( j = i; j < m.GetNumRows(); j++ ) {
  1284. f[j] = 0.0f;
  1285. }
  1286. }
  1287. #if defined(_DEBUG) && 0
  1288. if ( !failed ) {
  1289. // test whether or not the solution satisfies the complementarity conditions
  1290. for ( i = 0; i < m.GetNumRows(); i++ ) {
  1291. a[i] = -b[i];
  1292. for ( j = 0; j < m.GetNumRows(); j++ ) {
  1293. a[i] += rowPtrs[i][j] * f[j];
  1294. }
  1295. if ( f[i] == lo[i] ) {
  1296. if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
  1297. int bah1 = 1;
  1298. }
  1299. } else if ( f[i] == hi[i] ) {
  1300. if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
  1301. int bah2 = 1;
  1302. }
  1303. } else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
  1304. int bah3 = 1;
  1305. }
  1306. }
  1307. }
  1308. #endif
  1309. // unpermute result
  1310. for ( i = 0; i < f.GetSize(); i++ ) {
  1311. o_x[permuted[i]] = f[i];
  1312. }
  1313. // unpermute original matrix
  1314. for ( i = 0; i < m.GetNumRows(); i++ ) {
  1315. for ( j = 0; j < m.GetNumRows(); j++ ) {
  1316. if ( permuted[j] == i ) {
  1317. break;
  1318. }
  1319. }
  1320. if ( i != j ) {
  1321. m.SwapColumns( i, j );
  1322. idSwap( permuted[i], permuted[j] );
  1323. }
  1324. }
  1325. return true;
  1326. }
  1327. //===============================================================
  1328. //
  1329. // idLCP
  1330. //
  1331. //===============================================================
  1332. /*
  1333. ============
  1334. idLCP::AllocSquare
  1335. ============
  1336. */
  1337. idLCP *idLCP::AllocSquare( void ) {
  1338. idLCP *lcp = new idLCP_Square;
  1339. lcp->SetMaxIterations( 32 );
  1340. return lcp;
  1341. }
  1342. /*
  1343. ============
  1344. idLCP::AllocSymmetric
  1345. ============
  1346. */
  1347. idLCP *idLCP::AllocSymmetric( void ) {
  1348. idLCP *lcp = new idLCP_Symmetric;
  1349. lcp->SetMaxIterations( 32 );
  1350. return lcp;
  1351. }
  1352. /*
  1353. ============
  1354. idLCP::~idLCP
  1355. ============
  1356. */
  1357. idLCP::~idLCP( void ) {
  1358. }
  1359. /*
  1360. ============
  1361. idLCP::SetMaxIterations
  1362. ============
  1363. */
  1364. void idLCP::SetMaxIterations( int max ) {
  1365. maxIterations = max;
  1366. }
  1367. /*
  1368. ============
  1369. idLCP::GetMaxIterations
  1370. ============
  1371. */
  1372. int idLCP::GetMaxIterations( void ) {
  1373. return maxIterations;
  1374. }