tr_trace.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. /*
  2. ===========================================================================
  3. Doom 3 BFG Edition GPL Source Code
  4. Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
  6. Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #pragma hdrstop
  21. #include "../idlib/precompiled.h"
  22. #include "tr_local.h"
  23. #include "Model_local.h"
  24. #include "../idlib/geometry/DrawVert_intrinsics.h"
  25. /*
  26. ====================
  27. R_TracePointCullStatic
  28. ====================
  29. */
  30. static void R_TracePointCullStatic( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts ) {
  31. assert_16_byte_aligned( cullBits );
  32. assert_16_byte_aligned( verts );
  33. #ifdef ID_WIN_X86_SSE2_INTRIN
  34. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
  35. const __m128 vector_float_radius = _mm_splat_ps( _mm_load_ss( &radius ), 0 );
  36. const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f };
  37. const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 );
  38. const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 );
  39. const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 );
  40. const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 );
  41. const __m128i vector_int_mask4 = _mm_set1_epi32( 1 << 4 );
  42. const __m128i vector_int_mask5 = _mm_set1_epi32( 1 << 5 );
  43. const __m128i vector_int_mask6 = _mm_set1_epi32( 1 << 6 );
  44. const __m128i vector_int_mask7 = _mm_set1_epi32( 1 << 7 );
  45. const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() );
  46. const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() );
  47. const __m128 p2 = _mm_loadu_ps( planes[2].ToFloatPtr() );
  48. const __m128 p3 = _mm_loadu_ps( planes[3].ToFloatPtr() );
  49. const __m128 p0X = _mm_splat_ps( p0, 0 );
  50. const __m128 p0Y = _mm_splat_ps( p0, 1 );
  51. const __m128 p0Z = _mm_splat_ps( p0, 2 );
  52. const __m128 p0W = _mm_splat_ps( p0, 3 );
  53. const __m128 p1X = _mm_splat_ps( p1, 0 );
  54. const __m128 p1Y = _mm_splat_ps( p1, 1 );
  55. const __m128 p1Z = _mm_splat_ps( p1, 2 );
  56. const __m128 p1W = _mm_splat_ps( p1, 3 );
  57. const __m128 p2X = _mm_splat_ps( p2, 0 );
  58. const __m128 p2Y = _mm_splat_ps( p2, 1 );
  59. const __m128 p2Z = _mm_splat_ps( p2, 2 );
  60. const __m128 p2W = _mm_splat_ps( p2, 3 );
  61. const __m128 p3X = _mm_splat_ps( p3, 0 );
  62. const __m128 p3Y = _mm_splat_ps( p3, 1 );
  63. const __m128 p3Z = _mm_splat_ps( p3, 2 );
  64. const __m128 p3W = _mm_splat_ps( p3, 3 );
  65. __m128i vecTotalOrInt = { 0, 0, 0, 0 };
  66. for ( int i = 0; i < numVerts; ) {
  67. const int nextNumVerts = vertsODS.FetchNextBatch() - 4;
  68. for ( ; i <= nextNumVerts; i += 4 ) {
  69. const __m128 v0 = _mm_load_ps( vertsODS[i + 0].xyz.ToFloatPtr() );
  70. const __m128 v1 = _mm_load_ps( vertsODS[i + 1].xyz.ToFloatPtr() );
  71. const __m128 v2 = _mm_load_ps( vertsODS[i + 2].xyz.ToFloatPtr() );
  72. const __m128 v3 = _mm_load_ps( vertsODS[i + 3].xyz.ToFloatPtr() );
  73. const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.z, v2.z
  74. const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.y, v2.y, v0.w, v2.w
  75. const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.z, v3.z
  76. const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.y, v3.y, v1.w, v3.w
  77. const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x
  78. const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y
  79. const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z
  80. const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) );
  81. const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) );
  82. const __m128 d2 = _mm_madd_ps( vX, p2X, _mm_madd_ps( vY, p2Y, _mm_madd_ps( vZ, p2Z, p2W ) ) );
  83. const __m128 d3 = _mm_madd_ps( vX, p3X, _mm_madd_ps( vY, p3Y, _mm_madd_ps( vZ, p3Z, p3W ) ) );
  84. const __m128 t0 = _mm_add_ps( d0, vector_float_radius );
  85. const __m128 t1 = _mm_add_ps( d1, vector_float_radius );
  86. const __m128 t2 = _mm_add_ps( d2, vector_float_radius );
  87. const __m128 t3 = _mm_add_ps( d3, vector_float_radius );
  88. const __m128 t4 = _mm_sub_ps( d0, vector_float_radius );
  89. const __m128 t5 = _mm_sub_ps( d1, vector_float_radius );
  90. const __m128 t6 = _mm_sub_ps( d2, vector_float_radius );
  91. const __m128 t7 = _mm_sub_ps( d3, vector_float_radius );
  92. __m128i c0 = __m128c( _mm_cmpgt_ps( t0, vector_float_zero ) );
  93. __m128i c1 = __m128c( _mm_cmpgt_ps( t1, vector_float_zero ) );
  94. __m128i c2 = __m128c( _mm_cmpgt_ps( t2, vector_float_zero ) );
  95. __m128i c3 = __m128c( _mm_cmpgt_ps( t3, vector_float_zero ) );
  96. __m128i c4 = __m128c( _mm_cmplt_ps( t4, vector_float_zero ) );
  97. __m128i c5 = __m128c( _mm_cmplt_ps( t5, vector_float_zero ) );
  98. __m128i c6 = __m128c( _mm_cmplt_ps( t6, vector_float_zero ) );
  99. __m128i c7 = __m128c( _mm_cmplt_ps( t7, vector_float_zero ) );
  100. c0 = _mm_and_si128( c0, vector_int_mask0 );
  101. c1 = _mm_and_si128( c1, vector_int_mask1 );
  102. c2 = _mm_and_si128( c2, vector_int_mask2 );
  103. c3 = _mm_and_si128( c3, vector_int_mask3 );
  104. c4 = _mm_and_si128( c4, vector_int_mask4 );
  105. c5 = _mm_and_si128( c5, vector_int_mask5 );
  106. c6 = _mm_and_si128( c6, vector_int_mask6 );
  107. c7 = _mm_and_si128( c7, vector_int_mask7 );
  108. c0 = _mm_or_si128( c0, c1 );
  109. c2 = _mm_or_si128( c2, c3 );
  110. c4 = _mm_or_si128( c4, c5 );
  111. c6 = _mm_or_si128( c6, c7 );
  112. c0 = _mm_or_si128( c0, c2 );
  113. c4 = _mm_or_si128( c4, c6 );
  114. c0 = _mm_or_si128( c0, c4 );
  115. vecTotalOrInt = _mm_or_si128( vecTotalOrInt, c0 );
  116. __m128i s0 = _mm_packs_epi32( c0, c0 );
  117. __m128i b0 = _mm_packus_epi16( s0, s0 );
  118. *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( b0 );
  119. }
  120. }
  121. vecTotalOrInt = _mm_or_si128( vecTotalOrInt, _mm_shuffle_epi32( vecTotalOrInt, _MM_SHUFFLE( 1, 0, 3, 2 ) ) );
  122. vecTotalOrInt = _mm_or_si128( vecTotalOrInt, _mm_shuffle_epi32( vecTotalOrInt, _MM_SHUFFLE( 2, 3, 0, 1 ) ) );
  123. __m128i vecTotalOrShort = _mm_packs_epi32( vecTotalOrInt, vecTotalOrInt );
  124. __m128i vecTotalOrByte = _mm_packus_epi16( vecTotalOrShort, vecTotalOrShort );
  125. totalOr = (byte) _mm_cvtsi128_si32( vecTotalOrByte );
  126. #else
  127. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
  128. byte tOr = 0;
  129. for ( int i = 0; i < numVerts; ) {
  130. const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
  131. for ( ; i <= nextNumVerts; i++ ) {
  132. const idVec3 & v = vertsODS[i].xyz;
  133. const float d0 = planes[0].Distance( v );
  134. const float d1 = planes[1].Distance( v );
  135. const float d2 = planes[2].Distance( v );
  136. const float d3 = planes[3].Distance( v );
  137. const float t0 = d0 + radius;
  138. const float t1 = d1 + radius;
  139. const float t2 = d2 + radius;
  140. const float t3 = d3 + radius;
  141. const float s0 = d0 - radius;
  142. const float s1 = d1 - radius;
  143. const float s2 = d2 - radius;
  144. const float s3 = d3 - radius;
  145. byte bits;
  146. bits = IEEE_FLT_SIGNBITSET( t0 ) << 0;
  147. bits |= IEEE_FLT_SIGNBITSET( t1 ) << 1;
  148. bits |= IEEE_FLT_SIGNBITSET( t2 ) << 2;
  149. bits |= IEEE_FLT_SIGNBITSET( t3 ) << 3;
  150. bits |= IEEE_FLT_SIGNBITSET( s0 ) << 4;
  151. bits |= IEEE_FLT_SIGNBITSET( s1 ) << 5;
  152. bits |= IEEE_FLT_SIGNBITSET( s2 ) << 6;
  153. bits |= IEEE_FLT_SIGNBITSET( s3 ) << 7;
  154. bits ^= 0x0F; // flip lower four bits
  155. tOr |= bits;
  156. cullBits[i] = bits;
  157. }
  158. }
  159. totalOr = tOr;
  160. #endif
  161. }
  162. /*
  163. ====================
  164. R_TracePointCullSkinned
  165. ====================
  166. */
  167. static void R_TracePointCullSkinned( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts, const idJointMat * joints ) {
  168. assert_16_byte_aligned( cullBits );
  169. assert_16_byte_aligned( verts );
  170. #ifdef ID_WIN_X86_SSE2_INTRIN
  171. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
  172. const __m128 vector_float_radius = _mm_splat_ps( _mm_load_ss( &radius ), 0 );
  173. const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f };
  174. const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 );
  175. const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 );
  176. const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 );
  177. const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 );
  178. const __m128i vector_int_mask4 = _mm_set1_epi32( 1 << 4 );
  179. const __m128i vector_int_mask5 = _mm_set1_epi32( 1 << 5 );
  180. const __m128i vector_int_mask6 = _mm_set1_epi32( 1 << 6 );
  181. const __m128i vector_int_mask7 = _mm_set1_epi32( 1 << 7 );
  182. const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() );
  183. const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() );
  184. const __m128 p2 = _mm_loadu_ps( planes[2].ToFloatPtr() );
  185. const __m128 p3 = _mm_loadu_ps( planes[3].ToFloatPtr() );
  186. const __m128 p0X = _mm_splat_ps( p0, 0 );
  187. const __m128 p0Y = _mm_splat_ps( p0, 1 );
  188. const __m128 p0Z = _mm_splat_ps( p0, 2 );
  189. const __m128 p0W = _mm_splat_ps( p0, 3 );
  190. const __m128 p1X = _mm_splat_ps( p1, 0 );
  191. const __m128 p1Y = _mm_splat_ps( p1, 1 );
  192. const __m128 p1Z = _mm_splat_ps( p1, 2 );
  193. const __m128 p1W = _mm_splat_ps( p1, 3 );
  194. const __m128 p2X = _mm_splat_ps( p2, 0 );
  195. const __m128 p2Y = _mm_splat_ps( p2, 1 );
  196. const __m128 p2Z = _mm_splat_ps( p2, 2 );
  197. const __m128 p2W = _mm_splat_ps( p2, 3 );
  198. const __m128 p3X = _mm_splat_ps( p3, 0 );
  199. const __m128 p3Y = _mm_splat_ps( p3, 1 );
  200. const __m128 p3Z = _mm_splat_ps( p3, 2 );
  201. const __m128 p3W = _mm_splat_ps( p3, 3 );
  202. __m128i vecTotalOrInt = { 0, 0, 0, 0 };
  203. for ( int i = 0; i < numVerts; ) {
  204. const int nextNumVerts = vertsODS.FetchNextBatch() - 4;
  205. for ( ; i <= nextNumVerts; i += 4 ) {
  206. const __m128 v0 = LoadSkinnedDrawVertPosition( vertsODS[i + 0], joints );
  207. const __m128 v1 = LoadSkinnedDrawVertPosition( vertsODS[i + 1], joints );
  208. const __m128 v2 = LoadSkinnedDrawVertPosition( vertsODS[i + 2], joints );
  209. const __m128 v3 = LoadSkinnedDrawVertPosition( vertsODS[i + 3], joints );
  210. const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.z, v2.z
  211. const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.y, v2.y, v0.w, v2.w
  212. const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.z, v3.z
  213. const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.y, v3.y, v1.w, v3.w
  214. const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x
  215. const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y
  216. const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z
  217. const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) );
  218. const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) );
  219. const __m128 d2 = _mm_madd_ps( vX, p2X, _mm_madd_ps( vY, p2Y, _mm_madd_ps( vZ, p2Z, p2W ) ) );
  220. const __m128 d3 = _mm_madd_ps( vX, p3X, _mm_madd_ps( vY, p3Y, _mm_madd_ps( vZ, p3Z, p3W ) ) );
  221. const __m128 t0 = _mm_add_ps( d0, vector_float_radius );
  222. const __m128 t1 = _mm_add_ps( d1, vector_float_radius );
  223. const __m128 t2 = _mm_add_ps( d2, vector_float_radius );
  224. const __m128 t3 = _mm_add_ps( d3, vector_float_radius );
  225. const __m128 t4 = _mm_sub_ps( d0, vector_float_radius );
  226. const __m128 t5 = _mm_sub_ps( d1, vector_float_radius );
  227. const __m128 t6 = _mm_sub_ps( d2, vector_float_radius );
  228. const __m128 t7 = _mm_sub_ps( d3, vector_float_radius );
  229. __m128i c0 = __m128c( _mm_cmpgt_ps( t0, vector_float_zero ) );
  230. __m128i c1 = __m128c( _mm_cmpgt_ps( t1, vector_float_zero ) );
  231. __m128i c2 = __m128c( _mm_cmpgt_ps( t2, vector_float_zero ) );
  232. __m128i c3 = __m128c( _mm_cmpgt_ps( t3, vector_float_zero ) );
  233. __m128i c4 = __m128c( _mm_cmplt_ps( t4, vector_float_zero ) );
  234. __m128i c5 = __m128c( _mm_cmplt_ps( t5, vector_float_zero ) );
  235. __m128i c6 = __m128c( _mm_cmplt_ps( t6, vector_float_zero ) );
  236. __m128i c7 = __m128c( _mm_cmplt_ps( t7, vector_float_zero ) );
  237. c0 = _mm_and_si128( c0, vector_int_mask0 );
  238. c1 = _mm_and_si128( c1, vector_int_mask1 );
  239. c2 = _mm_and_si128( c2, vector_int_mask2 );
  240. c3 = _mm_and_si128( c3, vector_int_mask3 );
  241. c4 = _mm_and_si128( c4, vector_int_mask4 );
  242. c5 = _mm_and_si128( c5, vector_int_mask5 );
  243. c6 = _mm_and_si128( c6, vector_int_mask6 );
  244. c7 = _mm_and_si128( c7, vector_int_mask7 );
  245. c0 = _mm_or_si128( c0, c1 );
  246. c2 = _mm_or_si128( c2, c3 );
  247. c4 = _mm_or_si128( c4, c5 );
  248. c6 = _mm_or_si128( c6, c7 );
  249. c0 = _mm_or_si128( c0, c2 );
  250. c4 = _mm_or_si128( c4, c6 );
  251. c0 = _mm_or_si128( c0, c4 );
  252. vecTotalOrInt = _mm_or_si128( vecTotalOrInt, c0 );
  253. __m128i s0 = _mm_packs_epi32( c0, c0 );
  254. __m128i b0 = _mm_packus_epi16( s0, s0 );
  255. *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( b0 );
  256. }
  257. }
  258. vecTotalOrInt = _mm_or_si128( vecTotalOrInt, _mm_shuffle_epi32( vecTotalOrInt, _MM_SHUFFLE( 1, 0, 3, 2 ) ) );
  259. vecTotalOrInt = _mm_or_si128( vecTotalOrInt, _mm_shuffle_epi32( vecTotalOrInt, _MM_SHUFFLE( 2, 3, 0, 1 ) ) );
  260. __m128i vecTotalOrShort = _mm_packs_epi32( vecTotalOrInt, vecTotalOrInt );
  261. __m128i vecTotalOrByte = _mm_packus_epi16( vecTotalOrShort, vecTotalOrShort );
  262. totalOr = (byte) _mm_cvtsi128_si32( vecTotalOrByte );
  263. #else
  264. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
  265. byte tOr = 0;
  266. for ( int i = 0; i < numVerts; ) {
  267. const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
  268. for ( ; i <= nextNumVerts; i++ ) {
  269. const idVec3 v = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints );
  270. const float d0 = planes[0].Distance( v );
  271. const float d1 = planes[1].Distance( v );
  272. const float d2 = planes[2].Distance( v );
  273. const float d3 = planes[3].Distance( v );
  274. const float t0 = d0 + radius;
  275. const float t1 = d1 + radius;
  276. const float t2 = d2 + radius;
  277. const float t3 = d3 + radius;
  278. const float s0 = d0 - radius;
  279. const float s1 = d1 - radius;
  280. const float s2 = d2 - radius;
  281. const float s3 = d3 - radius;
  282. byte bits;
  283. bits = IEEE_FLT_SIGNBITSET( t0 ) << 0;
  284. bits |= IEEE_FLT_SIGNBITSET( t1 ) << 1;
  285. bits |= IEEE_FLT_SIGNBITSET( t2 ) << 2;
  286. bits |= IEEE_FLT_SIGNBITSET( t3 ) << 3;
  287. bits |= IEEE_FLT_SIGNBITSET( s0 ) << 4;
  288. bits |= IEEE_FLT_SIGNBITSET( s1 ) << 5;
  289. bits |= IEEE_FLT_SIGNBITSET( s2 ) << 6;
  290. bits |= IEEE_FLT_SIGNBITSET( s3 ) << 7;
  291. bits ^= 0x0F; // flip lower four bits
  292. tOr |= bits;
  293. cullBits[i] = bits;
  294. }
  295. }
  296. totalOr = tOr;
  297. #endif
  298. }
  299. /*
  300. ====================
  301. R_LineIntersectsTriangleExpandedWithCircle
  302. The triangle is expanded in the plane with a circle of the given radius.
  303. ====================
  304. */
  305. static bool R_LineIntersectsTriangleExpandedWithCircle( localTrace_t & hit, const idVec3 & start, const idVec3 & end, const float circleRadius, const idVec3 & triVert0, const idVec3 & triVert1, const idVec3 & triVert2 ) {
  306. const idPlane plane( triVert0, triVert1, triVert2 );
  307. const float planeDistStart = plane.Distance( start );
  308. const float planeDistEnd = plane.Distance( end );
  309. if ( planeDistStart < 0.0f ) {
  310. return false; // starts past the triangle
  311. }
  312. if ( planeDistEnd > 0.0f ) {
  313. return false; // finishes in front of the triangle
  314. }
  315. const float planeDelta = planeDistStart - planeDistEnd;
  316. if ( planeDelta < idMath::FLT_SMALLEST_NON_DENORMAL ) {
  317. return false; // coming at the triangle from behind or parallel
  318. }
  319. const float fraction = planeDistStart / planeDelta;
  320. if ( fraction < 0.0f ) {
  321. return false; // shouldn't happen
  322. }
  323. if ( fraction >= hit.fraction ) {
  324. return false; // have already hit something closer
  325. }
  326. // find the exact point of impact with the plane
  327. const idVec3 point = start + fraction * ( end - start );
  328. // see if the point is within the three edges
  329. // if radius > 0 the triangle is expanded with a circle in the triangle plane
  330. const float radiusSqr = circleRadius * circleRadius;
  331. const idVec3 dir0 = triVert0 - point;
  332. const idVec3 dir1 = triVert1 - point;
  333. const idVec3 cross0 = dir0.Cross( dir1 );
  334. float d0 = plane.Normal() * cross0;
  335. if ( d0 > 0.0f ) {
  336. if ( radiusSqr <= 0.0f ) {
  337. return false;
  338. }
  339. idVec3 edge = triVert0 - triVert1;
  340. const float edgeLengthSqr = edge.LengthSqr();
  341. if ( cross0.LengthSqr() > edgeLengthSqr * radiusSqr ) {
  342. return false;
  343. }
  344. d0 = edge * dir0;
  345. if ( d0 < 0.0f ) {
  346. edge = triVert0 - triVert2;
  347. d0 = edge * dir0;
  348. if ( d0 < 0.0f ) {
  349. if ( dir0.LengthSqr() > radiusSqr ) {
  350. return false;
  351. }
  352. }
  353. } else if ( d0 > edgeLengthSqr ) {
  354. edge = triVert1 - triVert2;
  355. d0 = edge * dir1;
  356. if ( d0 < 0.0f ) {
  357. if ( dir1.LengthSqr() > radiusSqr ) {
  358. return false;
  359. }
  360. }
  361. }
  362. }
  363. const idVec3 dir2 = triVert2 - point;
  364. const idVec3 cross1 = dir1.Cross( dir2 );
  365. float d1 = plane.Normal() * cross1;
  366. if ( d1 > 0.0f ) {
  367. if ( radiusSqr <= 0.0f ) {
  368. return false;
  369. }
  370. idVec3 edge = triVert1 - triVert2;
  371. const float edgeLengthSqr = edge.LengthSqr();
  372. if ( cross1.LengthSqr() > edgeLengthSqr * radiusSqr ) {
  373. return false;
  374. }
  375. d1 = edge * dir1;
  376. if ( d1 < 0.0f ) {
  377. edge = triVert1 - triVert0;
  378. d1 = edge * dir1;
  379. if ( d1 < 0.0f ) {
  380. if ( dir1.LengthSqr() > radiusSqr ) {
  381. return false;
  382. }
  383. }
  384. } else if ( d1 > edgeLengthSqr ) {
  385. edge = triVert2 - triVert0;
  386. d1 = edge * dir2;
  387. if ( d1 < 0.0f ) {
  388. if ( dir2.LengthSqr() > radiusSqr ) {
  389. return false;
  390. }
  391. }
  392. }
  393. }
  394. const idVec3 cross2 = dir2.Cross( dir0 );
  395. float d2 = plane.Normal() * cross2;
  396. if ( d2 > 0.0f ) {
  397. if ( radiusSqr <= 0.0f ) {
  398. return false;
  399. }
  400. idVec3 edge = triVert2 - triVert0;
  401. const float edgeLengthSqr = edge.LengthSqr();
  402. if ( cross2.LengthSqr() > edgeLengthSqr * radiusSqr ) {
  403. return false;
  404. }
  405. d2 = edge * dir2;
  406. if ( d2 < 0.0f ) {
  407. edge = triVert2 - triVert1;
  408. d2 = edge * dir2;
  409. if ( d2 < 0.0f ) {
  410. if ( dir2.LengthSqr() > radiusSqr ) {
  411. return false;
  412. }
  413. }
  414. } else if ( d2 > edgeLengthSqr ) {
  415. edge = triVert0 - triVert1;
  416. d2 = edge * dir0;
  417. if ( d2 < 0.0f ) {
  418. if ( dir0.LengthSqr() > radiusSqr ) {
  419. return false;
  420. }
  421. }
  422. }
  423. }
  424. // we hit this triangle
  425. hit.fraction = fraction;
  426. hit.normal = plane.Normal();
  427. hit.point = point;
  428. return true;
  429. }
  430. /*
  431. ====================
  432. R_LocalTrace
  433. ====================
  434. */
  435. localTrace_t R_LocalTrace( const idVec3 &start, const idVec3 &end, const float radius, const srfTriangles_t *tri ) {
  436. localTrace_t hit;
  437. hit.fraction = 1.0f;
  438. ALIGNTYPE16 idPlane planes[4];
  439. // create two planes orthogonal to each other that intersect along the trace
  440. idVec3 startDir = end - start;
  441. startDir.Normalize();
  442. startDir.NormalVectors( planes[0].Normal(), planes[1].Normal() );
  443. planes[0][3] = - start * planes[0].Normal();
  444. planes[1][3] = - start * planes[1].Normal();
  445. // create front and end planes so the trace is on the positive sides of both
  446. planes[2] = startDir;
  447. planes[2][3] = - start * planes[2].Normal();
  448. planes[3] = -startDir;
  449. planes[3][3] = - end * planes[3].Normal();
  450. // catagorize each point against the four planes
  451. byte * cullBits = (byte *) _alloca16( ALIGN( tri->numVerts, 4 ) ); // round up to a multiple of 4 for SIMD
  452. byte totalOr = 0;
  453. const idJointMat * joints = ( tri->staticModelWithJoints != NULL && r_useGPUSkinning.GetBool() ) ? tri->staticModelWithJoints->jointsInverted : NULL;
  454. if ( joints != NULL ) {
  455. R_TracePointCullSkinned( cullBits, totalOr, radius, planes, tri->verts, tri->numVerts, joints );
  456. } else {
  457. R_TracePointCullStatic( cullBits, totalOr, radius, planes, tri->verts, tri->numVerts );
  458. }
  459. // if we don't have points on both sides of both the ray planes, no intersection
  460. if ( ( totalOr ^ ( totalOr >> 4 ) ) & 3 ) {
  461. return hit;
  462. }
  463. // if we don't have any points between front and end, no intersection
  464. if ( ( totalOr ^ ( totalOr >> 1 ) ) & 4 ) {
  465. return hit;
  466. }
  467. // start streaming the indexes
  468. idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 3 > indexesODS( tri->indexes, tri->numIndexes );
  469. for ( int i = 0; i < tri->numIndexes; ) {
  470. const int nextNumIndexes = indexesODS.FetchNextBatch() - 3;
  471. for ( ; i <= nextNumIndexes; i += 3 ) {
  472. const int i0 = indexesODS[i + 0];
  473. const int i1 = indexesODS[i + 1];
  474. const int i2 = indexesODS[i + 2];
  475. // get sidedness info for the triangle
  476. const byte triOr = cullBits[i0] | cullBits[i1] | cullBits[i2];
  477. // if we don't have points on both sides of both the ray planes, no intersection
  478. if ( likely( ( triOr ^ ( triOr >> 4 ) ) & 3 ) ) {
  479. continue;
  480. }
  481. // if we don't have any points between front and end, no intersection
  482. if ( unlikely( ( triOr ^ ( triOr >> 1 ) ) & 4 ) ) {
  483. continue;
  484. }
  485. const idVec3 triVert0 = idDrawVert::GetSkinnedDrawVertPosition( idODSObject< idDrawVert > ( & tri->verts[i0] ), joints );
  486. const idVec3 triVert1 = idDrawVert::GetSkinnedDrawVertPosition( idODSObject< idDrawVert > ( & tri->verts[i1] ), joints );
  487. const idVec3 triVert2 = idDrawVert::GetSkinnedDrawVertPosition( idODSObject< idDrawVert > ( & tri->verts[i2] ), joints );
  488. if ( R_LineIntersectsTriangleExpandedWithCircle( hit, start, end, radius, triVert0, triVert1, triVert2 ) ) {
  489. hit.indexes[0] = i0;
  490. hit.indexes[1] = i1;
  491. hit.indexes[2] = i2;
  492. }
  493. }
  494. }
  495. return hit;
  496. }