ModelOverlay.cpp 25 KB

  1. /*
  2. ===========================================================================
  3. Doom 3 BFG Edition GPL Source Code
  4. Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
  6. Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #pragma hdrstop
  21. #include "../idlib/precompiled.h"
  22. #include "tr_local.h"
  23. #include "Model_local.h"
  24. #include "../idlib/geometry/DrawVert_intrinsics.h"
  25. /*
  26. ====================
  27. idRenderModelOverlay::idRenderModelOverlay
  28. ====================
  29. */
  30. idRenderModelOverlay::idRenderModelOverlay() :
  31. firstOverlay( 0 ),
  32. nextOverlay( 0 ),
  33. firstDeferredOverlay( 0 ),
  34. nextDeferredOverlay( 0 ),
  35. numOverlayMaterials( 0 ) {
  36. memset( overlays, 0, sizeof( overlays ) );
  37. }
  38. /*
  39. ====================
  40. idRenderModelOverlay::~idRenderModelOverlay
  41. ====================
  42. */
  43. idRenderModelOverlay::~idRenderModelOverlay() {
  44. for ( unsigned int i = 0; i < MAX_OVERLAYS; i++ ) {
  45. FreeOverlay( overlays[i] );
  46. }
  47. }
  48. /*
  49. =================
  50. idRenderModelOverlay::ReUse
  51. =================
  52. */
  53. void idRenderModelOverlay::ReUse() {
  54. firstOverlay = 0;
  55. nextOverlay = 0;
  56. firstDeferredOverlay = 0;
  57. nextDeferredOverlay = 0;
  58. numOverlayMaterials = 0;
  59. for ( unsigned int i = 0; i < MAX_OVERLAYS; i++ ) {
  60. FreeOverlay( overlays[i] );
  61. }
  62. }
  63. /*
  64. ====================
  65. idRenderModelOverlay::FreeOverlay
  66. ====================
  67. */
  68. void idRenderModelOverlay::FreeOverlay( overlay_t & overlay ) {
  69. if ( overlay.verts != NULL ) {
  70. Mem_Free( overlay.verts );
  71. }
  72. if ( overlay.indexes != NULL ) {
  73. Mem_Free( overlay.indexes );
  74. }
  75. memset( &overlay, 0, sizeof( overlay ) );
  76. }
  77. /*
  78. ====================
  79. R_OverlayPointCullStatic
  80. ====================
  81. */
  82. static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS, halfFloat_t * texCoordT, const idPlane * planes, const idDrawVert * verts, const int numVerts ) {
  83. assert_16_byte_aligned( cullBits );
  84. assert_16_byte_aligned( texCoordS );
  85. assert_16_byte_aligned( texCoordT );
  86. assert_16_byte_aligned( verts );
  87. #ifdef ID_WIN_X86_SSE2_INTRIN
  88. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
  89. const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f };
  90. const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f };
  91. const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 );
  92. const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 );
  93. const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 );
  94. const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 );
  95. const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() );
  96. const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() );
  97. const __m128 p0X = _mm_splat_ps( p0, 0 );
  98. const __m128 p0Y = _mm_splat_ps( p0, 1 );
  99. const __m128 p0Z = _mm_splat_ps( p0, 2 );
  100. const __m128 p0W = _mm_splat_ps( p0, 3 );
  101. const __m128 p1X = _mm_splat_ps( p1, 0 );
  102. const __m128 p1Y = _mm_splat_ps( p1, 1 );
  103. const __m128 p1Z = _mm_splat_ps( p1, 2 );
  104. const __m128 p1W = _mm_splat_ps( p1, 3 );
  105. for ( int i = 0; i < numVerts; ) {
  106. const int nextNumVerts = vertsODS.FetchNextBatch() - 4;
  107. for ( ; i <= nextNumVerts; i += 4 ) {
  108. const __m128 v0 = _mm_load_ps( vertsODS[i + 0].xyz.ToFloatPtr() );
  109. const __m128 v1 = _mm_load_ps( vertsODS[i + 1].xyz.ToFloatPtr() );
  110. const __m128 v2 = _mm_load_ps( vertsODS[i + 2].xyz.ToFloatPtr() );
  111. const __m128 v3 = _mm_load_ps( vertsODS[i + 3].xyz.ToFloatPtr() );
  112. const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.z, v2.z
  113. const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.y, v2.y, v0.w, v2.w
  114. const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.z, v3.z
  115. const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.y, v3.y, v1.w, v3.w
  116. const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x
  117. const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y
  118. const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z
  119. const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) );
  120. const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) );
  121. const __m128 d2 = _mm_sub_ps( vector_float_one, d0 );
  122. const __m128 d3 = _mm_sub_ps( vector_float_one, d1 );
  123. __m128i flt16S = FastF32toF16( __m128c( d0 ) );
  124. __m128i flt16T = FastF32toF16( __m128c( d1 ) );
  125. _mm_storel_epi64( (__m128i *)&texCoordS[i], flt16S );
  126. _mm_storel_epi64( (__m128i *)&texCoordT[i], flt16T );
  127. __m128i c0 = __m128c( _mm_cmplt_ps( d0, vector_float_zero ) );
  128. __m128i c1 = __m128c( _mm_cmplt_ps( d1, vector_float_zero ) );
  129. __m128i c2 = __m128c( _mm_cmplt_ps( d2, vector_float_zero ) );
  130. __m128i c3 = __m128c( _mm_cmplt_ps( d3, vector_float_zero ) );
  131. c0 = _mm_and_si128( c0, vector_int_mask0 );
  132. c1 = _mm_and_si128( c1, vector_int_mask1 );
  133. c2 = _mm_and_si128( c2, vector_int_mask2 );
  134. c3 = _mm_and_si128( c3, vector_int_mask3 );
  135. c0 = _mm_or_si128( c0, c1 );
  136. c2 = _mm_or_si128( c2, c3 );
  137. c0 = _mm_or_si128( c0, c2 );
  138. c0 = _mm_packs_epi32( c0, c0 );
  139. c0 = _mm_packus_epi16( c0, c0 );
  140. *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( c0 );
  141. }
  142. }
  143. #else
  144. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
  145. for ( int i = 0; i < numVerts; ) {
  146. const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
  147. for ( ; i <= nextNumVerts; i++ ) {
  148. const idVec3 & v = vertsODS[i].xyz;
  149. const float d0 = planes[0].Distance( v );
  150. const float d1 = planes[1].Distance( v );
  151. const float d2 = 1.0f - d0;
  152. const float d3 = 1.0f - d1;
  153. halfFloat_t s = Scalar_FastF32toF16( d0 );
  154. halfFloat_t t = Scalar_FastF32toF16( d1 );
  155. texCoordS[i] = s;
  156. texCoordT[i] = t;
  157. byte bits;
  158. bits = IEEE_FLT_SIGNBITSET( d0 ) << 0;
  159. bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
  160. bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
  161. bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
  162. cullBits[i] = bits;
  163. }
  164. }
  165. #endif
  166. }
  167. /*
  168. ====================
  169. R_OverlayPointCullSkinned
  170. ====================
  171. */
  172. static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS, halfFloat_t * texCoordT, const idPlane * planes, const idDrawVert * verts, const int numVerts, const idJointMat * joints ) {
  173. assert_16_byte_aligned( cullBits );
  174. assert_16_byte_aligned( texCoordS );
  175. assert_16_byte_aligned( texCoordT );
  176. assert_16_byte_aligned( verts );
  177. #ifdef ID_WIN_X86_SSE2_INTRIN
  178. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
  179. const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f };
  180. const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f };
  181. const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 );
  182. const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 );
  183. const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 );
  184. const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 );
  185. const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() );
  186. const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() );
  187. const __m128 p0X = _mm_splat_ps( p0, 0 );
  188. const __m128 p0Y = _mm_splat_ps( p0, 1 );
  189. const __m128 p0Z = _mm_splat_ps( p0, 2 );
  190. const __m128 p0W = _mm_splat_ps( p0, 3 );
  191. const __m128 p1X = _mm_splat_ps( p1, 0 );
  192. const __m128 p1Y = _mm_splat_ps( p1, 1 );
  193. const __m128 p1Z = _mm_splat_ps( p1, 2 );
  194. const __m128 p1W = _mm_splat_ps( p1, 3 );
  195. for ( int i = 0; i < numVerts; ) {
  196. const int nextNumVerts = vertsODS.FetchNextBatch() - 4;
  197. for ( ; i <= nextNumVerts; i += 4 ) {
  198. const __m128 v0 = LoadSkinnedDrawVertPosition( vertsODS[i + 0], joints );
  199. const __m128 v1 = LoadSkinnedDrawVertPosition( vertsODS[i + 1], joints );
  200. const __m128 v2 = LoadSkinnedDrawVertPosition( vertsODS[i + 2], joints );
  201. const __m128 v3 = LoadSkinnedDrawVertPosition( vertsODS[i + 3], joints );
  202. const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.z, v2.z
  203. const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.y, v2.y, v0.w, v2.w
  204. const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.z, v3.z
  205. const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.y, v3.y, v1.w, v3.w
  206. const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x
  207. const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y
  208. const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z
  209. const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) );
  210. const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) );
  211. const __m128 d2 = _mm_sub_ps( vector_float_one, d0 );
  212. const __m128 d3 = _mm_sub_ps( vector_float_one, d1 );
  213. __m128i flt16S = FastF32toF16( __m128c( d0 ) );
  214. __m128i flt16T = FastF32toF16( __m128c( d1 ) );
  215. _mm_storel_epi64( (__m128i *)&texCoordS[i], flt16S );
  216. _mm_storel_epi64( (__m128i *)&texCoordT[i], flt16T );
  217. __m128i c0 = __m128c( _mm_cmplt_ps( d0, vector_float_zero ) );
  218. __m128i c1 = __m128c( _mm_cmplt_ps( d1, vector_float_zero ) );
  219. __m128i c2 = __m128c( _mm_cmplt_ps( d2, vector_float_zero ) );
  220. __m128i c3 = __m128c( _mm_cmplt_ps( d3, vector_float_zero ) );
  221. c0 = _mm_and_si128( c0, vector_int_mask0 );
  222. c1 = _mm_and_si128( c1, vector_int_mask1 );
  223. c2 = _mm_and_si128( c2, vector_int_mask2 );
  224. c3 = _mm_and_si128( c3, vector_int_mask3 );
  225. c0 = _mm_or_si128( c0, c1 );
  226. c2 = _mm_or_si128( c2, c3 );
  227. c0 = _mm_or_si128( c0, c2 );
  228. c0 = _mm_packs_epi32( c0, c0 );
  229. c0 = _mm_packus_epi16( c0, c0 );
  230. *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( c0 );
  231. }
  232. }
  233. #else
  234. idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
  235. for ( int i = 0; i < numVerts; ) {
  236. const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
  237. for ( ; i <= nextNumVerts; i++ ) {
  238. const idVec3 transformed = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints );
  239. const float d0 = planes[0].Distance( transformed );
  240. const float d1 = planes[1].Distance( transformed );
  241. const float d2 = 1.0f - d0;
  242. const float d3 = 1.0f - d1;
  243. halfFloat_t s = Scalar_FastF32toF16( d0 );
  244. halfFloat_t t = Scalar_FastF32toF16( d1 );
  245. texCoordS[i] = s;
  246. texCoordT[i] = t;
  247. byte bits;
  248. bits = IEEE_FLT_SIGNBITSET( d0 ) << 0;
  249. bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
  250. bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
  251. bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
  252. cullBits[i] = bits;
  253. }
  254. }
  255. #endif
  256. }
  257. /*
  258. =====================
  259. idRenderModelOverlay::CreateOverlay
  260. This projects on both front and back sides to avoid seams
  261. The material should be clamped, because entire triangles are added, some of which
  262. may extend well past the 0.0 to 1.0 texture range
  263. =====================
  264. */
  265. void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPlane localTextureAxis[2], const idMaterial *material ) {
  266. // count up the maximum possible vertices and indexes per surface
  267. int maxVerts = 0;
  268. int maxIndexes = 0;
  269. for ( int surfNum = 0; surfNum < model->NumSurfaces(); surfNum++ ) {
  270. const modelSurface_t *surf = model->Surface( surfNum );
  271. if ( surf->geometry->numVerts > maxVerts ) {
  272. maxVerts = surf->geometry->numVerts;
  273. }
  274. if ( surf->geometry->numIndexes > maxIndexes ) {
  275. maxIndexes = surf->geometry->numIndexes;
  276. }
  277. }
  278. maxIndexes += 3 * 16 / sizeof( triIndex_t ); // to allow the index size to be a multiple of 16 bytes
  279. // make temporary buffers for the building process
  280. idTempArray< byte > cullBits( maxVerts );
  281. idTempArray< halfFloat_t > texCoordS( maxVerts );
  282. idTempArray< halfFloat_t > texCoordT( maxVerts );
  283. idTempArray< triIndex_t > vertexRemap( maxVerts );
  284. idTempArray< overlayVertex_t > overlayVerts( maxVerts );
  285. idTempArray< triIndex_t > overlayIndexes( maxIndexes );
  286. // pull out the triangles we need from the base surfaces
  287. for ( int surfNum = 0; surfNum < model->NumBaseSurfaces(); surfNum++ ) {
  288. const modelSurface_t *surf = model->Surface( surfNum );
  289. if ( surf->geometry == NULL || surf->shader == NULL ) {
  290. continue;
  291. }
  292. // some surfaces can explicitly disallow overlays
  293. if ( !surf->shader->AllowOverlays() ) {
  294. continue;
  295. }
  296. const srfTriangles_t *tri = surf->geometry;
  297. // try to cull the whole surface along the first texture axis
  298. const float d0 = tri->bounds.PlaneDistance( localTextureAxis[0] );
  299. if ( d0 < 0.0f || d0 > 1.0f ) {
  300. continue;
  301. }
  302. // try to cull the whole surface along the second texture axis
  303. const float d1 = tri->bounds.PlaneDistance( localTextureAxis[1] );
  304. if ( d1 < 0.0f || d1 > 1.0f ) {
  305. continue;
  306. }
  307. if ( tri->staticModelWithJoints != NULL && r_useGPUSkinning.GetBool() ) {
  308. R_OverlayPointCullSkinned( cullBits.Ptr(), texCoordS.Ptr(), texCoordT.Ptr(), localTextureAxis, tri->verts, tri->numVerts, tri->staticModelWithJoints->jointsInverted );
  309. } else {
  310. R_OverlayPointCullStatic( cullBits.Ptr(), texCoordS.Ptr(), texCoordT.Ptr(), localTextureAxis, tri->verts, tri->numVerts );
  311. }
  312. // start streaming the indexes
  313. idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 3 > indexesODS( tri->indexes, tri->numIndexes );
  314. memset( vertexRemap.Ptr(), -1, vertexRemap.Size() );
  315. int numIndexes = 0;
  316. int numVerts = 0;
  317. int maxReferencedVertex = 0;
  318. // find triangles that need the overlay
  319. for ( int i = 0; i < tri->numIndexes; ) {
  320. const int nextNumIndexes = indexesODS.FetchNextBatch() - 3;
  321. for ( ; i <= nextNumIndexes; i += 3 ) {
  322. const int i0 = indexesODS[i + 0];
  323. const int i1 = indexesODS[i + 1];
  324. const int i2 = indexesODS[i + 2];
  325. // skip triangles completely off one side
  326. if ( cullBits[i0] & cullBits[i1] & cullBits[i2] ) {
  327. continue;
  328. }
  329. // we could do more precise triangle culling, like a light interaction does, but it's not worth it
  330. // keep this triangle
  331. for ( int j = 0; j < 3; j++ ) {
  332. int index = tri->indexes[i + j];
  333. if ( vertexRemap[index] == (triIndex_t) -1 ) {
  334. vertexRemap[index] = numVerts;
  335. overlayVerts[numVerts].vertexNum = index;
  336. overlayVerts[numVerts].st[0] = texCoordS[index];
  337. overlayVerts[numVerts].st[1] = texCoordT[index];
  338. numVerts++;
  339. maxReferencedVertex = Max( maxReferencedVertex, index );
  340. }
  341. overlayIndexes[numIndexes] = vertexRemap[index];
  342. numIndexes++;
  343. }
  344. }
  345. }
  346. if ( numIndexes == 0 ) {
  347. continue;
  348. }
  349. // add degenerate triangles until the index size is a multiple of 16 bytes
  350. for ( ; ( ( ( numIndexes * sizeof( triIndex_t ) ) & 15 ) != 0 ); numIndexes += 3 ) {
  351. overlayIndexes[numIndexes + 0] = 0;
  352. overlayIndexes[numIndexes + 1] = 0;
  353. overlayIndexes[numIndexes + 2] = 0;
  354. }
  355. // allocate a new overlay
  356. overlay_t & overlay = overlays[nextOverlay++ & ( MAX_OVERLAYS - 1 )];
  357. FreeOverlay( overlay );
  358. overlay.material = material;
  359. overlay.surfaceNum = surfNum;
  360. overlay.surfaceId = surf->id;
  361. overlay.numIndexes = numIndexes;
  362. overlay.indexes = (triIndex_t *)Mem_Alloc( numIndexes * sizeof( overlay.indexes[0] ), TAG_MODEL );
  363. memcpy( overlay.indexes, overlayIndexes.Ptr(), numIndexes * sizeof( overlay.indexes[0] ) );
  364. overlay.numVerts = numVerts;
  365. overlay.verts = (overlayVertex_t *)Mem_Alloc( numVerts * sizeof( overlay.verts[0] ), TAG_MODEL );
  366. memcpy( overlay.verts, overlayVerts.Ptr(), numVerts * sizeof( overlay.verts[0] ) );
  367. overlay.maxReferencedVertex = maxReferencedVertex;
  368. if ( nextOverlay - firstOverlay > MAX_OVERLAYS ) {
  369. firstOverlay = nextOverlay - MAX_OVERLAYS;
  370. }
  371. }
  372. }
  373. /*
  374. ====================
  375. idRenderModelOverlay::CreateDeferredOverlays
  376. ====================
  377. */
  378. void idRenderModelOverlay::CreateDeferredOverlays( const idRenderModel * model ) {
  379. for ( unsigned int i = firstDeferredOverlay; i < nextDeferredOverlay; i++ ) {
  380. const overlayProjectionParms_t & parms = deferredOverlays[i & ( MAX_DEFERRED_OVERLAYS - 1 )];
  381. if ( parms.startTime > tr.viewDef->renderView.time[0] - DEFFERED_OVERLAY_TIMEOUT ) {
  382. CreateOverlay( model, parms.localTextureAxis, parms.material );
  383. }
  384. }
  385. firstDeferredOverlay = 0;
  386. nextDeferredOverlay = 0;
  387. }
  388. /*
  389. ====================
  390. idRenderModelOverlay::AddDeferredOverlay
  391. ====================
  392. */
  393. void idRenderModelOverlay::AddDeferredOverlay( const overlayProjectionParms_t & localParms ) {
  394. deferredOverlays[nextDeferredOverlay++ & ( MAX_DEFERRED_OVERLAYS - 1 )] = localParms;
  395. if ( nextDeferredOverlay - firstDeferredOverlay > MAX_DEFERRED_OVERLAYS ) {
  396. firstDeferredOverlay = nextDeferredOverlay - MAX_DEFERRED_OVERLAYS;
  397. }
  398. }
  399. /*
  400. ====================
  401. R_CopyOverlaySurface
  402. ====================
  403. */
  404. static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t * indexes, int numIndexes, const overlay_t * overlay, const idDrawVert * sourceVerts ) {
  405. assert_16_byte_aligned( &verts[numVerts] );
  406. assert_16_byte_aligned( &indexes[numIndexes] );
  407. assert_16_byte_aligned( overlay->verts );
  408. assert_16_byte_aligned( overlay->indexes );
  409. assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 );
  410. assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 );
  411. #ifdef ID_WIN_X86_SSE2_INTRIN
  412. const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 );
  413. const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 );
  414. const __m128i vector_short_num_verts = _mm_packs_epi32( vector_int_num_verts, vector_int_num_verts );
  415. // copy vertices
  416. for ( int i = 0; i < overlay->numVerts; i++ ) {
  417. const overlayVertex_t &overlayVert = overlay->verts[i];
  418. const idDrawVert &srcVert = sourceVerts[overlayVert.vertexNum];
  419. idDrawVert &dstVert = verts[numVerts + i];
  420. __m128i v0 = _mm_load_si128( (const __m128i *)( (byte *)&srcVert + 0 ) );
  421. __m128i v1 = _mm_load_si128( (const __m128i *)( (byte *)&srcVert + 16 ) );
  422. __m128i st = _mm_cvtsi32_si128( *(unsigned int *) );
  423. st = _mm_shuffle_epi32( st, _MM_SHUFFLE( 0, 1, 2, 3 ) );
  424. v0 = _mm_and_si128( v0, vector_int_clear_last );
  425. v0 = _mm_or_si128( v0, st );
  426. _mm_stream_si128( (__m128i *)( (byte *)&dstVert + 0 ), v0 );
  427. _mm_stream_si128( (__m128i *)( (byte *)&dstVert + 16 ), v1 );
  428. }
  429. // copy indexes
  430. assert( ( overlay->numIndexes & 7 ) == 0 );
  431. assert( sizeof( triIndex_t ) == 2 );
  432. for ( int i = 0; i < overlay->numIndexes; i += 8 ) {
  433. __m128i vi = _mm_load_si128( (const __m128i *)&overlay->indexes[i] );
  434. vi = _mm_add_epi16( vi, vector_short_num_verts );
  435. _mm_stream_si128( (__m128i *)&indexes[numIndexes + i], vi );
  436. }
  437. _mm_sfence();
  438. #else
  439. // copy vertices
  440. for ( int i = 0; i < overlay->numVerts; i++ ) {
  441. const overlayVertex_t &overlayVert = overlay->verts[i];
  442. // NOTE: bad out-of-order write-combined write, SIMD code does the right thing
  443. verts[numVerts + i] = sourceVerts[overlayVert.vertexNum];
  444. verts[numVerts + i].st[0] =[0];
  445. verts[numVerts + i].st[1] =[1];
  446. }
  447. // copy indexes
  448. for ( int i = 0; i < overlay->numIndexes; i += 2 ) {
  449. assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts );
  450. WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] );
  451. }
  452. #endif
  453. }
  454. /*
  455. =====================
  456. idRenderModelOverlay::GetNumOverlayDrawSurfs
  457. =====================
  458. */
  459. unsigned int idRenderModelOverlay::GetNumOverlayDrawSurfs() {
  460. numOverlayMaterials = 0;
  461. for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) {
  462. const overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )];
  463. unsigned int j = 0;
  464. for ( ; j < numOverlayMaterials; j++ ) {
  465. if ( overlayMaterials[j] == overlay.material ) {
  466. break;
  467. }
  468. }
  469. if ( j >= numOverlayMaterials ) {
  470. overlayMaterials[numOverlayMaterials++] = overlay.material;
  471. }
  472. }
  473. return numOverlayMaterials;
  474. }
  475. /*
  476. ====================
  477. idRenderModelOverlay::CreateOverlayDrawSurf
  478. ====================
  479. */
  480. drawSurf_t * idRenderModelOverlay::CreateOverlayDrawSurf( const viewEntity_t *space, const idRenderModel *baseModel, unsigned int index ) {
  481. if ( index < 0 || index >= numOverlayMaterials ) {
  482. return NULL;
  483. }
  484. // md5 models won't have any surfaces when r_showSkel is set
  485. if ( baseModel == NULL || baseModel->IsDefaultModel() || baseModel->NumSurfaces() == 0 ) {
  486. return NULL;
  487. }
  488. assert( baseModel->IsDynamicModel() == DM_STATIC );
  489. const idRenderModelStatic * staticModel = static_cast< const idRenderModelStatic * >( baseModel );
  490. const idMaterial * material = overlayMaterials[index];
  491. int maxVerts = 0;
  492. int maxIndexes = 0;
  493. for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) {
  494. const overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )];
  495. if ( overlay.material == material ) {
  496. maxVerts += overlay.numVerts;
  497. maxIndexes += overlay.numIndexes;
  498. }
  499. }
  500. if ( maxVerts == 0 || maxIndexes == 0 ) {
  501. return NULL;
  502. }
  503. // create a new triangle surface in frame memory so it gets automatically disposed of
  504. srfTriangles_t *newTri = (srfTriangles_t *)R_ClearedFrameAlloc( sizeof( *newTri ), FRAME_ALLOC_SURFACE_TRIANGLES );
  505. newTri->staticModelWithJoints = ( staticModel->jointsInverted != NULL ) ? const_cast< idRenderModelStatic * >( staticModel ) : NULL; // allow GPU skinning
  506. newTri->ambientCache = vertexCache.AllocVertex( NULL, ALIGN( maxVerts * sizeof( idDrawVert ), VERTEX_CACHE_ALIGN ) );
  507. newTri->indexCache = vertexCache.AllocIndex( NULL, ALIGN( maxIndexes * sizeof( triIndex_t ), INDEX_CACHE_ALIGN ) );
  508. idDrawVert * mappedVerts = (idDrawVert *)vertexCache.MappedVertexBuffer( newTri->ambientCache );
  509. triIndex_t * mappedIndexes = (triIndex_t *)vertexCache.MappedIndexBuffer( newTri->indexCache );
  510. int numVerts = 0;
  511. int numIndexes = 0;
  512. for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) {
  513. overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )];
  514. if ( overlay.numVerts == 0 ) {
  515. if ( i == firstOverlay ) {
  516. firstOverlay++;
  517. }
  518. continue;
  519. }
  520. if ( overlay.material != material ) {
  521. continue;
  522. }
  523. // get the source model surface for this overlay surface
  524. const modelSurface_t * baseSurf = ( overlay.surfaceNum < staticModel->NumSurfaces() ) ? staticModel->Surface( overlay.surfaceNum ) : NULL;
  525. // if the surface ids no longer match
  526. if ( baseSurf == NULL || baseSurf->id != overlay.surfaceId ) {
  527. // find the surface with the correct id
  528. if ( staticModel->FindSurfaceWithId( overlay.surfaceId, overlay.surfaceNum ) ) {
  529. baseSurf = staticModel->Surface( overlay.surfaceNum );
  530. } else {
  531. // the surface with this id no longer exists
  532. FreeOverlay( overlay );
  533. if ( i == firstOverlay ) {
  534. firstOverlay++;
  535. }
  536. continue;
  537. }
  538. }
  539. // check for out of range vertex references
  540. const srfTriangles_t * baseTri = baseSurf->geometry;
  541. if ( overlay.maxReferencedVertex >= baseTri->numVerts ) {
  542. // This can happen when playing a demofile and a model has been changed since it was recorded, so just issue a warning and go on.
  543. common->Warning( "idRenderModelOverlay::CreateOverlayDrawSurf: overlay vertex out of range. Model has probably changed since generating the overlay." );
  544. FreeOverlay( overlay );
  545. if ( i == firstOverlay ) {
  546. firstOverlay++;
  547. }
  548. continue;
  549. }
  550. // use SIMD optimized routine to copy the vertices and indices directly to write-combined memory
  551. R_CopyOverlaySurface( mappedVerts, numVerts, mappedIndexes, numIndexes, &overlay, baseTri->verts );
  552. numIndexes += overlay.numIndexes;
  553. numVerts += overlay.numVerts;
  554. }
  555. newTri->numVerts = numVerts;
  556. newTri->numIndexes = numIndexes;
  557. // create the drawsurf
  558. drawSurf_t * drawSurf = (drawSurf_t *)R_FrameAlloc( sizeof( *drawSurf ), FRAME_ALLOC_DRAW_SURFACE );
  559. drawSurf->frontEndGeo = newTri;
  560. drawSurf->numIndexes = newTri->numIndexes;
  561. drawSurf->ambientCache = newTri->ambientCache;
  562. drawSurf->indexCache = newTri->indexCache;
  563. drawSurf->shadowCache = 0;
  564. drawSurf->space = space;
  565. drawSurf->scissorRect = space->scissorRect;
  566. drawSurf->extraGLState = 0;
  567. drawSurf->renderZFail = 0;
  568. R_SetupDrawSurfShader( drawSurf, material, &space->entityDef->parms );
  569. R_SetupDrawSurfJoints( drawSurf, newTri, NULL );
  570. return drawSurf;
  571. }
  572. /*
  573. ====================
  574. idRenderModelOverlay::ReadFromDemoFile
  575. ====================
  576. */
  577. void idRenderModelOverlay::ReadFromDemoFile( idDemoFile *f ) {
  578. // FIXME: implement
  579. }
  580. /*
  581. ====================
  582. idRenderModelOverlay::WriteToDemoFile
  583. ====================
  584. */
  585. void idRenderModelOverlay::WriteToDemoFile( idDemoFile *f ) const {
  586. // FIXME: implement
  587. }