12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737 |
- /*
- ===========================================================================
- Doom 3 BFG Edition GPL Source Code
- Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
- This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
- Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
- In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
- If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
- ===========================================================================
- */
- #pragma hdrstop
- #include "../idlib/precompiled.h"
- #include "tr_local.h"
- #include "Model_local.h"
- #include "../idlib/geometry/DrawVert_intrinsics.h"
- /*
- ====================
- idRenderModelOverlay::idRenderModelOverlay
- Creates an overlay container with no overlays, no deferred overlays and no
- collected overlay materials. Every overlay slot is zero-filled so that its
- verts / indexes pointers start out as NULL.
- ====================
- */
- idRenderModelOverlay::idRenderModelOverlay() {
-     // zero all slots up front; FreeOverlay() relies on NULL pointers for unused slots
-     memset( overlays, 0, sizeof( overlays ) );
-     // both wrapping index ranges start out empty
-     firstOverlay = 0;
-     nextOverlay = 0;
-     firstDeferredOverlay = 0;
-     nextDeferredOverlay = 0;
-     // the material list is rebuilt by GetNumOverlayDrawSurfs()
-     numOverlayMaterials = 0;
- }
- /*
- ====================
- idRenderModelOverlay::~idRenderModelOverlay
- Releases the vertex and index buffers owned by every overlay slot.
- ====================
- */
- idRenderModelOverlay::~idRenderModelOverlay() {
-     // walk all MAX_OVERLAYS slots; FreeOverlay() skips buffers that are NULL
-     overlay_t * const endOfSlots = overlays + MAX_OVERLAYS;
-     for ( overlay_t * slot = overlays; slot != endOfSlots; slot++ ) {
-         FreeOverlay( *slot );
-     }
- }
- /*
- =================
- idRenderModelOverlay::ReUse
- Returns the object to its freshly constructed state: all overlay buffers are
- freed and the overlay, deferred overlay and material counters are reset to 0.
- =================
- */
- void idRenderModelOverlay::ReUse() {
-     // release the geometry held by each slot (slots are zeroed by FreeOverlay)
-     unsigned int slot = 0;
-     while ( slot < MAX_OVERLAYS ) {
-         FreeOverlay( overlays[slot] );
-         slot++;
-     }
-     // rewind all bookkeeping
-     numOverlayMaterials = 0;
-     nextDeferredOverlay = 0;
-     firstDeferredOverlay = 0;
-     nextOverlay = 0;
-     firstOverlay = 0;
- }
- /*
- ====================
- idRenderModelOverlay::FreeOverlay
- Frees the verts and indexes buffers of the given overlay (when present) and
- zero-fills the whole overlay_t so the slot reads as empty afterwards.
- ====================
- */
- void idRenderModelOverlay::FreeOverlay( overlay_t & overlay ) {
-     // remember what the slot owned, then wipe the slot
-     overlayVertex_t * const staleVerts = overlay.verts;
-     triIndex_t * const staleIndexes = overlay.indexes;
-     memset( &overlay, 0, sizeof( overlay_t ) );
-     // release the buffers in the same order as before: verts first, then indexes
-     if ( staleVerts != NULL ) {
-         Mem_Free( staleVerts );
-     }
-     if ( staleIndexes != NULL ) {
-         Mem_Free( staleIndexes );
-     }
- }
- /*
- ====================
- R_OverlayPointCullStatic
- For every vertex this computes the distances d0 and d1 of the vertex position to
- planes[0] and planes[1], stores them as half floats in texCoordS[i] / texCoordT[i],
- and writes a 4-bit mask to cullBits[i]:
-   bit 0 : d0 < 0          bit 1 : d1 < 0
-   bit 2 : 1 - d0 < 0      bit 3 : 1 - d1 < 0
- A set bit therefore marks a vertex that is outside the 0..1 texture range on that side.
- cullBits, texCoordS, texCoordT and verts are asserted to be 16-byte aligned.
- The SSE2 path works on four vertices per iteration and writes four cull bytes and four
- half floats per store; the generic path handles one vertex at a time and tests the
- sign bit of each distance.
- ====================
- */
- static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS, halfFloat_t * texCoordT, const idPlane * planes, const idDrawVert * verts, const int numVerts ) {
- assert_16_byte_aligned( cullBits );
- assert_16_byte_aligned( texCoordS );
- assert_16_byte_aligned( texCoordT );
- assert_16_byte_aligned( verts );
- #ifdef ID_WIN_X86_SSE2_INTRIN
- idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); // streamed view of verts; the template arguments are project-defined
- const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f };
- const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f };
- const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 ); // one result bit per test, replicated in all four lanes
- const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 );
- const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 );
- const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 );
- const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() ); // unaligned load of the four floats of plane 0
- const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() ); // unaligned load of the four floats of plane 1
- const __m128 p0X = _mm_splat_ps( p0, 0 ); // _mm_splat_ps is a project helper; presumably broadcasts one component to all lanes
- const __m128 p0Y = _mm_splat_ps( p0, 1 );
- const __m128 p0Z = _mm_splat_ps( p0, 2 );
- const __m128 p0W = _mm_splat_ps( p0, 3 );
- const __m128 p1X = _mm_splat_ps( p1, 0 );
- const __m128 p1Y = _mm_splat_ps( p1, 1 );
- const __m128 p1Z = _mm_splat_ps( p1, 2 );
- const __m128 p1W = _mm_splat_ps( p1, 3 );
- for ( int i = 0; i < numVerts; ) {
- const int nextNumVerts = vertsODS.FetchNextBatch() - 4; // NOTE(review): FetchNextBatch() presumably returns the number of vertices available so far - confirm
- for ( ; i <= nextNumVerts; i += 4 ) {
- const __m128 v0 = _mm_load_ps( vertsODS[i + 0].xyz.ToFloatPtr() ); // aligned 16-byte load starting at xyz; only x, y, z are used below
- const __m128 v1 = _mm_load_ps( vertsODS[i + 1].xyz.ToFloatPtr() );
- const __m128 v2 = _mm_load_ps( vertsODS[i + 2].xyz.ToFloatPtr() );
- const __m128 v3 = _mm_load_ps( vertsODS[i + 3].xyz.ToFloatPtr() );
- const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.y, v2.y
- const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.z, v2.z, v0.w, v2.w
- const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.y, v3.y
- const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.z, v3.z, v1.w, v3.w
- const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x
- const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y
- const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z
- const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) ); // distance to plane 0; _mm_madd_ps( a, b, c ) is presumably a * b + c - confirm
- const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) ); // distance to plane 1
- const __m128 d2 = _mm_sub_ps( vector_float_one, d0 ); // negative when d0 > 1
- const __m128 d3 = _mm_sub_ps( vector_float_one, d1 ); // negative when d1 > 1
- __m128i flt16S = FastF32toF16( __m128c( d0 ) ); // project helper; converts the four distances to half floats
- __m128i flt16T = FastF32toF16( __m128c( d1 ) );
- _mm_storel_epi64( (__m128i *)&texCoordS[i], flt16S ); // stores the low 64 bits, i.e. four half floats
- _mm_storel_epi64( (__m128i *)&texCoordT[i], flt16T );
- __m128i c0 = __m128c( _mm_cmplt_ps( d0, vector_float_zero ) ); // all-ones lane where the value is below zero
- __m128i c1 = __m128c( _mm_cmplt_ps( d1, vector_float_zero ) );
- __m128i c2 = __m128c( _mm_cmplt_ps( d2, vector_float_zero ) );
- __m128i c3 = __m128c( _mm_cmplt_ps( d3, vector_float_zero ) );
- c0 = _mm_and_si128( c0, vector_int_mask0 ); // reduce each comparison to its own bit
- c1 = _mm_and_si128( c1, vector_int_mask1 );
- c2 = _mm_and_si128( c2, vector_int_mask2 );
- c3 = _mm_and_si128( c3, vector_int_mask3 );
- c0 = _mm_or_si128( c0, c1 ); // merge the four bits per vertex
- c2 = _mm_or_si128( c2, c3 );
- c0 = _mm_or_si128( c0, c2 );
- c0 = _mm_packs_epi32( c0, c0 ); // 32-bit lanes -> 16-bit lanes
- c0 = _mm_packus_epi16( c0, c0 ); // 16-bit lanes -> bytes
- *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( c0 ); // writes the cull bytes of vertices i..i+3 in one 32-bit store
- }
- }
- #else
- idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
- for ( int i = 0; i < numVerts; ) {
- const int nextNumVerts = vertsODS.FetchNextBatch() - 1; // NOTE(review): see the SSE2 path; assumed to be the count of vertices available so far
- for ( ; i <= nextNumVerts; i++ ) {
- const idVec3 & v = vertsODS[i].xyz;
- const float d0 = planes[0].Distance( v );
- const float d1 = planes[1].Distance( v );
- const float d2 = 1.0f - d0; // negative when d0 > 1
- const float d3 = 1.0f - d1; // negative when d1 > 1
- halfFloat_t s = Scalar_FastF32toF16( d0 );
- halfFloat_t t = Scalar_FastF32toF16( d1 );
- texCoordS[i] = s;
- texCoordT[i] = t;
- byte bits;
- bits = IEEE_FLT_SIGNBITSET( d0 ) << 0; // IEEE_FLT_SIGNBITSET presumably yields 1 when the sign bit is set
- bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
- bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
- bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
- cullBits[i] = bits;
- }
- }
- #endif
- }
- /*
- ====================
- R_OverlayPointCullSkinned
- Same outputs as R_OverlayPointCullStatic, but each vertex position is first obtained
- from LoadSkinnedDrawVertPosition() / Scalar_LoadSkinnedDrawVertPosition() using the
- given joints instead of being read directly from the vertex.
- For every vertex the distances d0 and d1 to planes[0] and planes[1] are stored as half
- floats in texCoordS[i] / texCoordT[i], and cullBits[i] receives a 4-bit mask:
-   bit 0 : d0 < 0          bit 1 : d1 < 0
-   bit 2 : 1 - d0 < 0      bit 3 : 1 - d1 < 0
- cullBits, texCoordS, texCoordT and verts are asserted to be 16-byte aligned.
- The SSE2 path works on four vertices per iteration and writes four cull bytes and four
- half floats per store.
- ====================
- */
- static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS, halfFloat_t * texCoordT, const idPlane * planes, const idDrawVert * verts, const int numVerts, const idJointMat * joints ) {
- assert_16_byte_aligned( cullBits );
- assert_16_byte_aligned( texCoordS );
- assert_16_byte_aligned( texCoordT );
- assert_16_byte_aligned( verts );
- #ifdef ID_WIN_X86_SSE2_INTRIN
- idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); // streamed view of verts; the template arguments are project-defined
- const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f };
- const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f };
- const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 ); // one result bit per test, replicated in all four lanes
- const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 );
- const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 );
- const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 );
- const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() ); // unaligned load of the four floats of plane 0
- const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() ); // unaligned load of the four floats of plane 1
- const __m128 p0X = _mm_splat_ps( p0, 0 ); // _mm_splat_ps is a project helper; presumably broadcasts one component to all lanes
- const __m128 p0Y = _mm_splat_ps( p0, 1 );
- const __m128 p0Z = _mm_splat_ps( p0, 2 );
- const __m128 p0W = _mm_splat_ps( p0, 3 );
- const __m128 p1X = _mm_splat_ps( p1, 0 );
- const __m128 p1Y = _mm_splat_ps( p1, 1 );
- const __m128 p1Z = _mm_splat_ps( p1, 2 );
- const __m128 p1W = _mm_splat_ps( p1, 3 );
- for ( int i = 0; i < numVerts; ) {
- const int nextNumVerts = vertsODS.FetchNextBatch() - 4; // NOTE(review): FetchNextBatch() presumably returns the number of vertices available so far - confirm
- for ( ; i <= nextNumVerts; i += 4 ) {
- const __m128 v0 = LoadSkinnedDrawVertPosition( vertsODS[i + 0], joints ); // project helper; presumably the joint-transformed position - confirm
- const __m128 v1 = LoadSkinnedDrawVertPosition( vertsODS[i + 1], joints );
- const __m128 v2 = LoadSkinnedDrawVertPosition( vertsODS[i + 2], joints );
- const __m128 v3 = LoadSkinnedDrawVertPosition( vertsODS[i + 3], joints );
- const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.y, v2.y
- const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.z, v2.z, v0.w, v2.w
- const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.y, v3.y
- const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.z, v3.z, v1.w, v3.w
- const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x
- const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y
- const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z
- const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) ); // distance to plane 0; _mm_madd_ps( a, b, c ) is presumably a * b + c - confirm
- const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) ); // distance to plane 1
- const __m128 d2 = _mm_sub_ps( vector_float_one, d0 ); // negative when d0 > 1
- const __m128 d3 = _mm_sub_ps( vector_float_one, d1 ); // negative when d1 > 1
- __m128i flt16S = FastF32toF16( __m128c( d0 ) ); // project helper; converts the four distances to half floats
- __m128i flt16T = FastF32toF16( __m128c( d1 ) );
- _mm_storel_epi64( (__m128i *)&texCoordS[i], flt16S ); // stores the low 64 bits, i.e. four half floats
- _mm_storel_epi64( (__m128i *)&texCoordT[i], flt16T );
- __m128i c0 = __m128c( _mm_cmplt_ps( d0, vector_float_zero ) ); // all-ones lane where the value is below zero
- __m128i c1 = __m128c( _mm_cmplt_ps( d1, vector_float_zero ) );
- __m128i c2 = __m128c( _mm_cmplt_ps( d2, vector_float_zero ) );
- __m128i c3 = __m128c( _mm_cmplt_ps( d3, vector_float_zero ) );
- c0 = _mm_and_si128( c0, vector_int_mask0 ); // reduce each comparison to its own bit
- c1 = _mm_and_si128( c1, vector_int_mask1 );
- c2 = _mm_and_si128( c2, vector_int_mask2 );
- c3 = _mm_and_si128( c3, vector_int_mask3 );
- c0 = _mm_or_si128( c0, c1 ); // merge the four bits per vertex
- c2 = _mm_or_si128( c2, c3 );
- c0 = _mm_or_si128( c0, c2 );
- c0 = _mm_packs_epi32( c0, c0 ); // 32-bit lanes -> 16-bit lanes
- c0 = _mm_packus_epi16( c0, c0 ); // 16-bit lanes -> bytes
- *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( c0 ); // writes the cull bytes of vertices i..i+3 in one 32-bit store
- }
- }
- #else
- idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
- for ( int i = 0; i < numVerts; ) {
- const int nextNumVerts = vertsODS.FetchNextBatch() - 1; // NOTE(review): see the SSE2 path; assumed to be the count of vertices available so far
- for ( ; i <= nextNumVerts; i++ ) {
- const idVec3 transformed = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints ); // project helper; presumably the joint-transformed position - confirm
- const float d0 = planes[0].Distance( transformed );
- const float d1 = planes[1].Distance( transformed );
- const float d2 = 1.0f - d0; // negative when d0 > 1
- const float d3 = 1.0f - d1; // negative when d1 > 1
- halfFloat_t s = Scalar_FastF32toF16( d0 );
- halfFloat_t t = Scalar_FastF32toF16( d1 );
- texCoordS[i] = s;
- texCoordT[i] = t;
- byte bits;
- bits = IEEE_FLT_SIGNBITSET( d0 ) << 0; // IEEE_FLT_SIGNBITSET presumably yields 1 when the sign bit is set
- bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
- bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
- bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
- cullBits[i] = bits;
- }
- }
- #endif
- }
- /*
- =====================
- idRenderModelOverlay::CreateOverlay
- This projects on both front and back sides to avoid seams
- The material should be clamped, because entire triangles are added, some of which
- may extend well past the 0.0 to 1.0 texture range
- model            - model whose base surfaces receive the overlay
- localTextureAxis - two planes; the distance of a vertex to plane 0 / plane 1 becomes
-                    its S / T texture coordinate
- material         - material stored with every overlay created by this call
- For each base surface that allows overlays and whose bounds are not completely
- outside the 0..1 range on either axis, the triangles that are not entirely off
- one side are collected into a new overlay slot. The slots wrap, so once more than
- MAX_OVERLAYS exist the oldest ones are overwritten.
- =====================
- */
- void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPlane localTextureAxis[2], const idMaterial *material ) {
-     // count up the maximum possible vertices and indexes per surface
-     int maxVerts = 0;
-     int maxIndexes = 0;
-     for ( int surfNum = 0; surfNum < model->NumSurfaces(); surfNum++ ) {
-         const modelSurface_t *surf = model->Surface( surfNum );
-         // a surface may have no geometry (the projection loop below skips those),
-         // so it must not be dereferenced while sizing the buffers either
-         if ( surf->geometry == NULL ) {
-             continue;
-         }
-         if ( surf->geometry->numVerts > maxVerts ) {
-             maxVerts = surf->geometry->numVerts;
-         }
-         if ( surf->geometry->numIndexes > maxIndexes ) {
-             maxIndexes = surf->geometry->numIndexes;
-         }
-     }
-     maxIndexes += 3 * 16 / sizeof( triIndex_t ); // to allow the index size to be a multiple of 16 bytes
-     // make temporary buffers for the building process
-     idTempArray< byte > cullBits( maxVerts );
-     idTempArray< halfFloat_t > texCoordS( maxVerts );
-     idTempArray< halfFloat_t > texCoordT( maxVerts );
-     idTempArray< triIndex_t > vertexRemap( maxVerts );
-     idTempArray< overlayVertex_t > overlayVerts( maxVerts );
-     idTempArray< triIndex_t > overlayIndexes( maxIndexes );
-     // pull out the triangles we need from the base surfaces
-     for ( int surfNum = 0; surfNum < model->NumBaseSurfaces(); surfNum++ ) {
-         const modelSurface_t *surf = model->Surface( surfNum );
-         if ( surf->geometry == NULL || surf->shader == NULL ) {
-             continue;
-         }
-         // some surfaces can explicitly disallow overlays
-         if ( !surf->shader->AllowOverlays() ) {
-             continue;
-         }
-         const srfTriangles_t *tri = surf->geometry;
-         // try to cull the whole surface along the first texture axis
-         const float d0 = tri->bounds.PlaneDistance( localTextureAxis[0] );
-         if ( d0 < 0.0f || d0 > 1.0f ) {
-             continue;
-         }
-         // try to cull the whole surface along the second texture axis
-         const float d1 = tri->bounds.PlaneDistance( localTextureAxis[1] );
-         if ( d1 < 0.0f || d1 > 1.0f ) {
-             continue;
-         }
-         // compute per-vertex cull bits and texture coordinates
-         if ( tri->staticModelWithJoints != NULL && r_useGPUSkinning.GetBool() ) {
-             R_OverlayPointCullSkinned( cullBits.Ptr(), texCoordS.Ptr(), texCoordT.Ptr(), localTextureAxis, tri->verts, tri->numVerts, tri->staticModelWithJoints->jointsInverted );
-         } else {
-             R_OverlayPointCullStatic( cullBits.Ptr(), texCoordS.Ptr(), texCoordT.Ptr(), localTextureAxis, tri->verts, tri->numVerts );
-         }
-         // start streaming the indexes
-         idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 3 > indexesODS( tri->indexes, tri->numIndexes );
-         // every remap entry starts as (triIndex_t) -1, meaning "not copied into the overlay yet"
-         memset( vertexRemap.Ptr(), -1, vertexRemap.Size() );
-         int numIndexes = 0;
-         int numVerts = 0;
-         int maxReferencedVertex = 0;
-         // find triangles that need the overlay
-         for ( int i = 0; i < tri->numIndexes; ) {
-             const int nextNumIndexes = indexesODS.FetchNextBatch() - 3;
-             for ( ; i <= nextNumIndexes; i += 3 ) {
-                 const int i0 = indexesODS[i + 0];
-                 const int i1 = indexesODS[i + 1];
-                 const int i2 = indexesODS[i + 2];
-                 // skip triangles completely off one side
-                 if ( cullBits[i0] & cullBits[i1] & cullBits[i2] ) {
-                     continue;
-                 }
-                 // we could do more precise triangle culling, like a light interaction does, but it's not worth it
-                 // keep this triangle, reusing the indexes that were already fetched above
-                 const int triVerts[3] = { i0, i1, i2 };
-                 for ( int j = 0; j < 3; j++ ) {
-                     const int index = triVerts[j];
-                     if ( vertexRemap[index] == (triIndex_t) -1 ) {
-                         // first use of this source vertex: append it to the overlay vertices
-                         vertexRemap[index] = numVerts;
-                         overlayVerts[numVerts].vertexNum = index;
-                         overlayVerts[numVerts].st[0] = texCoordS[index];
-                         overlayVerts[numVerts].st[1] = texCoordT[index];
-                         numVerts++;
-                         maxReferencedVertex = Max( maxReferencedVertex, index );
-                     }
-                     overlayIndexes[numIndexes] = vertexRemap[index];
-                     numIndexes++;
-                 }
-             }
-         }
-         if ( numIndexes == 0 ) {
-             continue;
-         }
-         // add degenerate triangles until the index size is a multiple of 16 bytes
-         for ( ; ( ( ( numIndexes * sizeof( triIndex_t ) ) & 15 ) != 0 ); numIndexes += 3 ) {
-             overlayIndexes[numIndexes + 0] = 0;
-             overlayIndexes[numIndexes + 1] = 0;
-             overlayIndexes[numIndexes + 2] = 0;
-         }
-         // allocate a new overlay, recycling the oldest slot when the slots wrap
-         overlay_t & overlay = overlays[nextOverlay++ & ( MAX_OVERLAYS - 1 )];
-         FreeOverlay( overlay );
-         overlay.material = material;
-         overlay.surfaceNum = surfNum;
-         overlay.surfaceId = surf->id;
-         overlay.numIndexes = numIndexes;
-         overlay.indexes = (triIndex_t *)Mem_Alloc( numIndexes * sizeof( overlay.indexes[0] ), TAG_MODEL );
-         memcpy( overlay.indexes, overlayIndexes.Ptr(), numIndexes * sizeof( overlay.indexes[0] ) );
-         overlay.numVerts = numVerts;
-         overlay.verts = (overlayVertex_t *)Mem_Alloc( numVerts * sizeof( overlay.verts[0] ), TAG_MODEL );
-         memcpy( overlay.verts, overlayVerts.Ptr(), numVerts * sizeof( overlay.verts[0] ) );
-         overlay.maxReferencedVertex = maxReferencedVertex;
-         // keep the live range at most MAX_OVERLAYS long
-         if ( nextOverlay - firstOverlay > MAX_OVERLAYS ) {
-             firstOverlay = nextOverlay - MAX_OVERLAYS;
-         }
-     }
- }
- /*
- ====================
- idRenderModelOverlay::CreateDeferredOverlays
- Turns every queued deferred overlay into a real overlay on the given model,
- skipping entries whose startTime is not newer than the current view time minus
- DEFFERED_OVERLAY_TIMEOUT. The deferred queue is empty afterwards.
- ====================
- */
- void idRenderModelOverlay::CreateDeferredOverlays( const idRenderModel * model ) {
-     for ( unsigned int queued = firstDeferredOverlay; queued < nextDeferredOverlay; queued++ ) {
-         // the queue index wraps inside the fixed-size deferredOverlays array
-         const overlayProjectionParms_t & pending = deferredOverlays[queued & ( MAX_DEFERRED_OVERLAYS - 1 )];
-         const bool stillFresh = ( pending.startTime > tr.viewDef->renderView.time[0] - DEFFERED_OVERLAY_TIMEOUT );
-         if ( !stillFresh ) {
-             // waited too long to be projected, drop it
-             continue;
-         }
-         CreateOverlay( model, pending.localTextureAxis, pending.material );
-     }
-     // everything queued has been consumed or discarded
-     nextDeferredOverlay = 0;
-     firstDeferredOverlay = 0;
- }
- /*
- ====================
- idRenderModelOverlay::AddDeferredOverlay
- Queues a copy of localParms for a later CreateDeferredOverlays() call. The queue
- holds at most MAX_DEFERRED_OVERLAYS entries; adding more overwrites the oldest.
- ====================
- */
- void idRenderModelOverlay::AddDeferredOverlay( const overlayProjectionParms_t & localParms ) {
-     // the write position wraps inside the fixed-size deferredOverlays array
-     overlayProjectionParms_t & slot = deferredOverlays[nextDeferredOverlay & ( MAX_DEFERRED_OVERLAYS - 1 )];
-     slot = localParms;
-     nextDeferredOverlay++;
-     if ( nextDeferredOverlay - firstDeferredOverlay <= MAX_DEFERRED_OVERLAYS ) {
-         return;
-     }
-     // the oldest entry was just overwritten, so move the start of the queue past it
-     firstDeferredOverlay = nextDeferredOverlay - MAX_DEFERRED_OVERLAYS;
- }
- /*
- ====================
- R_CopyOverlaySurface
- Appends one overlay to an output vertex / index buffer pair.
- verts, numVerts     - destination vertex buffer and the number of vertices already in it
- indexes, numIndexes - destination index buffer and the number of indexes already in it
- overlay             - overlay whose verts and indexes are appended
- sourceVerts         - vertices of the base surface the overlay refers to
- Each overlay vertex is copied from sourceVerts[vertexNum] to verts[numVerts + i] with
- its texture coordinates replaced by the overlay's st. Each overlay index is written to
- indexes[numIndexes + i] with numVerts added, so it refers to the appended vertices.
- The write positions and the overlay arrays are asserted to be 16-byte aligned, and the
- overlay's vertex and index data are asserted to be multiples of 16 bytes in size.
- The SSE2 path uses streaming stores followed by a store fence; it requires 2-byte
- indexes and an index count that is a multiple of 8.
- ====================
- */
- static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t * indexes, int numIndexes, const overlay_t * overlay, const idDrawVert * sourceVerts ) {
- assert_16_byte_aligned( &verts[numVerts] );
- assert_16_byte_aligned( &indexes[numIndexes] );
- assert_16_byte_aligned( overlay->verts );
- assert_16_byte_aligned( overlay->indexes );
- assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 );
- assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 );
- #ifdef ID_WIN_X86_SSE2_INTRIN
- const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 ); // keeps 32-bit lanes 0..2, clears lane 3 (bytes 12..15)
- const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 ); // numVerts in all four 32-bit lanes
- const __m128i vector_short_num_verts = _mm_packs_epi32( vector_int_num_verts, vector_int_num_verts ); // numVerts in all eight 16-bit lanes (signed saturating pack)
- // copy vertices
- for ( int i = 0; i < overlay->numVerts; i++ ) {
- const overlayVertex_t &overlayVert = overlay->verts[i];
- const idDrawVert &srcVert = sourceVerts[overlayVert.vertexNum];
- idDrawVert &dstVert = verts[numVerts + i];
- __m128i v0 = _mm_load_si128( (const __m128i *)( (byte *)&srcVert + 0 ) ); // first 16 bytes of the source vertex
- __m128i v1 = _mm_load_si128( (const __m128i *)( (byte *)&srcVert + 16 ) ); // next 16 bytes, copied unchanged
- __m128i st = _mm_cvtsi32_si128( *(unsigned int *)overlayVert.st ); // both half-float coordinates as one 32-bit value in lane 0
- st = _mm_shuffle_epi32( st, _MM_SHUFFLE( 0, 1, 2, 3 ) ); // reverse the lanes, moving st to lane 3
- v0 = _mm_and_si128( v0, vector_int_clear_last ); // drop bytes 12..15 of the source vertex (presumably its st - matches the scalar path)
- v0 = _mm_or_si128( v0, st ); // insert the overlay st in their place
- _mm_stream_si128( (__m128i *)( (byte *)&dstVert + 0 ), v0 ); // non-temporal stores, written in address order
- _mm_stream_si128( (__m128i *)( (byte *)&dstVert + 16 ), v1 );
- }
- // copy indexes
- assert( ( overlay->numIndexes & 7 ) == 0 );
- assert( sizeof( triIndex_t ) == 2 );
- for ( int i = 0; i < overlay->numIndexes; i += 8 ) {
- __m128i vi = _mm_load_si128( (const __m128i *)&overlay->indexes[i] ); // eight 16-bit indexes
- vi = _mm_add_epi16( vi, vector_short_num_verts ); // rebase onto the appended vertices
- _mm_stream_si128( (__m128i *)&indexes[numIndexes + i], vi );
- }
- _mm_sfence(); // store fence after the streaming stores above
- #else
- // copy vertices
- for ( int i = 0; i < overlay->numVerts; i++ ) {
- const overlayVertex_t &overlayVert = overlay->verts[i];
- // NOTE: bad out-of-order write-combined write, SIMD code does the right thing
- verts[numVerts + i] = sourceVerts[overlayVert.vertexNum];
- verts[numVerts + i].st[0] = overlayVert.st[0];
- verts[numVerts + i].st[1] = overlayVert.st[1];
- }
- // copy indexes
- for ( int i = 0; i < overlay->numIndexes; i += 2 ) { // two indexes per step
- assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts );
- WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] ); // project helper; presumably stores both rebased indexes at once
- }
- #endif
- }
- /*
- =====================
- idRenderModelOverlay::GetNumOverlayDrawSurfs
- Rebuilds overlayMaterials as the list of distinct materials referenced by the
- overlays in [firstOverlay, nextOverlay) and returns how many there are.
- =====================
- */
- unsigned int idRenderModelOverlay::GetNumOverlayDrawSurfs() {
-     numOverlayMaterials = 0;
-     for ( unsigned int slot = firstOverlay; slot < nextOverlay; slot++ ) {
-         const idMaterial * const candidate = overlays[slot & ( MAX_OVERLAYS - 1 )].material;
-         // linear search through the materials collected so far
-         bool alreadyListed = false;
-         for ( unsigned int known = 0; known < numOverlayMaterials && !alreadyListed; known++ ) {
-             alreadyListed = ( overlayMaterials[known] == candidate );
-         }
-         if ( !alreadyListed ) {
-             overlayMaterials[numOverlayMaterials++] = candidate;
-         }
-     }
-     return numOverlayMaterials;
- }
- /*
- ====================
- idRenderModelOverlay::CreateOverlayDrawSurf
- Builds one draw surface containing every live overlay that uses
- overlayMaterials[index].
- space     - view entity the draw surface belongs to (supplies scissor rect and entity parms)
- baseModel - model the overlays were generated from; asserted to be a static model
- index     - entry in the material list built by GetNumOverlayDrawSurfs()
- Returns NULL when index is out of range, when the model is missing, a default model
- or has no surfaces, or when no overlay uses the material. Overlays whose source
- surface no longer exists or whose vertex references are out of range are freed.
- The triangle data and the draw surface are allocated from frame memory.
- ====================
- */
- drawSurf_t * idRenderModelOverlay::CreateOverlayDrawSurf( const viewEntity_t *space, const idRenderModel *baseModel, unsigned int index ) {
-     // index is unsigned, so only the upper bound needs checking
-     if ( index >= numOverlayMaterials ) {
-         return NULL;
-     }
-     // md5 models won't have any surfaces when r_showSkel is set
-     if ( baseModel == NULL || baseModel->IsDefaultModel() || baseModel->NumSurfaces() == 0 ) {
-         return NULL;
-     }
-     assert( baseModel->IsDynamicModel() == DM_STATIC );
-     const idRenderModelStatic * staticModel = static_cast< const idRenderModelStatic * >( baseModel );
-     const idMaterial * material = overlayMaterials[index];
-     // upper bound on the vertices and indexes needed for this material
-     int maxVerts = 0;
-     int maxIndexes = 0;
-     for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) {
-         const overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )];
-         if ( overlay.material == material ) {
-             maxVerts += overlay.numVerts;
-             maxIndexes += overlay.numIndexes;
-         }
-     }
-     if ( maxVerts == 0 || maxIndexes == 0 ) {
-         return NULL;
-     }
-     // create a new triangle surface in frame memory so it gets automatically disposed of
-     srfTriangles_t *newTri = (srfTriangles_t *)R_ClearedFrameAlloc( sizeof( *newTri ), FRAME_ALLOC_SURFACE_TRIANGLES );
-     newTri->staticModelWithJoints = ( staticModel->jointsInverted != NULL ) ? const_cast< idRenderModelStatic * >( staticModel ) : NULL; // allow GPU skinning
-     newTri->ambientCache = vertexCache.AllocVertex( NULL, ALIGN( maxVerts * sizeof( idDrawVert ), VERTEX_CACHE_ALIGN ) );
-     newTri->indexCache = vertexCache.AllocIndex( NULL, ALIGN( maxIndexes * sizeof( triIndex_t ), INDEX_CACHE_ALIGN ) );
-     idDrawVert * mappedVerts = (idDrawVert *)vertexCache.MappedVertexBuffer( newTri->ambientCache );
-     triIndex_t * mappedIndexes = (triIndex_t *)vertexCache.MappedIndexBuffer( newTri->indexCache );
-     int numVerts = 0;
-     int numIndexes = 0;
-     for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) {
-         overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )];
-         // empty slots at the front of the live range are retired by advancing firstOverlay
-         if ( overlay.numVerts == 0 ) {
-             if ( i == firstOverlay ) {
-                 firstOverlay++;
-             }
-             continue;
-         }
-         if ( overlay.material != material ) {
-             continue;
-         }
-         // get the source model surface for this overlay surface
-         const modelSurface_t * baseSurf = ( overlay.surfaceNum < staticModel->NumSurfaces() ) ? staticModel->Surface( overlay.surfaceNum ) : NULL;
-         // if the surface ids no longer match
-         if ( baseSurf == NULL || baseSurf->id != overlay.surfaceId ) {
-             // find the surface with the correct id
-             if ( staticModel->FindSurfaceWithId( overlay.surfaceId, overlay.surfaceNum ) ) {
-                 baseSurf = staticModel->Surface( overlay.surfaceNum );
-             } else {
-                 // the surface with this id no longer exists
-                 FreeOverlay( overlay );
-                 if ( i == firstOverlay ) {
-                     firstOverlay++;
-                 }
-                 continue;
-             }
-         }
-         // check for out of range vertex references
-         const srfTriangles_t * baseTri = baseSurf->geometry;
-         if ( overlay.maxReferencedVertex >= baseTri->numVerts ) {
-             // This can happen when playing a demofile and a model has been changed since it was recorded, so just issue a warning and go on.
-             common->Warning( "idRenderModelOverlay::CreateOverlayDrawSurf: overlay vertex out of range. Model has probably changed since generating the overlay." );
-             FreeOverlay( overlay );
-             if ( i == firstOverlay ) {
-                 firstOverlay++;
-             }
-             continue;
-         }
-         // use SIMD optimized routine to copy the vertices and indices directly to write-combined memory
-         R_CopyOverlaySurface( mappedVerts, numVerts, mappedIndexes, numIndexes, &overlay, baseTri->verts );
-         numIndexes += overlay.numIndexes;
-         numVerts += overlay.numVerts;
-     }
-     // record what was actually copied, which can be less than the upper bound
-     newTri->numVerts = numVerts;
-     newTri->numIndexes = numIndexes;
-     // create the drawsurf
-     drawSurf_t * drawSurf = (drawSurf_t *)R_FrameAlloc( sizeof( *drawSurf ), FRAME_ALLOC_DRAW_SURFACE );
-     drawSurf->frontEndGeo = newTri;
-     drawSurf->numIndexes = newTri->numIndexes;
-     drawSurf->ambientCache = newTri->ambientCache;
-     drawSurf->indexCache = newTri->indexCache;
-     drawSurf->shadowCache = 0;
-     drawSurf->space = space;
-     drawSurf->scissorRect = space->scissorRect;
-     drawSurf->extraGLState = 0;
-     drawSurf->renderZFail = 0;
-     R_SetupDrawSurfShader( drawSurf, material, &space->entityDef->parms );
-     R_SetupDrawSurfJoints( drawSurf, newTri, NULL );
-     return drawSurf;
- }
- /*
- ====================
- idRenderModelOverlay::ReadFromDemoFile
- Placeholder for restoring overlay state from a demo file. Not implemented:
- the body is empty, so f is ignored and the object is left unchanged.
- ====================
- */
- void idRenderModelOverlay::ReadFromDemoFile( idDemoFile *f ) {
- // FIXME: implement - nothing is read from f yet
- }
- /*
- ====================
- idRenderModelOverlay::WriteToDemoFile
- Placeholder for saving overlay state to a demo file. Not implemented:
- the body is empty, so nothing is written to f.
- ====================
- */
- void idRenderModelOverlay::WriteToDemoFile( idDemoFile *f ) const {
- // FIXME: implement - nothing is written to f yet
- }
|