123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639 |
- /*
- Copyright (C) 1997-2001 Id Software, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See the GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
- #include "r_local.h"
- vec3_t r_pright, r_pup, r_ppn; // particle transform basis; R_DrawParticles fills these from vright, vup and vpn
- #define PARTICLE_33 0 /* level used when particle alpha is not above 0.33 */
- #define PARTICLE_66 1 /* level used when particle alpha is above 0.33 but not above 0.66 */
- #define PARTICLE_OPAQUE 2 /* level used when particle alpha is above 0.66 */
- typedef struct
- {
- particle_t *particle; // particle to draw
- int level; // one of the PARTICLE_* levels above
- int color; // copy of particle->color
- } partparms_t;
- static partparms_t partparms; // arguments for R_DrawParticle, which takes no parameters; set by R_DrawParticles before each call
- #if id386 && !defined __linux__
- static unsigned s_prefetch_address; // address of the next particle (the first one again after the last); the asm R_DrawParticle loads through it as a prefetch
- /*
- ** BlendParticleXX
- **
- ** Inputs:
- ** EAX = color
- ** EDI = pdest
- **
- ** Scratch:
- ** EBX = scratch (dstcolor)
- ** EBP = scratch
- **
- ** Outputs:
- ** none
- */
- __declspec(naked) void BlendParticle33( void )
- {
-     // *pdest = vid.alphamap[color + dstcolor*256];
-     //   in:       EAX = color, EDI = pdest
-     //   modifies: AL, EBX, EBP
-     __asm
-     {
-         mov ebp, vid.alphamap       ; ebp = table base
-         xor ebx, ebx
-         mov bl, byte ptr [edi]      ; ebx = dstcolor
-         shl ebx, 8                  ; ebx = dstcolor*256
-         add ebp, ebx
-         add ebp, eax                ; ebp = &alphamap[color + dstcolor*256]
-         mov al, byte ptr [ebp]
-         mov byte ptr [edi], al      ; store the blended pixel
-         ret
-     }
- }
- __declspec(naked) void BlendParticle66( void )
- {
-     // *pdest = vid.alphamap[color*256 + dstcolor];
-     //   in:       EAX = color, EDI = pdest
-     //   modifies: EAX, EBX, EBP
-     __asm
-     {
-         mov ebp, vid.alphamap       ; ebp = table base
-         xor ebx, ebx
-         shl eax, 8                  ; eax = color*256
-         mov bl, byte ptr [edi]      ; ebx = dstcolor
-         add ebp, ebx
-         add ebp, eax                ; ebp = &alphamap[color*256 + dstcolor]
-         mov al, byte ptr [ebp]
-         mov byte ptr [edi], al      ; store the blended pixel
-         ret
-     }
- }
- __declspec(naked) void BlendParticle100( void )
- {
-     // *pdest = color;  (no table lookup)
-     //   in: EAX = color, EDI = pdest
-     __asm
-     {
-         mov byte ptr [edi], al
-         ret
-     }
- }
- /*
- ** R_DrawParticle (asm version)
- **
- ** Transforms, projects, clips and rasterizes the single particle
- ** described by the file-scope partparms (particle, level, color).
- **
- ** Since we use __declspec( naked ) we don't have a stack frame
- ** that we can use. Since I want to reserve EBP anyway, I tossed
- ** all the important variables into statics. This routine isn't
- ** meant to be re-entrant, so this shouldn't cause any problems
- ** other than a slightly higher global memory footprint.
- **
- ** Register use: EBX, ESI, EDI and EBP are saved on entry and
- ** restored at "end" (no prolog/epilog is generated for a naked
- ** function, so this has to be done by hand). EAX, ECX and EDX are
- ** scratch and are not restored.
- **
- ** Every pixel of a row is visited whether or not it passes the
- ** Z test, matching the pz[i] / pdest[i] indexing of the C version.
- */
- __declspec(naked) void R_DrawParticle( void )
- {
- static vec3_t local, transformed;
- static float zi;
- static int u, v, tmp;
- static short izi;
- static int ebpsave;
- static byte (*blendfunc)(void);
- /*
- ** must be memvars since x86 can't load constants
- ** directly. I guess I could use fld1, but that
- ** actually costs one more clock than fld [one]!
- */
- static float particle_z_clip = PARTICLE_Z_CLIP;
- static float one = 1.0F;
- static float point_five = 0.5F;
- static float eight_thousand_hex = 0x8000;
- /*
- ** save trashed variables (EBP goes to a static because it is
- ** used as a scratch register below)
- */
- __asm mov ebpsave, ebp
- __asm push ebx
- __asm push esi
- __asm push edi
- /*
- ** transform the particle
- */
- // VectorSubtract (pparticle->origin, r_origin, local);
- __asm mov esi, partparms.particle
- __asm fld dword ptr [esi+0] ; p_o.x
- __asm fsub dword ptr [r_origin+0] ; p_o.x-r_o.x
- __asm fld dword ptr [esi+4] ; p_o.y | p_o.x-r_o.x
- __asm fsub dword ptr [r_origin+4] ; p_o.y-r_o.y | p_o.x-r_o.x
- __asm fld dword ptr [esi+8] ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x
- __asm fsub dword ptr [r_origin+8] ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x
- __asm fxch st(2) ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z
- __asm fstp dword ptr [local+0] ; p_o.y-r_o.y | p_o.z-r_o.z
- __asm fstp dword ptr [local+4] ; p_o.z-r_o.z
- __asm fstp dword ptr [local+8] ; (empty)
- // transformed[0] = DotProduct(local, r_pright);
- // transformed[1] = DotProduct(local, r_pup);
- // transformed[2] = DotProduct(local, r_ppn);
- __asm fld dword ptr [local+0] ; l.x
- __asm fmul dword ptr [r_pright+0] ; l.x*pr.x
- __asm fld dword ptr [local+4] ; l.y | l.x*pr.x
- __asm fmul dword ptr [r_pright+4] ; l.y*pr.y | l.x*pr.x
- __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x
- __asm fmul dword ptr [r_pright+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x
- __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z
- __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z
- __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z
- __asm fstp dword ptr [transformed+0] ; (empty)
- __asm fld dword ptr [local+0] ; l.x
- __asm fmul dword ptr [r_pup+0] ; l.x*pu.x
- __asm fld dword ptr [local+4] ; l.y | l.x*pu.x
- __asm fmul dword ptr [r_pup+4] ; l.y*pu.y | l.x*pu.x
- __asm fld dword ptr [local+8] ; l.z | l.y*pu.y | l.x*pu.x
- __asm fmul dword ptr [r_pup+8] ; l.z*pu.z | l.y*pu.y | l.x*pu.x
- __asm fxch st(2) ; l.x*pu.x | l.y*pu.y | l.z*pu.z
- __asm faddp st(1), st ; l.x*pu.x + l.y*pu.y | l.z*pu.z
- __asm faddp st(1), st ; l.x*pu.x + l.y*pu.y + l.z*pu.z
- __asm fstp dword ptr [transformed+4] ; (empty)
- __asm fld dword ptr [local+0] ; l.x
- __asm fmul dword ptr [r_ppn+0] ; l.x*pn.x
- __asm fld dword ptr [local+4] ; l.y | l.x*pn.x
- __asm fmul dword ptr [r_ppn+4] ; l.y*pn.y | l.x*pn.x
- __asm fld dword ptr [local+8] ; l.z | l.y*pn.y | l.x*pn.x
- __asm fmul dword ptr [r_ppn+8] ; l.z*pn.z | l.y*pn.y | l.x*pn.x
- __asm fxch st(2) ; l.x*pn.x | l.y*pn.y | l.z*pn.z
- __asm faddp st(1), st ; l.x*pn.x + l.y*pn.y | l.z*pn.z
- __asm faddp st(1), st ; l.x*pn.x + l.y*pn.y + l.z*pn.z
- __asm fstp dword ptr [transformed+8] ; (empty)
- /*
- ** make sure that the transformed particle is not in front of
- ** the particle Z clip plane. We can do the comparison in
- ** integer space since we know the sign of one of the inputs
- ** and can figure out the sign of the other easily enough.
- */
- // if (transformed[2] < PARTICLE_Z_CLIP)
- // return;
- __asm mov eax, dword ptr [transformed+8]
- __asm and eax, eax
- __asm js end
- __asm cmp eax, particle_z_clip
- __asm jl end
- /*
- ** project the point by initiating the 1/z calc
- */
- // zi = 1.0 / transformed[2];
- __asm fld one
- __asm fdiv dword ptr [transformed+8]
- /*
- ** bind the blend function pointer to the appropriate blender
- ** while we're dividing
- */
- //if ( level == PARTICLE_33 )
- // blendparticle = BlendParticle33;
- //else if ( level == PARTICLE_66 )
- // blendparticle = BlendParticle66;
- //else
- // blendparticle = BlendParticle100;
- __asm cmp partparms.level, PARTICLE_66
- __asm je blendfunc_66
- __asm jl blendfunc_33
- __asm lea ebx, BlendParticle100
- __asm jmp done_selecting_blend_func
- blendfunc_33:
- __asm lea ebx, BlendParticle33
- __asm jmp done_selecting_blend_func
- blendfunc_66:
- __asm lea ebx, BlendParticle66
- done_selecting_blend_func:
- __asm mov blendfunc, ebx
- // prefetch the next particle (the loaded value itself is discarded)
- __asm mov ebp, s_prefetch_address
- __asm mov ebp, [ebp]
- // finish the above divide
- __asm fstp zi
- // u = (int)(xcenter + zi * transformed[0] + 0.5);
- // v = (int)(ycenter - zi * transformed[1] + 0.5);
- __asm fld zi ; zi
- __asm fmul dword ptr [transformed+0] ; zi * transformed[0]
- __asm fld zi ; zi | zi * transformed[0]
- __asm fmul dword ptr [transformed+4] ; zi * transformed[1] | zi * transformed[0]
- __asm fxch st(1) ; zi * transformed[0] | zi * transformed[1]
- __asm fadd xcenter ; xcenter + zi * transformed[0] | zi * transformed[1]
- __asm fxch st(1) ; zi * transformed[1] | xcenter + zi * transformed[0]
- __asm fld ycenter ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0]
- __asm fsubrp st(1), st(0) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0]
- __asm fxch st(1) ; xcenter + zi * transformed[0] | ycenter - zi * transformed[1]
- __asm fadd point_five ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1]
- __asm fxch st(1) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5
- __asm fadd point_five ; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5
- __asm fxch st(1) ; u | v
- __asm fistp dword ptr [u] ; v
- __asm fistp dword ptr [v] ; (empty)
- /*
- ** clip out the particle
- */
- // if ((v > d_vrectbottom_particle) ||
- // (u > d_vrectright_particle) ||
- // (v < d_vrecty) ||
- // (u < d_vrectx))
- // {
- // return;
- // }
- __asm mov ebx, u
- __asm mov ecx, v
- __asm cmp ecx, d_vrectbottom_particle
- __asm jg end
- __asm cmp ecx, d_vrecty
- __asm jl end
- __asm cmp ebx, d_vrectright_particle
- __asm jg end
- __asm cmp ebx, d_vrectx
- __asm jl end
- /*
- ** compute addresses of zbuffer, framebuffer, and
- ** compute the Z-buffer reference value.
- **
- ** EBX = U
- ** ECX = V
- **
- ** Outputs:
- ** ESI = Z-buffer address
- ** EDI = framebuffer address
- */
- // ESI = d_pzbuffer + (d_zwidth * v) + u;
- __asm mov esi, d_pzbuffer ; esi = d_pzbuffer
- __asm mov eax, d_zwidth ; eax = d_zwidth
- __asm mul ecx ; eax = d_zwidth*v
- __asm add eax, ebx ; eax = d_zwidth*v+u
- __asm shl eax, 1 ; eax = 2*(d_zwidth*v+u)
- __asm add esi, eax ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u )
- // initiate
- // izi = (int)(zi * 0x8000);
- __asm fld zi
- __asm fmul eight_thousand_hex
- // EDI = pdest = d_viewbuffer + d_scantable[v] + u;
- __asm lea edi, [d_scantable+ecx*4]
- __asm mov edi, [edi]
- __asm add edi, d_viewbuffer
- __asm add edi, ebx
- // complete
- // izi = (int)(zi * 0x8000);
- __asm fistp tmp
- __asm mov eax, tmp
- __asm mov izi, ax
- /*
- ** determine the screen area covered by the particle,
- ** which also means clamping to a min and max
- */
- // pix = izi >> d_pix_shift;
- __asm xor edx, edx
- __asm mov dx, izi
- __asm mov ecx, d_pix_shift
- __asm shr dx, cl
- // if (pix < d_pix_min)
- // pix = d_pix_min;
- __asm cmp edx, d_pix_min
- __asm jge check_pix_max
- __asm mov edx, d_pix_min
- __asm jmp skip_pix_clamp
- // else if (pix > d_pix_max)
- // pix = d_pix_max;
- check_pix_max:
- __asm cmp edx, d_pix_max
- __asm jle skip_pix_clamp
- __asm mov edx, d_pix_max
- skip_pix_clamp:
- /*
- ** render the appropriate pixels
- **
- ** ECX = count (used for inner loop)
- ** EDX = count (used for outer loop)
- ** ESI = zbuffer
- ** EDI = framebuffer
- */
- __asm mov ecx, edx
- /*
- ** at this point:
- **
- ** ECX = count
- **
- ** the row width and the row start addresses are kept on the
- ** stack so they can be reloaded after each row
- */
- __asm push ecx
- __asm push edi
- __asm push esi
- top_of_pix_vert_loop:
- top_of_pix_horiz_loop:
- // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
- // {
- // for (i=0 ; i<pix ; i++)
- // {
- // if (pz[i] <= izi)
- // {
- // pdest[i] = blendparticle( color, pdest[i] );
- // }
- // }
- // }
- __asm xor eax, eax
- __asm mov ax, word ptr [esi]
- __asm cmp ax, izi
- __asm jg next_horiz_pixel
- #if ENABLE_ZWRITES_FOR_PARTICLES
- __asm mov bp, izi
- __asm mov word ptr [esi], bp
- #endif
- __asm mov eax, partparms.color
- __asm call [blendfunc]
- next_horiz_pixel:
- // step to the next pixel whether or not this one was drawn
- __asm add edi, 1
- __asm add esi, 2
- __asm dec ecx
- __asm jnz top_of_pix_horiz_loop
- // reload the row start addresses and advance them one scanline
- __asm pop esi
- __asm pop edi
- __asm mov ebp, d_zwidth
- __asm shl ebp, 1
- __asm add esi, ebp
- __asm add edi, [r_screenwidth]
- // reload the row width, then put all three back for the next row
- __asm pop ecx
- __asm push ecx
- __asm push edi
- __asm push esi
- __asm dec edx
- __asm jnz top_of_pix_vert_loop
- // discard the three saved loop values
- __asm pop ecx
- __asm pop ecx
- __asm pop ecx
- end:
- __asm pop edi
- __asm pop esi
- __asm pop ebx
- __asm mov ebp, ebpsave
- __asm ret
- }
- #else
- static byte BlendParticle33( int pcolor, int dstcolor )
- {
-     // table index is dstcolor*256 + pcolor
-     const int slot = dstcolor*256 + pcolor;
-     return vid.alphamap[slot];
- }
- static byte BlendParticle66( int pcolor, int dstcolor )
- {
-     // table index is pcolor*256 + dstcolor
-     const int slot = dstcolor + pcolor*256;
-     return vid.alphamap[slot];
- }
- static byte BlendParticle100( int pcolor, int dstcolor )
- {
-     // opaque: the destination color does not contribute; the parameter
-     // exists only so all three blenders share one signature
-     (void)dstcolor;
-     return pcolor;
- }
- /*
- ** R_DrawParticle
- **
- ** C reference implementation, compiled whenever the x86 inline-asm
- ** version is not.
- **
- ** Takes its inputs from the file-scope partparms. The particle is
- ** transformed with r_pright / r_pup / r_ppn, dropped if it is nearer
- ** than PARTICLE_Z_CLIP or projects outside the particle clip
- ** rectangle, and otherwise drawn as a pix x pix block of pixels
- ** starting at (u,v), Z-testing and Z-writing each pixel.
- **
- ** The per-pixel blend goes through a function pointer selected from
- ** partparms.level. This exacts some call overhead, but it keeps one
- ** pixel loop instead of one copy of the loop per blend level.
- */
- void R_DrawParticle( void )
- {
-     particle_t *pparticle = partparms.particle;
-     int level = partparms.level;
-     int color = pparticle->color;
-     vec3_t local, transformed;
-     float zi;
-     byte *pdest;
-     short *pz;
-     int i, izi, pix, count, u, v;
-     byte (*blendparticle)( int, int );
-     /*
-     ** transform the particle
-     */
-     VectorSubtract (pparticle->origin, r_origin, local);
-     transformed[0] = DotProduct(local, r_pright);
-     transformed[1] = DotProduct(local, r_pup);
-     transformed[2] = DotProduct(local, r_ppn);
-     if (transformed[2] < PARTICLE_Z_CLIP)
-         return;
-     /*
-     ** bind the blend function pointer to the appropriate blender
-     */
-     switch (level)
-     {
-     case PARTICLE_33:
-         blendparticle = BlendParticle33;
-         break;
-     case PARTICLE_66:
-         blendparticle = BlendParticle66;
-         break;
-     default: // PARTICLE_OPAQUE
-         blendparticle = BlendParticle100;
-         break;
-     }
-     /*
-     ** project the point
-     */
-     // FIXME: preadjust xcenter and ycenter
-     zi = 1.0 / transformed[2];
-     u = (int)(xcenter + zi * transformed[0] + 0.5);
-     v = (int)(ycenter - zi * transformed[1] + 0.5);
-     if ((v > d_vrectbottom_particle) ||
-         (u > d_vrectright_particle) ||
-         (v < d_vrecty) ||
-         (u < d_vrectx))
-     {
-         return;
-     }
-     /*
-     ** compute addresses of zbuffer, framebuffer, and
-     ** compute the Z-buffer reference value.
-     */
-     pz = d_pzbuffer + (d_zwidth * v) + u;
-     pdest = d_viewbuffer + d_scantable[v] + u;
-     izi = (int)(zi * 0x8000);
-     /*
-     ** determine the screen area covered by the particle,
-     ** which also means clamping to a min and max
-     */
-     pix = izi >> d_pix_shift;
-     if (pix < d_pix_min)
-         pix = d_pix_min;
-     else if (pix > d_pix_max)
-         pix = d_pix_max;
-     /*
-     ** render the appropriate pixels: pix rows of pix pixels each
-     */
-     for (count = pix ; count ; count--, pz += d_zwidth, pdest += r_screenwidth)
-     {
-         //FIXME--do it in blocks of 8?
-         for (i=0 ; i<pix ; i++)
-         {
-             if (pz[i] <= izi)
-             {
-                 pz[i] = izi;
-                 pdest[i] = blendparticle( color, pdest[i] );
-             }
-         }
-     }
- }
- #endif // !id386
- /*
- ** R_DrawParticles
- **
- ** Draws every particle in r_newrefdef.particles.
- **
- ** Sets up the r_pright / r_pup / r_ppn basis once, then for each
- ** particle fills in partparms (blend level from alpha, particle
- ** pointer, color) and calls R_DrawParticle, which is either the C
- ** or the asm implementation depending on how the file was built.
- ** On the asm build the FPU control word is switched around the loop
- ** and the prefetch address for the following particle is published.
- */
- void R_DrawParticles (void)
- {
-     extern unsigned long fpu_sp24_cw, fpu_chop_cw;
-     particle_t *cur;
-     int n;
-     VectorScale( vright, xscaleshrink, r_pright );
-     VectorScale( vup, yscaleshrink, r_pup );
-     VectorCopy( vpn, r_ppn );
- #if id386 && !defined __linux__
-     __asm fldcw word ptr [fpu_sp24_cw]
- #endif
-     cur = r_newrefdef.particles;
-     for (n = 0; n < r_newrefdef.num_particles; n++)
-     {
-         // map alpha onto one of the three blend levels
-         int translucency = PARTICLE_33;
-         if ( cur->alpha > 0.66 )
-             translucency = PARTICLE_OPAQUE;
-         else if ( cur->alpha > 0.33 )
-             translucency = PARTICLE_66;
-         partparms.level = translucency;
-         partparms.particle = cur;
-         partparms.color = cur->color;
- #if id386 && !defined __linux__
-         // next particle, or back to the first one when this is the last
-         s_prefetch_address = ( unsigned int )
-             ( ( n < r_newrefdef.num_particles-1 ) ? ( cur + 1 ) : r_newrefdef.particles );
- #endif
-         R_DrawParticle();
-         cur++;
-     }
- #if id386 && !defined __linux__
-     __asm fldcw word ptr [fpu_chop_cw]
- #endif
- }
|