r_part.c

/*
Copyright (C) 1997-2001 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "r_local.h"

vec3_t r_pright, r_pup, r_ppn;

#define PARTICLE_33		0
#define PARTICLE_66		1
#define PARTICLE_OPAQUE	2

typedef struct
{
	particle_t	*particle;
	int			level;
	int			color;
} partparms_t;

static partparms_t partparms;
#if id386 && !defined __linux__

static unsigned s_prefetch_address;

/*
** BlendParticleXX
**
** Inputs:
** EAX = color
** EDI = pdest
**
** Scratch:
** EBX = scratch (dstcolor)
** EBP = scratch
**
** Outputs:
** none
*/
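/*
** (These are naked routines with a register-only interface, so they are
** only meant to be invoked through the blendfunc pointer set up inside
** the asm R_DrawParticle below, not called directly from C.)
*/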
__declspec(naked) void BlendParticle33( void )
{
	// return vid.alphamap[color + dstcolor*256];
	__asm mov ebp, vid.alphamap
	__asm xor ebx, ebx
	__asm mov bl, byte ptr [edi]
	__asm shl ebx, 8
	__asm add ebp, ebx
	__asm add ebp, eax
	__asm mov al, byte ptr [ebp]
	__asm mov byte ptr [edi], al
	__asm ret
}

__declspec(naked) void BlendParticle66( void )
{
	// return vid.alphamap[pcolor*256 + dstcolor];
	__asm mov ebp, vid.alphamap
	__asm xor ebx, ebx
	__asm shl eax, 8
	__asm mov bl, byte ptr [edi]
	__asm add ebp, ebx
	__asm add ebp, eax
	__asm mov al, byte ptr [ebp]
	__asm mov byte ptr [edi], al
	__asm ret
}

__declspec(naked) void BlendParticle100( void )
{
	__asm mov byte ptr [edi], al
	__asm ret
}
/*
** R_DrawParticle (asm version)
**
** Since we use __declspec( naked ) we don't have a stack frame
** that we can use. Since I want to reserve EBP anyway, I tossed
** all the important variables into statics. This routine isn't
** meant to be re-entrant, so this shouldn't cause any problems
** other than a slightly higher global memory footprint.
**
*/
__declspec(naked) void R_DrawParticle( void )
{
	static vec3_t	local, transformed;
	static float	zi;
	static int		u, v, tmp;
	static short	izi;
	static int		ebpsave;

	static byte		(*blendfunc)(void);

	/*
	** must be memvars since x86 can't load constants
	** directly. I guess I could use fld1, but that
	** actually costs one more clock than fld [one]!
	*/
	static float	particle_z_clip = PARTICLE_Z_CLIP;
	static float	one = 1.0F;
	static float	point_five = 0.5F;
	static float	eight_thousand_hex = 0x8000;

	/*
	** save trashed variables
	*/
	__asm mov ebpsave, ebp
	__asm push esi
	__asm push edi

	/*
	** transform the particle
	*/
	// VectorSubtract (pparticle->origin, r_origin, local);
	__asm mov esi, partparms.particle
	__asm fld dword ptr [esi+0]			; p_o.x
	__asm fsub dword ptr [r_origin+0]	; p_o.x-r_o.x
	__asm fld dword ptr [esi+4]			; p_o.y | p_o.x-r_o.x
	__asm fsub dword ptr [r_origin+4]	; p_o.y-r_o.y | p_o.x-r_o.x
	__asm fld dword ptr [esi+8]			; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x
	__asm fsub dword ptr [r_origin+8]	; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x
	__asm fxch st(2)					; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z
	__asm fstp dword ptr [local+0]		; p_o.y-r_o.y | p_o.z-r_o.z
	__asm fstp dword ptr [local+4]		; p_o.z-r_o.z
	__asm fstp dword ptr [local+8]		; (empty)

	// transformed[0] = DotProduct(local, r_pright);
	// transformed[1] = DotProduct(local, r_pup);
	// transformed[2] = DotProduct(local, r_ppn);
	__asm fld dword ptr [local+0]		; l.x
	__asm fmul dword ptr [r_pright+0]	; l.x*pr.x
	__asm fld dword ptr [local+4]		; l.y | l.x*pr.x
	__asm fmul dword ptr [r_pright+4]	; l.y*pr.y | l.x*pr.x
	__asm fld dword ptr [local+8]		; l.z | l.y*pr.y | l.x*pr.x
	__asm fmul dword ptr [r_pright+8]	; l.z*pr.z | l.y*pr.y | l.x*pr.x
	__asm fxch st(2)					; l.x*pr.x | l.y*pr.y | l.z*pr.z
	__asm faddp st(1), st				; l.x*pr.x + l.y*pr.y | l.z*pr.z
	__asm faddp st(1), st				; l.x*pr.x + l.y*pr.y + l.z*pr.z
	__asm fstp dword ptr [transformed+0]	; (empty)
	__asm fld dword ptr [local+0]		; l.x
	__asm fmul dword ptr [r_pup+0]		; l.x*pu.x
	__asm fld dword ptr [local+4]		; l.y | l.x*pu.x
	__asm fmul dword ptr [r_pup+4]		; l.y*pu.y | l.x*pu.x
	__asm fld dword ptr [local+8]		; l.z | l.y*pu.y | l.x*pu.x
	__asm fmul dword ptr [r_pup+8]		; l.z*pu.z | l.y*pu.y | l.x*pu.x
	__asm fxch st(2)					; l.x*pu.x | l.y*pu.y | l.z*pu.z
	__asm faddp st(1), st				; l.x*pu.x + l.y*pu.y | l.z*pu.z
	__asm faddp st(1), st				; l.x*pu.x + l.y*pu.y + l.z*pu.z
	__asm fstp dword ptr [transformed+4]	; (empty)

	__asm fld dword ptr [local+0]		; l.x
	__asm fmul dword ptr [r_ppn+0]		; l.x*pn.x
	__asm fld dword ptr [local+4]		; l.y | l.x*pn.x
	__asm fmul dword ptr [r_ppn+4]		; l.y*pn.y | l.x*pn.x
	__asm fld dword ptr [local+8]		; l.z | l.y*pn.y | l.x*pn.x
	__asm fmul dword ptr [r_ppn+8]		; l.z*pn.z | l.y*pn.y | l.x*pn.x
	__asm fxch st(2)					; l.x*pn.x | l.y*pn.y | l.z*pn.z
	__asm faddp st(1), st				; l.x*pn.x + l.y*pn.y | l.z*pn.z
	__asm faddp st(1), st				; l.x*pn.x + l.y*pn.y + l.z*pn.z
	__asm fstp dword ptr [transformed+8]	; (empty)
	/*
	** make sure that the transformed particle is not in front of
	** the particle Z clip plane. We can do the comparison in
	** integer space since we know the sign of one of the inputs
	** and can figure out the sign of the other easily enough.
	*/
	// if (transformed[2] < PARTICLE_Z_CLIP)
	//     return;
	__asm mov eax, dword ptr [transformed+8]
	__asm and eax, eax
	__asm js end
	__asm cmp eax, particle_z_clip
	__asm jl end
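	/*
	** Why the integer compare above is safe: a negative float always has
	** its sign bit set (caught by the JS), and for non-negative IEEE-754
	** floats the raw bit patterns order exactly like the values, so the
	** signed CMP against the bits of particle_z_clip is equivalent to a
	** float compare.  An illustrative C-level sketch only, not part of
	** the original routine:
	**
	**     int zbits, clipbits;
	**     memcpy( &zbits, &transformed[2], sizeof( zbits ) );
	**     memcpy( &clipbits, &particle_z_clip, sizeof( clipbits ) );
	**     if ( zbits < 0 || zbits < clipbits )
	**         return;     // transformed[2] < PARTICLE_Z_CLIP
	*/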
	/*
	** project the point by initiating the 1/z calc
	*/
	// zi = 1.0 / transformed[2];
	__asm fld one
	__asm fdiv dword ptr [transformed+8]

	/*
	** bind the blend function pointer to the appropriate blender
	** while we're dividing
	*/
	// if ( level == PARTICLE_33 )
	//     blendparticle = BlendParticle33;
	// else if ( level == PARTICLE_66 )
	//     blendparticle = BlendParticle66;
	// else
	//     blendparticle = BlendParticle100;
	__asm cmp partparms.level, PARTICLE_66
	__asm je blendfunc_66
	__asm jl blendfunc_33
	__asm lea ebx, BlendParticle100
	__asm jmp done_selecting_blend_func
blendfunc_33:
	__asm lea ebx, BlendParticle33
	__asm jmp done_selecting_blend_func
blendfunc_66:
	__asm lea ebx, BlendParticle66
done_selecting_blend_func:
	__asm mov blendfunc, ebx

	// prefetch the next particle
	__asm mov ebp, s_prefetch_address
	__asm mov ebp, [ebp]

	// finish the above divide
	__asm fstp zi

	// u = (int)(xcenter + zi * transformed[0] + 0.5);
	// v = (int)(ycenter - zi * transformed[1] + 0.5);
	__asm fld zi							; zi
	__asm fmul dword ptr [transformed+0]	; zi * transformed[0]
	__asm fld zi							; zi | zi * transformed[0]
	__asm fmul dword ptr [transformed+4]	; zi * transformed[1] | zi * transformed[0]
	__asm fxch st(1)						; zi * transformed[0] | zi * transformed[1]
	__asm fadd xcenter						; xcenter + zi * transformed[0] | zi * transformed[1]
	__asm fxch st(1)						; zi * transformed[1] | xcenter + zi * transformed[0]
	__asm fld ycenter						; ycenter | zi * transformed[1] | xcenter + zi * transformed[0]
	__asm fsubrp st(1), st(0)				; ycenter - zi * transformed[1] | xcenter + zi * transformed[0]
	__asm fxch st(1)						; xcenter + zi * transformed[0] | ycenter - zi * transformed[1]
	__asm fadd point_five					; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1]
	__asm fxch st(1)						; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5
	__asm fadd point_five					; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5
	__asm fxch st(1)						; u | v
	__asm fistp dword ptr [u]				; v
	__asm fistp dword ptr [v]				; (empty)

	/*
	** clip out the particle
	*/
	// if ((v > d_vrectbottom_particle) ||
	//     (u > d_vrectright_particle) ||
	//     (v < d_vrecty) ||
	//     (u < d_vrectx))
	// {
	//     return;
	// }
	__asm mov ebx, u
	__asm mov ecx, v
	__asm cmp ecx, d_vrectbottom_particle
	__asm jg end
	__asm cmp ecx, d_vrecty
	__asm jl end
	__asm cmp ebx, d_vrectright_particle
	__asm jg end
	__asm cmp ebx, d_vrectx
	__asm jl end

	/*
	** compute addresses of zbuffer, framebuffer, and
	** compute the Z-buffer reference value.
	**
	** EBX = U
	** ECX = V
	**
	** Outputs:
	** ESI = Z-buffer address
	** EDI = framebuffer address
	*/
	// ESI = d_pzbuffer + (d_zwidth * v) + u;
	__asm mov esi, d_pzbuffer	; esi = d_pzbuffer
	__asm mov eax, d_zwidth		; eax = d_zwidth
	__asm mul ecx				; eax = d_zwidth*v
	__asm add eax, ebx			; eax = d_zwidth*v+u
	__asm shl eax, 1			; eax = 2*(d_zwidth*v+u)
	__asm add esi, eax			; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u )

	// initiate
	// izi = (int)(zi * 0x8000);
	__asm fld zi
	__asm fmul eight_thousand_hex

	// EDI = pdest = d_viewbuffer + d_scantable[v] + u;
	__asm lea edi, [d_scantable+ecx*4]
	__asm mov edi, [edi]
	__asm add edi, d_viewbuffer
	__asm add edi, ebx

	// complete
	// izi = (int)(zi * 0x8000);
	__asm fistp tmp
	__asm mov eax, tmp
	__asm mov izi, ax
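	/*
	** izi is 1/z scaled by 0x8000 into a 16-bit fixed-point value -- the
	** same scale the software renderer's z-buffer entries use -- so the
	** per-pixel visibility test below is a plain 16-bit compare, with
	** larger values meaning closer to the viewer.
	*/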
	/*
	** determine the screen area covered by the particle,
	** which also means clamping to a min and max
	*/
	// pix = izi >> d_pix_shift;
	__asm xor edx, edx
	__asm mov dx, izi
	__asm mov ecx, d_pix_shift
	__asm shr dx, cl

	// if (pix < d_pix_min)
	//     pix = d_pix_min;
	__asm cmp edx, d_pix_min
	__asm jge check_pix_max
	__asm mov edx, d_pix_min
	__asm jmp skip_pix_clamp

	// else if (pix > d_pix_max)
	//     pix = d_pix_max;
check_pix_max:
	__asm cmp edx, d_pix_max
	__asm jle skip_pix_clamp
	__asm mov edx, d_pix_max

skip_pix_clamp:
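	/*
	** EDX now holds the particle's side length in pixels: it scales with
	** 1/z, so nearer particles cover a larger square, clamped to the
	** [d_pix_min, d_pix_max] range.
	*/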
	/*
	** render the appropriate pixels
	**
	** ECX = count (used for inner loop)
	** EDX = count (used for outer loop)
	** ESI = zbuffer
	** EDI = framebuffer
	*/
	__asm mov ecx, edx
	__asm cmp ecx, 1
	__asm ja over

over:

	/*
	** at this point:
	**
	** ECX = count
	*/
	__asm push ecx
	__asm push edi
	__asm push esi

top_of_pix_vert_loop:

top_of_pix_horiz_loop:
	// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
	// {
	//     for (i=0 ; i<pix ; i++)
	//     {
	//         if (pz[i] <= izi)
	//         {
	//             pdest[i] = blendparticle( color, pdest[i] );
	//         }
	//     }
	// }
	__asm xor eax, eax
	__asm mov ax, word ptr [esi]
	__asm cmp ax, izi
	__asm jg end_of_horiz_loop

#if ENABLE_ZWRITES_FOR_PARTICLES
	__asm mov bp, izi
	__asm mov word ptr [esi], bp
#endif

	__asm mov eax, partparms.color

	__asm call [blendfunc]

	__asm add edi, 1
	__asm add esi, 2

end_of_horiz_loop:
	__asm dec ecx
	__asm jnz top_of_pix_horiz_loop

	__asm pop esi
	__asm pop edi

	__asm mov ebp, d_zwidth
	__asm shl ebp, 1
	__asm add esi, ebp
	__asm add edi, [r_screenwidth]

	__asm pop ecx
	__asm push ecx
	__asm push edi
	__asm push esi

	__asm dec edx
	__asm jnz top_of_pix_vert_loop

	__asm pop ecx
	__asm pop ecx
	__asm pop ecx

end:
	__asm pop edi
	__asm pop esi
	__asm mov ebp, ebpsave
	__asm ret
}
#else

static byte BlendParticle33( int pcolor, int dstcolor )
{
	return vid.alphamap[pcolor + dstcolor*256];
}

static byte BlendParticle66( int pcolor, int dstcolor )
{
	return vid.alphamap[pcolor*256+dstcolor];
}

static byte BlendParticle100( int pcolor, int dstcolor )
{
	dstcolor = dstcolor;	// silence "unused parameter" warnings
	return pcolor;
}
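/*
** vid.alphamap is a 256x256 lookup table: entry [a + b*256] holds the
** palette index whose color is roughly one third of color a blended over
** two thirds of color b, which is why swapping the two operands turns the
** 33% blend into the 66% blend.  A minimal sketch of how such a table
** could be generated from a 256-entry RGB palette follows; the palette[]
** parameter and the FindNearestColor() helper are hypothetical stand-ins,
** not part of this file.
*/
#if 0
static void BuildAlphaMap( const byte palette[256][3], byte alphamap[256*256] )
{
	int a, b, c;
	int rgb[3];

	for ( b = 0; b < 256; b++ )			// destination (background) color
	{
		for ( a = 0; a < 256; a++ )		// source (particle) color
		{
			// one third source over two thirds destination
			for ( c = 0; c < 3; c++ )
				rgb[c] = ( palette[a][c] + 2 * palette[b][c] ) / 3;

			// FindNearestColor: hypothetical closest-match search over the palette
			alphamap[a + b*256] = FindNearestColor( palette, rgb[0], rgb[1], rgb[2] );
		}
	}
}
#endif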
/*
** R_DrawParticle
**
** Yes, this is amazingly slow, but it's the C reference
** implementation and should be both robust and vaguely
** understandable. The only time this path should be
** executed is if we're debugging on x86 or if we're
** recompiling and deploying on a non-x86 platform.
**
** To minimize error and improve readability I went the
** function pointer route. This exacts some overhead, but
** it pays off in clean and easy to understand code.
*/
void R_DrawParticle( void )
{
	particle_t	*pparticle = partparms.particle;
	int			level      = partparms.level;
	vec3_t		local, transformed;
	float		zi;
	byte		*pdest;
	short		*pz;
	int			color = pparticle->color;
	int			i, izi, pix, count, u, v;
	byte		(*blendparticle)( int, int );

	/*
	** transform the particle
	*/
	VectorSubtract (pparticle->origin, r_origin, local);
	transformed[0] = DotProduct(local, r_pright);
	transformed[1] = DotProduct(local, r_pup);
	transformed[2] = DotProduct(local, r_ppn);

	if (transformed[2] < PARTICLE_Z_CLIP)
		return;

	/*
	** bind the blend function pointer to the appropriate blender
	*/
	if ( level == PARTICLE_33 )
		blendparticle = BlendParticle33;
	else if ( level == PARTICLE_66 )
		blendparticle = BlendParticle66;
	else
		blendparticle = BlendParticle100;

	/*
	** project the point
	*/
	// FIXME: preadjust xcenter and ycenter
	zi = 1.0 / transformed[2];
	u = (int)(xcenter + zi * transformed[0] + 0.5);
	v = (int)(ycenter - zi * transformed[1] + 0.5);

	if ((v > d_vrectbottom_particle) ||
		(u > d_vrectright_particle) ||
		(v < d_vrecty) ||
		(u < d_vrectx))
	{
		return;
	}

	/*
	** compute addresses of zbuffer, framebuffer, and
	** compute the Z-buffer reference value.
	*/
	pz = d_pzbuffer + (d_zwidth * v) + u;
	pdest = d_viewbuffer + d_scantable[v] + u;
	izi = (int)(zi * 0x8000);

	/*
	** determine the screen area covered by the particle,
	** which also means clamping to a min and max
	*/
	pix = izi >> d_pix_shift;
	if (pix < d_pix_min)
		pix = d_pix_min;
	else if (pix > d_pix_max)
		pix = d_pix_max;

	/*
	** render the appropriate pixels
	*/
	count = pix;

	switch (level) {
	case PARTICLE_33 :
		for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth)
		{
			//FIXME--do it in blocks of 8?
			for (i=0 ; i<pix ; i++)
			{
				if (pz[i] <= izi)
				{
					pz[i] = izi;
					pdest[i] = vid.alphamap[color + ((int)pdest[i]<<8)];
				}
			}
		}
		break;

	case PARTICLE_66 :
		for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth)
		{
			for (i=0 ; i<pix ; i++)
			{
				if (pz[i] <= izi)
				{
					pz[i] = izi;
					pdest[i] = vid.alphamap[(color<<8) + (int)pdest[i]];
				}
			}
		}
		break;

	default:	// 100
		for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth)
		{
			for (i=0 ; i<pix ; i++)
			{
				if (pz[i] <= izi)
				{
					pz[i] = izi;
					pdest[i] = color;
				}
			}
		}
		break;
	}
}
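/*
** Note: the reference implementation above assigns blendparticle but then
** renders through a switch with the blends written inline, so the pointer
** itself ends up unused.  The generic inner loop the header comment
** describes -- equivalent in effect, just a little slower -- would look
** like this (illustrative sketch only, shown in place of the switch):
**
**     for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth)
**     {
**         for (i=0 ; i<pix ; i++)
**         {
**             if (pz[i] <= izi)
**             {
**                 pz[i] = izi;
**                 pdest[i] = blendparticle( color, pdest[i] );
**             }
**         }
**     }
*/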
#endif	// id386 && !defined __linux__
/*
** R_DrawParticles
**
** Responsible for drawing all of the particles in the particle list
** throughout the world. Doesn't care if we're using the C path or
** if we're using the asm path, it simply assigns a function pointer
** and goes.
*/
void R_DrawParticles (void)
{
	particle_t *p;
	int         i;
	extern unsigned long fpu_sp24_cw, fpu_chop_cw;

	VectorScale( vright, xscaleshrink, r_pright );
	VectorScale( vup, yscaleshrink, r_pup );
	VectorCopy( vpn, r_ppn );
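	/*
	** the asm particle path below runs with the FPU switched to single
	** precision (fpu_sp24_cw); the chop control word the rest of the
	** renderer expects is restored once the particle loop is done.
	*/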
#if id386 && !defined __linux__
	__asm fldcw word ptr [fpu_sp24_cw]
#endif

	for (p=r_newrefdef.particles, i=0 ; i<r_newrefdef.num_particles ; i++,p++)
	{
		if ( p->alpha > 0.66 )
			partparms.level = PARTICLE_OPAQUE;
		else if ( p->alpha > 0.33 )
			partparms.level = PARTICLE_66;
		else
			partparms.level = PARTICLE_33;

		partparms.particle = p;
		partparms.color    = p->color;

#if id386 && !defined __linux__
		if ( i < r_newrefdef.num_particles-1 )
			s_prefetch_address = ( unsigned int ) ( p + 1 );
		else
			s_prefetch_address = ( unsigned int ) r_newrefdef.particles;
#endif

		R_DrawParticle();
	}

#if id386 && !defined __linux__
	__asm fldcw word ptr [fpu_chop_cw]
#endif
}