r_spr8.asm 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885
  1. .386P
  2. .model FLAT
  3. ;
  4. ; d_spr8.s
  5. ; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
  6. ;
  7. include qasm.inc
  8. include d_if.inc
  9. if id386
  10. ;----------------------------------------------------------------------
  11. ; 8-bpp horizontal span drawing code for polygons, with transparency.
  12. ;----------------------------------------------------------------------
  13. _TEXT SEGMENT
  14. ; out-of-line, rarely-needed clamping code
  ;
  ; Each stub forces one out-of-range texture coordinate to a limit and then
  ; jumps back to the matching LClampReentryN label in the main routine.
  ;   stubs 0/1: span-start s (esi) / t (edx), clamped to 0 or the bbextent
  ;   stubs 2/3: segment-end s (ebp) / t (ecx) of a full 8-pixel segment,
  ;              clamped to 2048 or the bbextent
  ;   stubs 4/5: segment-end s (eax) / t (ebx) of the last segment,
  ;              clamped to 2048 or the bbextent
  ;
  15. LClampHigh0:
  16. mov esi,ds:dword ptr[_bbextents] ; s = upper limit
  17. jmp LClampReentry0
  18. LClampHighOrLow0:
  ; reached by an unsigned "ja" after cmp esi,ebx (ebx = _bbextents); the flags
  ; of that cmp are still live, so a signed "greater" means s is too high and
  ; anything else means s is negative
  19. jg LClampHigh0
  20. xor esi,esi ; s was negative: clamp to 0
  21. jmp LClampReentry0
  22. LClampHigh1:
  23. mov edx,ds:dword ptr[_bbextentt] ; t = upper limit
  24. jmp LClampReentry1
  25. LClampHighOrLow1:
  ; same flag reuse as LClampHighOrLow0, for cmp edx,ebp (ebp = _bbextentt)
  26. jg LClampHigh1
  27. xor edx,edx ; t was negative: clamp to 0
  28. jmp LClampReentry1
  29. LClampLow2:
  30. mov ebp,2048 ; segment-end s below 2048: clamp up
  31. jmp LClampReentry2
  32. LClampHigh2:
  33. mov ebp,ds:dword ptr[_bbextents] ; segment-end s above limit: clamp down
  34. jmp LClampReentry2
  35. LClampLow3:
  36. mov ecx,2048 ; segment-end t below 2048: clamp up
  37. jmp LClampReentry3
  38. LClampHigh3:
  39. mov ecx,ds:dword ptr[_bbextentt] ; segment-end t above limit: clamp down
  40. jmp LClampReentry3
  41. LClampLow4:
  42. mov eax,2048 ; last-segment end s below 2048: clamp up
  43. jmp LClampReentry4
  44. LClampHigh4:
  45. mov eax,ds:dword ptr[_bbextents] ; last-segment end s above limit: clamp down
  46. jmp LClampReentry4
  47. LClampLow5:
  48. mov ebx,2048 ; last-segment end t below 2048: clamp up
  49. jmp LClampReentry5
  50. LClampHigh5:
  51. mov ebx,ds:dword ptr[_bbextentt] ; last-segment end t above limit: clamp down
  52. jmp LClampReentry5
  ; pspans: offset from esp (after the four pushes below) to the single stack
  ; argument: 4 bytes of return address + 16 bytes of saved registers
  53. pspans equ 4+16
  54. align 4
  ;----------------------------------------------------------------------
  ; _D_SpriteDrawSpansXXX
  ;
  ; Draws a list of horizontal spans of a perspective-correct, z-buffered
  ; 8-bpp texture, skipping texels equal to TRANSPARENT_COLOR.
  ;
  ; In:    [esp+4] = pointer to the first span descriptor (fields accessed
  ;                  through sspan_t_u, sspan_t_v, sspan_t_count; stride
  ;                  sspan_t_size).  The list ends at the first descriptor
  ;                  after the initial one whose count is negative.
  ;                  NOTE(review): presumably the C prototype is
  ;                  void f(sspan_t *pspan), cdecl -- confirm against caller.
  ; Out:   nothing in registers; writes the view buffer and the z-buffer.
  ; Saves: ebp, edi, esi, ebx (pushed here, popped before the final ret).
  ; Clobb: eax, ecx, edx, flags, x87 stack (left balanced on exit).
  ;
  ; This entry block computes the per-call constants: the s/z, t/z and 1/z
  ; steps for 8 pixels, the integer 1/z step, and pbase.
  ;----------------------------------------------------------------------
  55. public _D_SpriteDrawSpansXXX
  56. _D_SpriteDrawSpansXXX:
  57. push ebp ; preserve caller's stack frame
  58. push edi
  59. push esi ; preserve register variables
  60. push ebx
  61. ;
  62. ; set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
  63. ; and span list pointers, and 1/z step in 0.32 fixed-point
  64. ;
  65. ; FIXME: any overlap from rearranging?
  66. fld ds:dword ptr[_d_sdivzstepu]
  67. fmul ds:dword ptr[fp_8] ; sdivzstepu*8
  68. mov edx,ds:dword ptr[_cacheblock]
  69. fld ds:dword ptr[_d_tdivzstepu]
  70. fmul ds:dword ptr[fp_8] ; tdivzstepu*8 | sdivzstepu*8
  71. mov ebx,ds:dword ptr[pspans+esp] ; point to the first span descriptor
  72. fld ds:dword ptr[_d_zistepu]
  73. fmul ds:dword ptr[fp_8] ; zistepu*8 | tdivzstepu*8 | sdivzstepu*8
  74. mov ds:dword ptr[pbase],edx ; pbase = cacheblock
  75. fld ds:dword ptr[_d_zistepu]
  76. fmul ds:dword ptr[fp_64kx64k] ; zistepu*64k*64k | zistepu*8 | tdivzstepu*8 | sdivzstepu*8
  77. fxch st(3) ; sdivzstepu*8 | zistepu*8 | tdivzstepu*8 | zistepu*64k*64k
  78. fstp ds:dword ptr[sdivz8stepu]
  79. fstp ds:dword ptr[zi8stepu]
  80. fstp ds:dword ptr[tdivz8stepu]
  81. fistp ds:dword ptr[izistep] ; integer 1/z step; FP stack is empty again
  82. mov eax,ds:dword ptr[izistep]
  83. ror eax,16 ; put upper 16 bits in low word
  84. mov ecx,ds:dword ptr[sspan_t_count+ebx] ; ecx = pixel count of the first span
  85. mov ds:dword ptr[izistep],eax ; izistep is kept with its 16-bit halves swapped
  86. cmp ecx,0
  87. jle LNextSpan ; first span has no pixels: advance to the next descriptor
  ; Per-span setup.  On entry ebx = span descriptor, ecx = pixel count (> 0).
  ; Leaves 1/z | t/z | s/z on the FP stack (plus z*64k on top when an FDIV for
  ; the end of the first segment was started), edi = first destination pixel,
  ; [pz] = first z-buffer entry, [izi] = 1/z with its 16-bit halves swapped,
  ; esi = _sadjust, edx = _tadjust, and the span pointer saved on the stack.
  88. LSpanLoop:
  89. ;
  90. ; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
  91. ; initial s and t values
  92. ;
  93. ; FIXME: pipeline FILD?
  94. fild ds:dword ptr[sspan_t_v+ebx] ; dv
  95. fild ds:dword ptr[sspan_t_u+ebx] ; du | dv
  96. fld st(1) ; dv | du | dv
  97. fmul ds:dword ptr[_d_sdivzstepv] ; dv*d_sdivzstepv | du | dv
  98. fld st(1) ; du | dv*d_sdivzstepv | du | dv
  99. fmul ds:dword ptr[_d_sdivzstepu] ; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  100. fld st(2) ; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  101. fmul ds:dword ptr[_d_tdivzstepu] ; du*d_tdivzstepu | du*d_sdivzstepu |
  102. ; dv*d_sdivzstepv | du | dv
  103. fxch st(1) ; du*d_sdivzstepu | du*d_tdivzstepu |
  104. ; dv*d_sdivzstepv | du | dv
  105. faddp st(2),st(0) ; du*d_tdivzstepu |
  106. ; du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
  107. fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv |
  108. ; du*d_tdivzstepu | du | dv
  109. fld st(3) ; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
  110. ; du*d_tdivzstepu | du | dv
  111. fmul ds:dword ptr[_d_tdivzstepv] ; dv*d_tdivzstepv |
  112. ; du*d_sdivzstepu + dv*d_sdivzstepv |
  113. ; du*d_tdivzstepu | du | dv
  114. fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv |
  115. ; dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
  116. fadd ds:dword ptr[_d_sdivzorigin] ; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
  117. ; du*d_sdivzstepu; stays in %st(2) at end
  118. fxch st(4) ; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
  119. ; s/z
  120. fmul ds:dword ptr[_d_zistepv] ; dv*d_zistepv | dv*d_tdivzstepv |
  121. ; du*d_tdivzstepu | du | s/z
  122. fxch st(1) ; dv*d_tdivzstepv | dv*d_zistepv |
  123. ; du*d_tdivzstepu | du | s/z
  124. faddp st(2),st(0) ; dv*d_zistepv |
  125. ; dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
  126. fxch st(2) ; du | dv*d_tdivzstepv + du*d_tdivzstepu |
  127. ; dv*d_zistepv | s/z
  128. fmul ds:dword ptr[_d_zistepu] ; du*d_zistepu |
  129. ; dv*d_tdivzstepv + du*d_tdivzstepu |
  130. ; dv*d_zistepv | s/z
  131. fxch st(1) ; dv*d_tdivzstepv + du*d_tdivzstepu |
  132. ; du*d_zistepu | dv*d_zistepv | s/z
  133. fadd ds:dword ptr[_d_tdivzorigin] ; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
  134. ; du*d_tdivzstepu; stays in %st(1) at end
  135. fxch st(2) ; dv*d_zistepv | du*d_zistepu | t/z | s/z
  136. faddp st(1),st(0) ; dv*d_zistepv + du*d_zistepu | t/z | s/z
  137. fld ds:dword ptr[fp_64k] ; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
  138. fxch st(1) ; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
  139. fadd ds:dword ptr[_d_ziorigin] ; zi = d_ziorigin + dv*d_zistepv +
  140. ; du*d_zistepu; stays in %st(0) at end
  141. ; 1/z | fp_64k | t/z | s/z
  142. fld st(0) ; FIXME: get rid of stall on FMUL?
  143. fmul ds:dword ptr[fp_64kx64k] ; 1/z*64k*64k | 1/z | fp_64k | t/z | s/z
  144. fxch st(1) ; 1/z | 1/z*64k*64k | fp_64k | t/z | s/z
  145. ;
  146. ; calculate and clamp s & t
  147. ;
  148. fdiv st(2),st(0) ; st(2) = fp_64k / (1/z): 1/z | 1/z*64k*64k | z*64k | t/z | s/z
  149. fxch st(1) ; 1/z*64k*64k | 1/z | z*64k | t/z | s/z
  150. fistp ds:dword ptr[izi] ; 0.32 fixed-point 1/z; leaves 1/z | z*64k | t/z | s/z
  151. mov ebp,ds:dword ptr[izi]
  152. ;
  153. ; set pz to point to the first z-buffer pixel in the span
  154. ;
  155. ror ebp,16 ; put upper 16 bits in low word
  156. mov eax,ds:dword ptr[sspan_t_v+ebx]
  157. mov ds:dword ptr[izi],ebp ; izi is kept with its 16-bit halves swapped
  158. mov ebp,ds:dword ptr[sspan_t_u+ebx]
  159. imul ds:dword ptr[_d_zrowbytes] ; edx:eax = v * d_zrowbytes (edx is scratch here)
  160. shl ebp,1 ; a word per pixel
  161. add eax,ds:dword ptr[_d_pzbuffer]
  162. add eax,ebp
  163. mov ds:dword ptr[pz],eax ; pz = d_pzbuffer + v*d_zrowbytes + u*2
  164. ;
  165. ; point %edi to the first pixel in the span
  166. ;
  167. mov ebp,ds:dword ptr[_d_viewbuffer]
  168. mov eax,ds:dword ptr[sspan_t_v+ebx]
  169. push ebx ; preserve spans pointer
  170. mov edx,ds:dword ptr[_tadjust] ; edx/esi are biased by t/s at LFDIVInFlight1
  171. mov esi,ds:dword ptr[_sadjust]
  172. mov edi,ds:dword ptr[_d_scantable+eax*4] ; v * screenwidth
  173. add edi,ebp
  174. mov ebp,ds:dword ptr[sspan_t_u+ebx]
  175. add edi,ebp ; pdest = &pdestspan[scans->u];
  176. ;
  177. ; now start the FDIV for the end of the span
  178. ;
  179. cmp ecx,8
  180. ja LSetupNotLast1 ; more than 8 pixels: first segment is a full 8-pixel one
  181. dec ecx ; ecx = pixels - 1 = number of steps in this (only) segment
  182. jz LCleanup1 ; if only one pixel, no need to start an FDIV
  183. mov ds:dword ptr[spancountminus1],ecx
  184. ; finish up the s and t calcs
  185. fxch st(1) ; z*64k | 1/z | t/z | s/z
  186. fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
  187. fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
  188. fxch st(1) ; z*64k | s | 1/z | t/z | s/z
  189. fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
  190. fxch st(1) ; s | t | 1/z | t/z | s/z
  191. fistp ds:dword ptr[s] ; t | 1/z | t/z | s/z
  192. fistp ds:dword ptr[t] ; 1/z | t/z | s/z
  ; advance 1/z, t/z and s/z by (pixels - 1) steps to the last pixel of the span
  193. fild ds:dword ptr[spancountminus1]
  194. fld ds:dword ptr[_d_tdivzstepu] ; _d_tdivzstepu | spancountminus1
  195. fld ds:dword ptr[_d_zistepu] ; _d_zistepu | _d_tdivzstepu | spancountminus1
  196. fmul st(0),st(2) ; _d_zistepu*scm1 | _d_tdivzstepu | scm1
  197. fxch st(1) ; _d_tdivzstepu | _d_zistepu*scm1 | scm1
  198. fmul st(0),st(2) ; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
  199. fxch st(2) ; scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
  200. fmul ds:dword ptr[_d_sdivzstepu] ; _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
  201. ; _d_tdivzstepu*scm1
  202. fxch st(1) ; _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
  203. ; _d_tdivzstepu*scm1
  204. faddp st(3),st(0) ; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
  205. fxch st(1) ; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
  206. faddp st(3),st(0) ; _d_sdivzstepu*scm1
  207. faddp st(3),st(0) ; 1/z | t/z | s/z, all advanced to the end of the span
  208. fld ds:dword ptr[fp_64k]
  209. fdiv st(0),st(1) ; this is what we've gone to all this trouble to
  210. ; overlap
  211. jmp LFDIVInFlight1
  212. LCleanup1:
  ; one-pixel span: only s and t for the first pixel are needed
  213. ; finish up the s and t calcs
  214. fxch st(1) ; z*64k | 1/z | t/z | s/z
  215. fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
  216. fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
  217. fxch st(1) ; z*64k | s | 1/z | t/z | s/z
  218. fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
  219. fxch st(1) ; s | t | 1/z | t/z | s/z
  220. fistp ds:dword ptr[s] ; t | 1/z | t/z | s/z
  221. fistp ds:dword ptr[t] ; 1/z | t/z | s/z
  222. jmp LFDIVInFlight1
  223. align 4
  224. LSetupNotLast1:
  225. ; finish up the s and t calcs
  226. fxch st(1) ; z*64k | 1/z | t/z | s/z
  227. fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
  228. fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
  229. fxch st(1) ; z*64k | s | 1/z | t/z | s/z
  230. fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
  231. fxch st(1) ; s | t | 1/z | t/z | s/z
  232. fistp ds:dword ptr[s] ; t | 1/z | t/z | s/z
  233. fistp ds:dword ptr[t] ; 1/z | t/z | s/z
  ; advance 1/z, s/z and t/z by one 8-pixel segment
  234. fadd ds:dword ptr[zi8stepu]
  235. fxch st(2)
  236. fadd ds:dword ptr[sdivz8stepu]
  237. fxch st(2)
  238. fld ds:dword ptr[tdivz8stepu]
  239. faddp st(2),st(0)
  240. fld ds:dword ptr[fp_64k]
  241. fdiv st(0),st(1) ; z = 1/1/z
  242. ; this is what we've gone to all this trouble to
  243. ; overlap
  ; Integer work done while the FDIV (if one was started) completes: bias and
  ; clamp the span's starting s and t, split them into integer and fractional
  ; parts, and compute the address of the first source texel.
  ; On entry esi = _sadjust, edx = _tadjust, ecx = pixel count (if > 8) or
  ; pixel count - 1 (if the span is 8 pixels or fewer).
  244. LFDIVInFlight1:
  245. add esi,ds:dword ptr[s] ; esi = s + sadjust
  246. add edx,ds:dword ptr[t] ; edx = t + tadjust
  247. mov ebx,ds:dword ptr[_bbextents]
  248. mov ebp,ds:dword ptr[_bbextentt] ; note: ebp keeps this value through the end of this block
  249. cmp esi,ebx
  250. ja LClampHighOrLow0 ; unsigned compare catches both negative and too-large s
  251. LClampReentry0:
  252. mov ds:dword ptr[s],esi
  253. mov ebx,ds:dword ptr[pbase]
  254. shl esi,16 ; keep only the fractional 16 bits, left-justified
  255. cmp edx,ebp
  256. mov ds:dword ptr[sfracf],esi ; mov leaves the flags of the cmp above intact
  257. ja LClampHighOrLow1 ; unsigned compare catches both negative and too-large t
  258. LClampReentry1:
  259. mov ds:dword ptr[t],edx
  260. mov esi,ds:dword ptr[s] ; sfrac = scans->sfrac;
  261. shl edx,16 ; keep only the fractional 16 bits, left-justified
  262. mov eax,ds:dword ptr[t] ; tfrac = scans->tfrac;
  263. sar esi,16 ; integer part of s
  264. mov ds:dword ptr[tfracf],edx
  265. ;
  266. ; calculate the texture starting address
  267. ;
  268. sar eax,16 ; integer part of t
  269. add esi,ebx
  270. imul eax,ds:dword ptr[_cachewidth] ; (tfrac >> 16) * cachewidth
  271. add esi,eax ; psource = pbase + (sfrac >> 16) +
  272. ; ((tfrac >> 16) * cachewidth);
  273. ;
  274. ; determine whether last span or not
  275. ;
  276. cmp ecx,8
  277. jna LLastSegment ; 8 or fewer: ecx already holds pixels - 1
  278. ;
  279. ; not the last segment; do full 8-wide segment
  280. ;
  ; Register roles in the unrolled pixel code of this block:
  ;   esi = source texel pointer         edi = destination pixel pointer
  ;   ecx = z-buffer pointer (pz)        ebp = izi, 16-bit halves swapped
  ;   ebx = s fraction (sfracf)          edx = t fraction (tfracf)
  ;   al  = texel; eax is then reused as the 0 / -1 t-carry mask
  ; The count of pixels remaining after this segment lives on the stack.
  ;
  ; Per pixel: the pixel is skipped if bp (upper 16 bits of 1/z) is less than
  ; the z-buffer word (signed compare) or if the texel equals
  ; TRANSPARENT_COLOR; otherwise both the z-buffer word and the pixel are
  ; written.  Then izi is advanced (the carry out of the swapped fraction is
  ; folded back in by adc ebp,0), and esi is advanced by advancetable[1], or
  ; by advancetable[0] when the t fraction carried, plus the s-fraction carry.
  281. LNotLastSegment:
  282. ;
  283. ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  284. ; get there
  285. ;
  286. ; pick up after the FDIV that was left in flight previously
  287. fld st(0) ; duplicate it
  288. fmul st(0),st(4) ; s = s/z * z
  289. fxch st(1)
  290. fmul st(0),st(3) ; t = t/z * z
  291. fxch st(1)
  292. fistp ds:dword ptr[snext]
  293. fistp ds:dword ptr[tnext] ; FP stack is back to 1/z | t/z | s/z
  294. mov eax,ds:dword ptr[snext]
  295. mov edx,ds:dword ptr[tnext]
  296. sub ecx,8 ; count off this segments' pixels
  297. mov ebp,ds:dword ptr[_sadjust]
  298. push ecx ; remember count of remaining pixels
  299. mov ecx,ds:dword ptr[_tadjust]
  300. add ebp,eax ; ebp = snext + sadjust
  301. add ecx,edx ; ecx = tnext + tadjust
  302. mov eax,ds:dword ptr[_bbextents]
  303. mov edx,ds:dword ptr[_bbextentt]
  304. cmp ebp,2048
  305. jl LClampLow2
  306. cmp ebp,eax
  307. ja LClampHigh2
  308. LClampReentry2:
  309. cmp ecx,2048
  310. jl LClampLow3
  311. cmp ecx,edx
  312. ja LClampHigh3
  313. LClampReentry3:
  314. mov ds:dword ptr[snext],ebp
  315. mov ds:dword ptr[tnext],ecx
  316. sub ebp,ds:dword ptr[s] ; ebp = s delta across the 8-pixel segment
  317. sub ecx,ds:dword ptr[t] ; ecx = t delta across the 8-pixel segment
  318. ;
  319. ; set up advancetable
  320. ;
  321. mov eax,ecx
  322. mov edx,ebp
  323. sar edx,19 ; sstep >>= 16; (the extra 3 bits divide the delta by 8)
  324. mov ebx,ds:dword ptr[_cachewidth]
  325. sar eax,19 ; tstep >>= 16; (the extra 3 bits divide the delta by 8)
  326. jz LIsZero ; skip the multiply when the integer t step is 0
  327. imul eax,ebx ; (tstep >> 16) * cachewidth;
  328. LIsZero:
  329. add eax,edx ; add in sstep
  330. ; (tstep >> 16) * cachewidth + (sstep >> 16);
  331. mov edx,ds:dword ptr[tfracf]
  332. mov ds:dword ptr[advancetable+4],eax ; advance base in t
  333. add eax,ebx ; ((tstep >> 16) + 1) * cachewidth +
  334. ; (sstep >> 16);
  335. shl ebp,13 ; left-justify sstep fractional part
  336. mov ds:dword ptr[sstep],ebp
  337. mov ebx,ds:dword ptr[sfracf]
  338. shl ecx,13 ; left-justify tstep fractional part
  339. mov ds:dword ptr[advancetable],eax ; advance extra in t
  340. mov ds:dword ptr[tstep],ecx
  341. mov ecx,ds:dword ptr[pz]
  342. mov ebp,ds:dword ptr[izi]
  ; ---- pixel 0 ----
  343. cmp bp,ds:word ptr[ecx]
  344. jl Lp1
  345. mov al,ds:byte ptr[esi] ; get first source texel
  346. cmp al,offset TRANSPARENT_COLOR
  347. jz Lp1
  348. mov ds:word ptr[ecx],bp
  349. mov ds:byte ptr[edi],al ; store first dest pixel
  350. Lp1:
  351. add ebp,ds:dword ptr[izistep]
  352. adc ebp,0
  353. add edx,ds:dword ptr[tstep] ; advance tfrac fractional part by tstep frac
  354. sbb eax,eax ; turn tstep carry into -1 (0 if none)
  355. add ebx,ds:dword ptr[sstep] ; advance sfrac fractional part by sstep frac
  356. adc esi,ds:dword ptr[advancetable+4+eax*4] ; point to next source texel
  ; ---- pixel 1 ----
  357. cmp bp,ds:word ptr[2+ecx]
  358. jl Lp2
  359. mov al,ds:byte ptr[esi]
  360. cmp al,offset TRANSPARENT_COLOR
  361. jz Lp2
  362. mov ds:word ptr[2+ecx],bp
  363. mov ds:byte ptr[1+edi],al
  364. Lp2:
  365. add ebp,ds:dword ptr[izistep]
  366. adc ebp,0
  367. add edx,ds:dword ptr[tstep]
  368. sbb eax,eax
  369. add ebx,ds:dword ptr[sstep]
  370. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; ---- pixel 2 ----
  371. cmp bp,ds:word ptr[4+ecx]
  372. jl Lp3
  373. mov al,ds:byte ptr[esi]
  374. cmp al,offset TRANSPARENT_COLOR
  375. jz Lp3
  376. mov ds:word ptr[4+ecx],bp
  377. mov ds:byte ptr[2+edi],al
  378. Lp3:
  379. add ebp,ds:dword ptr[izistep]
  380. adc ebp,0
  381. add edx,ds:dword ptr[tstep]
  382. sbb eax,eax
  383. add ebx,ds:dword ptr[sstep]
  384. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; ---- pixel 3 ----
  385. cmp bp,ds:word ptr[6+ecx]
  386. jl Lp4
  387. mov al,ds:byte ptr[esi]
  388. cmp al,offset TRANSPARENT_COLOR
  389. jz Lp4
  390. mov ds:word ptr[6+ecx],bp
  391. mov ds:byte ptr[3+edi],al
  392. Lp4:
  393. add ebp,ds:dword ptr[izistep]
  394. adc ebp,0
  395. add edx,ds:dword ptr[tstep]
  396. sbb eax,eax
  397. add ebx,ds:dword ptr[sstep]
  398. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; ---- pixel 4 ----
  399. cmp bp,ds:word ptr[8+ecx]
  400. jl Lp5
  401. mov al,ds:byte ptr[esi]
  402. cmp al,offset TRANSPARENT_COLOR
  403. jz Lp5
  404. mov ds:word ptr[8+ecx],bp
  405. mov ds:byte ptr[4+edi],al
  406. Lp5:
  407. add ebp,ds:dword ptr[izistep]
  408. adc ebp,0
  409. add edx,ds:dword ptr[tstep]
  410. sbb eax,eax
  411. add ebx,ds:dword ptr[sstep]
  412. adc esi,ds:dword ptr[advancetable+4+eax*4]
  413. ;
  414. ; start FDIV for end of next segment in flight, so it can overlap
  415. ;
  416. pop eax ; eax = pixels remaining after this segment
  417. cmp eax,8 ; more than one segment after this?
  418. ja LSetupNotLast2 ; yes
  419. dec eax ; eax = remaining pixels - 1 = steps in the last segment
  420. jz LFDIVInFlight2 ; if only one pixel, no need to start an FDIV
  421. mov ds:dword ptr[spancountminus1],eax
  ; advance 1/z, t/z and s/z by (remaining - 1) steps to the span's last pixel
  422. fild ds:dword ptr[spancountminus1]
  423. fld ds:dword ptr[_d_zistepu] ; _d_zistepu | spancountminus1
  424. fmul st(0),st(1) ; _d_zistepu*scm1 | scm1
  425. fld ds:dword ptr[_d_tdivzstepu] ; _d_tdivzstepu | _d_zistepu*scm1 | scm1
  426. fmul st(0),st(2) ; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
  427. fxch st(1) ; _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
  428. faddp st(3),st(0) ; _d_tdivzstepu*scm1 | scm1
  429. fxch st(1) ; scm1 | _d_tdivzstepu*scm1
  430. fmul ds:dword ptr[_d_sdivzstepu] ; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
  431. fxch st(1) ; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
  432. faddp st(3),st(0) ; _d_sdivzstepu*scm1
  433. fld ds:dword ptr[fp_64k] ; 64k | _d_sdivzstepu*scm1
  434. fxch st(1) ; _d_sdivzstepu*scm1 | 64k
  435. faddp st(4),st(0) ; 64k
  436. fdiv st(0),st(1) ; this is what we've gone to all this trouble to
  437. ; overlap
  438. jmp LFDIVInFlight2
  439. align 4
  440. LSetupNotLast2:
  ; advance 1/z, s/z and t/z by one 8-pixel segment
  441. fadd ds:dword ptr[zi8stepu]
  442. fxch st(2)
  443. fadd ds:dword ptr[sdivz8stepu]
  444. fxch st(2)
  445. fld ds:dword ptr[tdivz8stepu]
  446. faddp st(2),st(0)
  447. fld ds:dword ptr[fp_64k]
  448. fdiv st(0),st(1) ; z = 1/1/z
  449. ; this is what we've gone to all this trouble to
  450. ; overlap
  451. LFDIVInFlight2:
  452. push eax ; save the (possibly decremented) remaining count; eax is reused below
  ; ---- pixel 5 ----
  453. cmp bp,ds:word ptr[10+ecx]
  454. jl Lp6
  455. mov al,ds:byte ptr[esi]
  456. cmp al,offset TRANSPARENT_COLOR
  457. jz Lp6
  458. mov ds:word ptr[10+ecx],bp
  459. mov ds:byte ptr[5+edi],al
  460. Lp6:
  461. add ebp,ds:dword ptr[izistep]
  462. adc ebp,0
  463. add edx,ds:dword ptr[tstep]
  464. sbb eax,eax
  465. add ebx,ds:dword ptr[sstep]
  466. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; ---- pixel 6 ----
  467. cmp bp,ds:word ptr[12+ecx]
  468. jl Lp7
  469. mov al,ds:byte ptr[esi]
  470. cmp al,offset TRANSPARENT_COLOR
  471. jz Lp7
  472. mov ds:word ptr[12+ecx],bp
  473. mov ds:byte ptr[6+edi],al
  474. Lp7:
  475. add ebp,ds:dword ptr[izistep]
  476. adc ebp,0
  477. add edx,ds:dword ptr[tstep]
  478. sbb eax,eax
  479. add ebx,ds:dword ptr[sstep]
  480. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; ---- pixel 7 ----
  481. cmp bp,ds:word ptr[14+ecx]
  482. jl Lp8
  483. mov al,ds:byte ptr[esi]
  484. cmp al,offset TRANSPARENT_COLOR
  485. jz Lp8
  486. mov ds:word ptr[14+ecx],bp
  487. mov ds:byte ptr[7+edi],al
  488. Lp8:
  489. add ebp,ds:dword ptr[izistep]
  490. adc ebp,0
  491. add edx,ds:dword ptr[tstep]
  492. sbb eax,eax
  493. add ebx,ds:dword ptr[sstep]
  494. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; step the destination and z-buffer pointers past this segment and make the
  ; segment-end s/t the start values of the next segment
  495. add edi,8
  496. add ecx,16
  497. mov ds:dword ptr[tfracf],edx
  498. mov edx,ds:dword ptr[snext]
  499. mov ds:dword ptr[sfracf],ebx
  500. mov ebx,ds:dword ptr[tnext]
  501. mov ds:dword ptr[s],edx
  502. mov ds:dword ptr[t],ebx
  503. mov ds:dword ptr[pz],ecx
  504. mov ds:dword ptr[izi],ebp
  505. pop ecx ; retrieve count
  506. ;
  507. ; determine whether last span or not
  508. ;
  509. cmp ecx,8 ; are there multiple segments remaining?
  510. ja LNotLastSegment ; yes
  511. ;
  512. ; last segment of scan
  513. ;
  ; On entry ecx = pixels left in the span minus one (0..7), i.e. the number of
  ; steps still to take.  For ecx > 0 the FP stack holds z*64k | 1/z | t/z | s/z
  ; for the span's last pixel; for ecx = 0 no FDIV was started.
  ; The code computes per-pixel s/t steps, builds advancetable, and then
  ; dispatches through spr8entryvec_table[ecx] with a push/ret pair.
  514. LLastSegment:
  515. ;
  516. ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  517. ; get there. The number of pixels left is variable, and we want to land on the
  518. ; last pixel, not step one past it, so we can't run into arithmetic problems
  519. ;
  520. test ecx,ecx
  521. jz LNoSteps ; just draw the last pixel and we're done
  522. ; pick up after the FDIV that was left in flight previously
  523. fld st(0) ; duplicate it
  524. fmul st(0),st(4) ; s = s/z * z
  525. fxch st(1)
  526. fmul st(0),st(3) ; t = t/z * z
  527. fxch st(1)
  528. fistp ds:dword ptr[snext]
  529. fistp ds:dword ptr[tnext] ; FP stack is back to 1/z | t/z | s/z
  530. mov ebx,ds:dword ptr[_tadjust]
  531. mov eax,ds:dword ptr[_sadjust]
  532. add eax,ds:dword ptr[snext] ; eax = snext + sadjust
  533. add ebx,ds:dword ptr[tnext] ; ebx = tnext + tadjust
  534. mov ebp,ds:dword ptr[_bbextents]
  535. mov edx,ds:dword ptr[_bbextentt]
  536. cmp eax,2048
  537. jl LClampLow4
  538. cmp eax,ebp
  539. ja LClampHigh4
  540. LClampReentry4:
  541. mov ds:dword ptr[snext],eax
  542. cmp ebx,2048
  543. jl LClampLow5
  544. cmp ebx,edx
  545. ja LClampHigh5
  546. LClampReentry5:
  547. cmp ecx,1 ; don't bother
  548. je LOnlyOneStep ; if two pixels in segment, there's only one step,
  549. ; of the segment length
  550. sub eax,ds:dword ptr[s] ; eax = s delta across the segment
  551. sub ebx,ds:dword ptr[t] ; ebx = t delta across the segment
  552. add eax,eax ; convert to 15.17 format so multiply by 1.31
  553. add ebx,ebx ; reciprocal yields 16.48
  ; one-operand imul: edx:eax = eax * operand; the step is taken from edx.
  ; The table is indexed from ecx = 2, hence the -8 byte bias.
  554. imul ds:dword ptr[reciprocal_table-8+ecx*4] ; sstep = (snext - s) / (spancount-1)
  555. mov ebp,edx ; ebp = sstep
  556. mov eax,ebx
  557. imul ds:dword ptr[reciprocal_table-8+ecx*4] ; tstep = (tnext - t) / (spancount-1)
  ; here ebp = sstep and edx = tstep, both 16.16
  558. LSetEntryvec:
  559. ;
  560. ; set up advancetable
  561. ;
  562. mov ebx,ds:dword ptr[spr8entryvec_table+ecx*4]
  563. mov eax,edx ; eax = tstep (kept for its fractional part)
  564. push ebx ; entry point into code for RET later
  565. mov ecx,ebp
  566. sar ecx,16 ; sstep >>= 16;
  567. mov ebx,ds:dword ptr[_cachewidth]
  568. sar edx,16 ; tstep >>= 16;
  569. jz LIsZeroLast ; skip the multiply when the integer t step is 0
  570. imul edx,ebx ; (tstep >> 16) * cachewidth;
  571. LIsZeroLast:
  572. add edx,ecx ; add in sstep
  573. ; (tstep >> 16) * cachewidth + (sstep >> 16);
  574. mov ecx,ds:dword ptr[tfracf]
  575. mov ds:dword ptr[advancetable+4],edx ; advance base in t
  576. add edx,ebx ; ((tstep >> 16) + 1) * cachewidth +
  577. ; (sstep >> 16);
  578. shl ebp,16 ; left-justify sstep fractional part
  579. mov ebx,ds:dword ptr[sfracf]
  580. shl eax,16 ; left-justify tstep fractional part
  581. mov ds:dword ptr[advancetable],edx ; advance extra in t
  582. mov ds:dword ptr[tstep],eax
  583. mov ds:dword ptr[sstep],ebp
  ; load the registers the pixel code expects: edx = t fraction,
  ; ebx = s fraction (loaded above), ecx = z-buffer pointer, ebp = izi
  584. mov edx,ecx
  585. mov ecx,ds:dword ptr[pz]
  586. mov ebp,ds:dword ptr[izi]
  587. ret ; jump to the number-of-pixels handler
  588. ;----------------------------------------
  ; LNoSteps: exactly one pixel is left in the span.  Bias edi and ecx so that
  ; the hardwired [7+edi] / [14+ecx] operands at LEndSpan address that pixel,
  ; then draw it there.
  589. LNoSteps:
  590. mov ecx,ds:dword ptr[pz]
  ; LEndSpan depth-tests and depth-writes with bp.  When the whole span is a
  ; single pixel, control arrives here from LFDIVInFlight1 with ebp still
  ; holding _bbextentt, so the depth must be reloaded from izi.  On the other
  ; path (end of an 8-pixel segment) ebp was just stored to izi, so the reload
  ; changes nothing there.  (Line added by the fix; it carries no paste number.)
  mov ebp,ds:dword ptr[izi]
  591. sub edi,7 ; adjust for hardwired offset
  592. sub ecx,14
  593. jmp LEndSpan
  ; LOnlyOneStep: two pixels are left, so the single step is the whole s/t
  ; delta and no reciprocal multiply is needed.  On entry eax = clamped end s,
  ; ebx = clamped end t; leaves ebp = sstep and edx = tstep for LSetEntryvec.
  594. LOnlyOneStep:
  595. sub eax,ds:dword ptr[s]
  596. sub ebx,ds:dword ptr[t]
  597. mov ebp,eax
  598. mov edx,ebx
  599. jmp LSetEntryvec
  600. ;----------------------------------------
  ; Spr8EntryN_8 (N = 2..7): entry points for a final segment of N pixels.
  ; Each one backs edi (1 byte per pixel) and ecx (2 bytes per z entry) up by
  ; 8-N pixels so that the hardwired offsets in the shared unrolled code land on
  ; the right pixels, then jumps into that code N pixels before its end.
  ; Reached through the "ret" at the end of the LLastSegment setup, which pops
  ; an address read from spr8entryvec_table (table contents are defined outside
  ; this file -- presumably these labels; confirm).
  601. public Spr8Entry2_8
  602. Spr8Entry2_8:
  603. sub edi,6 ; adjust for hardwired offsets
  604. sub ecx,12
  605. mov al,ds:byte ptr[esi] ; not needed for the result: LLEntry2_8 reloads al before using it
  606. jmp LLEntry2_8
  607. ;----------------------------------------
  608. public Spr8Entry3_8
  609. Spr8Entry3_8:
  610. sub edi,5 ; adjust for hardwired offsets
  611. sub ecx,10
  612. jmp LLEntry3_8
  613. ;----------------------------------------
  614. public Spr8Entry4_8
  615. Spr8Entry4_8:
  616. sub edi,4 ; adjust for hardwired offsets
  617. sub ecx,8
  618. jmp LLEntry4_8
  619. ;----------------------------------------
  620. public Spr8Entry5_8
  621. Spr8Entry5_8:
  622. sub edi,3 ; adjust for hardwired offsets
  623. sub ecx,6
  624. jmp LLEntry5_8
  625. ;----------------------------------------
  626. public Spr8Entry6_8
  627. Spr8Entry6_8:
  628. sub edi,2 ; adjust for hardwired offsets
  629. sub ecx,4
  630. jmp LLEntry6_8
  631. ;----------------------------------------
  632. public Spr8Entry7_8
  633. Spr8Entry7_8:
  634. dec edi ; adjust for hardwired offsets
  635. sub ecx,2
  636. jmp LLEntry7_8
  637. ;----------------------------------------
  ; Spr8Entry8_8: unrolled code for the final segment of a span (up to 8
  ; pixels).  The LLEntryN_8 labels are mid-sequence entry points used by the
  ; Spr8EntryN_8 stubs; LEndSpan draws the last pixel, which needs no stepping
  ; afterwards.
  ; Expects: esi = source texel pointer, edi = destination pointer,
  ;          ecx = z-buffer pointer, ebp = izi (16-bit halves swapped),
  ;          ebx = s fraction, edx = t fraction, [sstep]/[tstep]/[advancetable]
  ;          set up, 1/z | t/z | s/z on the FP stack, and the span pointer on
  ;          top of the CPU stack.
  ; Per pixel: skipped if bp is less than the z-buffer word (signed compare) or
  ; the texel equals TRANSPARENT_COLOR; otherwise z and pixel are written.
  638. public Spr8Entry8_8
  639. Spr8Entry8_8:
  640. cmp bp,ds:word ptr[ecx]
  641. jl Lp9
  642. mov al,ds:byte ptr[esi]
  643. cmp al,offset TRANSPARENT_COLOR
  644. jz Lp9
  645. mov ds:word ptr[ecx],bp
  646. mov ds:byte ptr[edi],al
  647. Lp9:
  648. add ebp,ds:dword ptr[izistep] ; advance 1/z ...
  649. adc ebp,0 ; ... folding the fraction's carry back into the low half
  650. add edx,ds:dword ptr[tstep] ; advance t fraction
  651. sbb eax,eax ; eax = -1 if the t fraction carried, else 0
  652. add ebx,ds:dword ptr[sstep] ; advance s fraction; its carry feeds the adc below
  653. adc esi,ds:dword ptr[advancetable+4+eax*4] ; next source texel
  654. LLEntry7_8:
  655. cmp bp,ds:word ptr[2+ecx]
  656. jl Lp10
  657. mov al,ds:byte ptr[esi]
  658. cmp al,offset TRANSPARENT_COLOR
  659. jz Lp10
  660. mov ds:word ptr[2+ecx],bp
  661. mov ds:byte ptr[1+edi],al
  662. Lp10:
  663. add ebp,ds:dword ptr[izistep]
  664. adc ebp,0
  665. add edx,ds:dword ptr[tstep]
  666. sbb eax,eax
  667. add ebx,ds:dword ptr[sstep]
  668. adc esi,ds:dword ptr[advancetable+4+eax*4]
  669. LLEntry6_8:
  670. cmp bp,ds:word ptr[4+ecx]
  671. jl Lp11
  672. mov al,ds:byte ptr[esi]
  673. cmp al,offset TRANSPARENT_COLOR
  674. jz Lp11
  675. mov ds:word ptr[4+ecx],bp
  676. mov ds:byte ptr[2+edi],al
  677. Lp11:
  678. add ebp,ds:dword ptr[izistep]
  679. adc ebp,0
  680. add edx,ds:dword ptr[tstep]
  681. sbb eax,eax
  682. add ebx,ds:dword ptr[sstep]
  683. adc esi,ds:dword ptr[advancetable+4+eax*4]
  684. LLEntry5_8:
  685. cmp bp,ds:word ptr[6+ecx]
  686. jl Lp12
  687. mov al,ds:byte ptr[esi]
  688. cmp al,offset TRANSPARENT_COLOR
  689. jz Lp12
  690. mov ds:word ptr[6+ecx],bp
  691. mov ds:byte ptr[3+edi],al
  692. Lp12:
  693. add ebp,ds:dword ptr[izistep]
  694. adc ebp,0
  695. add edx,ds:dword ptr[tstep]
  696. sbb eax,eax
  697. add ebx,ds:dword ptr[sstep]
  698. adc esi,ds:dword ptr[advancetable+4+eax*4]
  699. LLEntry4_8:
  700. cmp bp,ds:word ptr[8+ecx]
  701. jl Lp13
  702. mov al,ds:byte ptr[esi]
  703. cmp al,offset TRANSPARENT_COLOR
  704. jz Lp13
  705. mov ds:word ptr[8+ecx],bp
  706. mov ds:byte ptr[4+edi],al
  707. Lp13:
  708. add ebp,ds:dword ptr[izistep]
  709. adc ebp,0
  710. add edx,ds:dword ptr[tstep]
  711. sbb eax,eax
  712. add ebx,ds:dword ptr[sstep]
  713. adc esi,ds:dword ptr[advancetable+4+eax*4]
  714. LLEntry3_8:
  715. cmp bp,ds:word ptr[10+ecx]
  716. jl Lp14
  717. mov al,ds:byte ptr[esi]
  718. cmp al,offset TRANSPARENT_COLOR
  719. jz Lp14
  720. mov ds:word ptr[10+ecx],bp
  721. mov ds:byte ptr[5+edi],al
  722. Lp14:
  723. add ebp,ds:dword ptr[izistep]
  724. adc ebp,0
  725. add edx,ds:dword ptr[tstep]
  726. sbb eax,eax
  727. add ebx,ds:dword ptr[sstep]
  728. adc esi,ds:dword ptr[advancetable+4+eax*4]
  729. LLEntry2_8:
  730. cmp bp,ds:word ptr[12+ecx]
  731. jl Lp15
  732. mov al,ds:byte ptr[esi]
  733. cmp al,offset TRANSPARENT_COLOR
  734. jz Lp15
  735. mov ds:word ptr[12+ecx],bp
  736. mov ds:byte ptr[6+edi],al
  737. Lp15:
  738. add ebp,ds:dword ptr[izistep]
  739. adc ebp,0
  740. add edx,ds:dword ptr[tstep]
  741. sbb eax,eax
  742. add ebx,ds:dword ptr[sstep]
  743. adc esi,ds:dword ptr[advancetable+4+eax*4]
  ; last pixel of the span; no stepping is needed after it
  744. LEndSpan:
  745. cmp bp,ds:word ptr[14+ecx]
  746. jl Lp16
  747. mov al,ds:byte ptr[esi] ; load the texel for the span's final pixel
  748. cmp al,offset TRANSPARENT_COLOR
  749. jz Lp16
  750. mov ds:word ptr[14+ecx],bp
  751. mov ds:byte ptr[7+edi],al
  752. Lp16:
  753. ;
  754. ; clear s/z, t/z, 1/z from FP stack
  755. ;
  756. fstp st(0)
  757. fstp st(0)
  758. fstp st(0)
  759. pop ebx ; restore spans pointer
  ; Advance ebx to the next span descriptor and decide what to do with it:
  ; count > 0 draws it, count == 0 skips it, count < 0 ends the list.
  ; Also the target of the initial "no pixels in the first span" branch, which
  ; arrives with nothing extra on the stack.
  760. LNextSpan:
  761. add ebx,offset sspan_t_size ; point to next span
  762. mov ecx,ds:dword ptr[sspan_t_count+ebx]
  763. cmp ecx,0 ; any more spans?
  764. jg LSpanLoop ; yes
  765. jz LNextSpan ; yes, but this one's empty
  ; negative count: end of the span list; undo the four pushes made at entry
  766. pop ebx ; restore register variables
  767. pop esi
  768. pop edi
  769. pop ebp ; restore the caller's stack frame
  770. ret
  771. _TEXT ENDS
  772. endif ; id386
  773. END