r_draw16.asm 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235
  1. .386P
  2. .model FLAT
  3. ;
  4. ; d_draw16.s
  5. ; x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
  6. ; subdivision.
  7. ;
  8. include qasm.inc
  9. include d_if.inc
  10. if id386
  11. ;----------------------------------------------------------------------
  12. ; 8-bpp horizontal span drawing code for polygons, with no transparency and
  13. ; 16-pixel subdivision.
  14. ;
  15. ; Assumes there is at least one span in pspans, and that every span
  16. ; contains at least one pixel
  17. ;----------------------------------------------------------------------
  18. _DATA SEGMENT
  19. _DATA ENDS
  20. _TEXT SEGMENT
  21. ; out-of-line, rarely-needed clamping code
  ;
  ; Each stub is entered by a conditional jump from a range check in
  ; _D_DrawSpans16, loads the clamped value into the register that was
  ; tested, and jumps back to the matching LClampReentryN label.
  ;
  ; stubs 0/1: initial s (esi) and t (edx) of a span; low clamp is 0
  22. LClampHigh0:
  23. mov esi,ds:dword ptr[_bbextents]
  24. jmp LClampReentry0
  25. LClampHighOrLow0:
  ; reached by JA (unsigned above) after "cmp esi,ebx" with ebx = bbextents.
  ; The flags of that compare are still live, so JG re-tests them as a signed
  ; compare: signed-greater means too high, otherwise the value was negative
  ; (which looks "above" when unsigned) and is clamped to 0.
  26. jg LClampHigh0
  27. xor esi,esi
  28. jmp LClampReentry0
  29. LClampHigh1:
  30. mov edx,ds:dword ptr[_bbextentt]
  31. jmp LClampReentry1
  32. LClampHighOrLow1:
  ; same trick as LClampHighOrLow0, for "cmp edx,ebp" with ebp = bbextentt
  33. jg LClampHigh1
  34. xor edx,edx
  35. jmp LClampReentry1
  ; stubs 2/3: s (ebp) and t (ecx) at the end of a full 16-pixel segment.
  ; The Low stubs are reached by JL after a compare against 4096, the High
  ; stubs by JA after a compare against bbextents/bbextentt.
  36. LClampLow2:
  37. mov ebp,4096
  38. jmp LClampReentry2
  39. LClampHigh2:
  40. mov ebp,ds:dword ptr[_bbextents]
  41. jmp LClampReentry2
  42. LClampLow3:
  43. mov ecx,4096
  44. jmp LClampReentry3
  45. LClampHigh3:
  46. mov ecx,ds:dword ptr[_bbextentt]
  47. jmp LClampReentry3
  ; stubs 4/5: s (eax) and t (ebx) at the end of the last, variable-length
  ; segment; same limits as stubs 2/3
  48. LClampLow4:
  49. mov eax,4096
  50. jmp LClampReentry4
  51. LClampHigh4:
  52. mov eax,ds:dword ptr[_bbextents]
  53. jmp LClampReentry4
  54. LClampLow5:
  55. mov ebx,4096
  56. jmp LClampReentry5
  57. LClampHigh5:
  58. mov ebx,ds:dword ptr[_bbextentt]
  59. jmp LClampReentry5
  ; _D_DrawSpans16: entry point. Takes one stack argument, the pointer to the
  ; first span descriptor, and walks the list through espan_t_pnext until it
  ; reads a null pointer.
  ;
  ; pspans is the offset of that argument from esp once the prologue has run:
  ; 4 bytes of return address plus 16 bytes for the four registers pushed below.
  60. pspans equ 4+16
  61. align 4
  62. public _D_DrawSpans16
  63. _D_DrawSpans16:
  64. push ebp ; preserve caller's stack frame
  65. push edi
  66. push esi ; preserve register variables
  67. push ebx
  68. ;
  69. ; set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
  70. ; and span list pointers
  71. ;
  72. ; TODO: any overlap from rearranging?
  ; The three products are built on the FP stack in the order s/z, t/z, 1/z and
  ; stored in the reverse order, with the integer moves interleaved between the
  ; FP instructions. fp_16 is defined outside this view (presumably 16.0).
  73. fld ds:dword ptr[_d_sdivzstepu]
  74. fmul ds:dword ptr[fp_16]
  75. mov edx,ds:dword ptr[_cacheblock]
  76. fld ds:dword ptr[_d_tdivzstepu]
  77. fmul ds:dword ptr[fp_16]
  78. mov ebx,ds:dword ptr[pspans+esp] ; point to the first span descriptor
  79. fld ds:dword ptr[_d_zistepu]
  80. fmul ds:dword ptr[fp_16]
  81. mov ds:dword ptr[pbase],edx ; pbase = cacheblock
  82. fstp ds:dword ptr[zi16stepu]
  83. fstp ds:dword ptr[tdivz16stepu]
  84. fstp ds:dword ptr[sdivz16stepu]
  ; falls through into LSpanLoop with ebx = current span descriptor
  85. LSpanLoop:
  ; Top of the per-span loop. On entry ebx points to the span descriptor and
  ; the FP stack holds nothing belonging to this routine. The stack-state
  ; comments list st(0) first.
  86. ;
  87. ; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
  88. ; initial s and t values
  89. ;
  90. ; FIXME: pipeline FILD?
  91. fild ds:dword ptr[espan_t_v+ebx]
  92. fild ds:dword ptr[espan_t_u+ebx]
  93. fld st(1) ; dv | du | dv
  94. fmul ds:dword ptr[_d_sdivzstepv] ; dv*d_sdivzstepv | du | dv
  95. fld st(1) ; du | dv*d_sdivzstepv | du | dv
  96. fmul ds:dword ptr[_d_sdivzstepu] ; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  97. fld st(2) ; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  98. fmul ds:dword ptr[_d_tdivzstepu] ; du*d_tdivzstepu | du*d_sdivzstepu |
  99. ; dv*d_sdivzstepv | du | dv
  100. fxch st(1) ; du*d_sdivzstepu | du*d_tdivzstepu |
  101. ; dv*d_sdivzstepv | du | dv
  102. faddp st(2),st(0) ; du*d_tdivzstepu |
  103. ; du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
  104. fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv |
  105. ; du*d_tdivzstepu | du | dv
  106. fld st(3) ; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
  107. ; du*d_tdivzstepu | du | dv
  108. fmul ds:dword ptr[_d_tdivzstepv] ; dv*d_tdivzstepv |
  109. ; du*d_sdivzstepu + dv*d_sdivzstepv |
  110. ; du*d_tdivzstepu | du | dv
  111. fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv |
  112. ; dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
  113. fadd ds:dword ptr[_d_sdivzorigin] ; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
  114. ; du*d_sdivzstepu; stays in %st(2) at end
  115. fxch st(4) ; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
  116. ; s/z
  117. fmul ds:dword ptr[_d_zistepv] ; dv*d_zistepv | dv*d_tdivzstepv |
  118. ; du*d_tdivzstepu | du | s/z
  119. fxch st(1) ; dv*d_tdivzstepv | dv*d_zistepv |
  120. ; du*d_tdivzstepu | du | s/z
  121. faddp st(2),st(0) ; dv*d_zistepv |
  122. ; dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
  123. fxch st(2) ; du | dv*d_tdivzstepv + du*d_tdivzstepu |
  124. ; dv*d_zistepv | s/z
  125. fmul ds:dword ptr[_d_zistepu] ; du*d_zistepu |
  126. ; dv*d_tdivzstepv + du*d_tdivzstepu |
  127. ; dv*d_zistepv | s/z
  128. fxch st(1) ; dv*d_tdivzstepv + du*d_tdivzstepu |
  129. ; du*d_zistepu | dv*d_zistepv | s/z
  130. fadd ds:dword ptr[_d_tdivzorigin] ; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
  131. ; du*d_tdivzstepu; stays in %st(1) at end
  132. fxch st(2) ; dv*d_zistepv | du*d_zistepu | t/z | s/z
  133. faddp st(1),st(0) ; dv*d_zistepv + du*d_zistepu | t/z | s/z
  134. fld ds:dword ptr[fp_64k] ; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
  135. fxch st(1) ; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
  136. fadd ds:dword ptr[_d_ziorigin] ; zi = d_ziorigin + dv*d_zistepv +
  137. ; du*d_zistepu; stays in %st(0) at end
  138. ; 1/z | fp_64k | t/z | s/z
  139. ;
  140. ; calculate and clamp s & t
  141. ;
  ; st(1) = fp_64k / (1/z); the integer code below runs while this divide
  ; is still executing
  142. fdiv st(1),st(0) ; 1/z | z*64k | t/z | s/z
  143. ;
  144. ; point %edi to the first pixel in the span
  145. ;
  146. mov ecx,ds:dword ptr[_d_viewbuffer]
  147. mov eax,ds:dword ptr[espan_t_v+ebx]
  148. mov ds:dword ptr[pspantemp],ebx ; preserve spans pointer
  ; edx/esi start out as tadjust/sadjust; s and t are added to them at
  ; LFDIVInFlight1
  149. mov edx,ds:dword ptr[_tadjust]
  150. mov esi,ds:dword ptr[_sadjust]
  151. mov edi,ds:dword ptr[_d_scantable+eax*4] ; v * screenwidth
  152. add edi,ecx
  153. mov ecx,ds:dword ptr[espan_t_u+ebx]
  154. add edi,ecx ; pdest = &pdestspan[scans->u];
  ; ecx = number of pixels in this span
  155. mov ecx,ds:dword ptr[espan_t_count+ebx]
  156. ;
  157. ; now start the FDIV for the end of the span
  158. ;
  ; Three variants follow. Each first turns z*64k into the integer s and t of
  ; the span's first pixel, then:
  ;   count > 16  (LSetupNotLast1): step s/z, t/z, 1/z by the 16-pixel steps
  ;                                 and start the divide for the segment end
  ;   2..16       (fall-through):   step by count-1 pixels and start the divide
  ;   count == 1  (LCleanup1):      no divide needed
  ; For count <= 16, ecx leaves here holding count-1; for count > 16 it still
  ; holds count.
  159. cmp ecx,16
  160. ja LSetupNotLast1
  161. dec ecx
  162. jz LCleanup1 ; if only one pixel, no need to start an FDIV
  163. mov ds:dword ptr[spancountminus1],ecx
  164. ; finish up the s and t calcs
  165. fxch st(1) ; z*64k | 1/z | t/z | s/z
  166. fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
  167. fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
  168. fxch st(1) ; z*64k | s | 1/z | t/z | s/z
  169. fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
  170. fxch st(1) ; s | t | 1/z | t/z | s/z
  171. fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z
  172. fistp ds:dword ptr[t] ; 1/z | t/z | s/z
  ; the stack comments below show only the newly pushed values;
  ; 1/z | t/z | s/z remain underneath them
  173. fild ds:dword ptr[spancountminus1]
  174. fld ds:dword ptr[_d_tdivzstepu] ; C(d_tdivzstepu) | spancountminus1
  175. fld ds:dword ptr[_d_zistepu] ; C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
  176. fmul st(0),st(2) ; C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
  177. fxch st(1) ; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
  178. fmul st(0),st(2) ; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
  179. fxch st(2) ; scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
  180. fmul ds:dword ptr[_d_sdivzstepu] ; C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
  181. ; C(d_tdivzstepu)*scm1
  182. fxch st(1) ; C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
  183. ; C(d_tdivzstepu)*scm1
  ; each FADDP below folds one product into 1/z, t/z and s/z in turn
  184. faddp st(3),st(0) ; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
  185. fxch st(1) ; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
  186. faddp st(3),st(0) ; C(d_sdivzstepu)*scm1
  187. faddp st(3),st(0)
  188. fld ds:dword ptr[fp_64k]
  189. fdiv st(0),st(1) ; this is what we've gone to all this trouble to
  190. ; overlap
  191. jmp LFDIVInFlight1
  192. LCleanup1:
  193. ; finish up the s and t calcs
  194. fxch st(1) ; z*64k | 1/z | t/z | s/z
  195. fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
  196. fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
  197. fxch st(1) ; z*64k | s | 1/z | t/z | s/z
  198. fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
  199. fxch st(1) ; s | t | 1/z | t/z | s/z
  200. fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z
  201. fistp ds:dword ptr[t] ; 1/z | t/z | s/z
  202. jmp LFDIVInFlight1
  203. align 4
  204. LSetupNotLast1:
  205. ; finish up the s and t calcs
  206. fxch st(1) ; z*64k | 1/z | t/z | s/z
  207. fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
  208. fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
  209. fxch st(1) ; z*64k | s | 1/z | t/z | s/z
  210. fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
  211. fxch st(1) ; s | t | 1/z | t/z | s/z
  212. fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z
  213. fistp ds:dword ptr[t] ; 1/z | t/z | s/z
  ; advance 1/z, s/z and t/z by the 16-pixel steps computed in the prologue
  214. fadd ds:dword ptr[zi16stepu]
  215. fxch st(2)
  216. fadd ds:dword ptr[sdivz16stepu]
  217. fxch st(2)
  218. fld ds:dword ptr[tdivz16stepu]
  219. faddp st(2),st(0)
  220. fld ds:dword ptr[fp_64k]
  221. fdiv st(0),st(1) ; z*64k = fp_64k / (1/z)
  222. ; this is what we've gone to all this trouble to
  223. ; overlap
  224. LFDIVInFlight1:
  ; All three setup variants join here. esi/edx still hold sadjust/tadjust
  ; from LSpanLoop; adding the converted s and t gives the span's starting
  ; texture coordinates, which are then clamped to [0, bbextents] and
  ; [0, bbextentt] via the out-of-line stubs 0 and 1.
  225. add esi,ds:dword ptr[s]
  226. add edx,ds:dword ptr[t]
  227. mov ebx,ds:dword ptr[_bbextents]
  228. mov ebp,ds:dword ptr[_bbextentt]
  229. cmp esi,ebx
  230. ja LClampHighOrLow0
  231. LClampReentry0:
  232. mov ds:dword ptr[s],esi
  233. mov ebx,ds:dword ptr[pbase]
  234. shl esi,16
  ; the MOV between this compare and the JA does not change the flags
  235. cmp edx,ebp
  236. mov ds:dword ptr[sfracf],esi
  237. ja LClampHighOrLow1
  238. LClampReentry1:
  239. mov ds:dword ptr[t],edx
  ; sfracf/tfracf hold the low 16 bits of s/t left-justified in a dword;
  ; esi/eax are reloaded with the full values and shifted down to the
  ; whole-texel parts
  240. mov esi,ds:dword ptr[s] ; sfrac = scans->sfrac;
  241. shl edx,16
  242. mov eax,ds:dword ptr[t] ; tfrac = scans->tfrac;
  243. sar esi,16
  244. mov ds:dword ptr[tfracf],edx
  245. ;
  246. ; calculate the texture starting address
  247. ;
  248. sar eax,16
  249. mov edx,ds:dword ptr[_cachewidth]
  250. imul eax,edx ; (tfrac >> 16) * cachewidth
  251. add esi,ebx
  252. add esi,eax ; psource = pbase + (sfrac >> 16) +
  253. ; ((tfrac >> 16) * cachewidth);
  254. ;
  255. ; determine whether last span or not
  256. ;
  ; ecx is the pixel count when it exceeds 16, otherwise count-1 (see the
  ; DEC ahead of the setup variants), so "not above 16" selects the
  ; variable-length tail
  257. cmp ecx,16
  258. jna LLastSegment
  259. ;
  260. ; not the last segment; do full 16-wide segment
  261. ;
  262. LNotLastSegment:
  ; On entry: FP stack = z*64k | 1/z | t/z | s/z (the divide started earlier),
  ; esi = psource, edi = pdest, ecx = pixels remaining in the span (> 16).
  263. ;
  264. ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  265. ; get there
  266. ;
  267. ; pick up after the FDIV that was left in flight previously
  268. fld st(0) ; duplicate it
  269. fmul st(0),st(4) ; s = s/z * z
  270. fxch st(1)
  271. fmul st(0),st(3) ; t = t/z * z
  272. fxch st(1)
  273. fistp ds:dword ptr[snext]
  274. fistp ds:dword ptr[tnext]
  275. mov eax,ds:dword ptr[snext]
  276. mov edx,ds:dword ptr[tnext]
  277. mov bl,ds:byte ptr[esi] ; get first source texel
  278. sub ecx,16 ; count off this segment's pixels
  279. mov ebp,ds:dword ptr[_sadjust]
  280. mov ds:dword ptr[counttemp],ecx ; remember count of remaining pixels
  281. mov ecx,ds:dword ptr[_tadjust]
  282. mov ds:byte ptr[edi],bl ; store first dest pixel
  283. add ebp,eax
  284. add ecx,edx
  ; clamp the segment-end s (ebp) and t (ecx) to [4096, bbextents] and
  ; [4096, bbextentt]; the clamped values are supplied by stubs 2 and 3
  285. mov eax,ds:dword ptr[_bbextents]
  286. mov edx,ds:dword ptr[_bbextentt]
  287. cmp ebp,4096
  288. jl LClampLow2
  289. cmp ebp,eax
  290. ja LClampHigh2
  291. LClampReentry2:
  292. cmp ecx,4096
  293. jl LClampLow3
  294. cmp ecx,edx
  295. ja LClampHigh3
  296. LClampReentry3:
  297. mov ds:dword ptr[snext],ebp
  298. mov ds:dword ptr[tnext],ecx
  ; ebp/ecx = change in s/t across the whole 16-pixel segment
  299. sub ebp,ds:dword ptr[s]
  300. sub ecx,ds:dword ptr[t]
  301. ;
  302. ; set up advancetable
  303. ;
  ; The deltas cover 16 pixels, so the shifts are 20 and 12 rather than 16:
  ; the extra 4 bits are the divide by 16. When the whole part of tstep is
  ; zero the IMUL by cachewidth is skipped (LZero).
  304. mov eax,ecx
  305. mov edx,ebp
  306. sar eax,20 ; tstep >>= 16;
  307. jz LZero
  308. sar edx,20 ; sstep >>= 16;
  309. mov ebx,ds:dword ptr[_cachewidth]
  310. imul eax,ebx
  311. jmp LSetUp1
  312. LZero:
  313. sar edx,20 ; sstep >>= 16;
  314. mov ebx,ds:dword ptr[_cachewidth]
  315. LSetUp1:
  316. add eax,edx ; add in sstep
  317. ; (tstep >> 16) * cachewidth + (sstep >> 16);
  318. mov edx,ds:dword ptr[tfracf]
  319. mov ds:dword ptr[advancetable+4],eax ; advance base in t
  320. add eax,ebx ; ((tstep >> 16) + 1) * cachewidth +
  321. ; (sstep >> 16);
  322. shl ebp,12 ; left-justify sstep fractional part
  323. mov ebx,ds:dword ptr[sfracf]
  324. shl ecx,12 ; left-justify tstep fractional part
  325. mov ds:dword ptr[advancetable],eax ; advance extra in t
  326. mov ds:dword ptr[tstep],ecx
  ; Per-pixel stepping idiom, repeated for every pixel from here on:
  ;   add edx,tstep   - carry set when the t fraction wraps
  ;   sbb ecx,ecx     - ecx = -1 on carry, 0 otherwise
  ;   add ebx,ebp     - carry set when the s fraction wraps
  ;   adc esi,[advancetable+4+ecx*4]
  ;                   - adds advancetable[1] (base step) or advancetable[0]
  ;                     (base step + one extra row), plus the s carry
  ; Registers: edx = t fraction, ebx = s fraction, ebp = s fraction step,
  ; esi = source pointer, al = texel in flight, edi = dest pointer.
  327. add edx,ecx ; advance tfrac fractional part by tstep frac
  328. sbb ecx,ecx ; turn tstep carry into -1 (0 if none)
  329. add ebx,ebp ; advance sfrac fractional part by sstep frac
  330. adc esi,ds:dword ptr[advancetable+4+ecx*4] ; point to next source texel
  331. add edx,ds:dword ptr[tstep]
  332. sbb ecx,ecx
  333. mov al,ds:byte ptr[esi]
  334. add ebx,ebp
  335. mov ds:byte ptr[1+edi],al
  336. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  337. add edx,ds:dword ptr[tstep]
  338. sbb ecx,ecx
  339. add ebx,ebp
  340. mov al,ds:byte ptr[esi]
  341. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  342. add edx,ds:dword ptr[tstep]
  343. sbb ecx,ecx
  344. mov ds:byte ptr[2+edi],al
  345. add ebx,ebp
  346. mov al,ds:byte ptr[esi]
  347. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  348. add edx,ds:dword ptr[tstep]
  349. sbb ecx,ecx
  350. mov ds:byte ptr[3+edi],al
  351. add ebx,ebp
  352. mov al,ds:byte ptr[esi]
  353. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  354. add edx,ds:dword ptr[tstep]
  355. sbb ecx,ecx
  356. mov ds:byte ptr[4+edi],al
  357. add ebx,ebp
  358. mov al,ds:byte ptr[esi]
  359. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  360. add edx,ds:dword ptr[tstep]
  361. sbb ecx,ecx
  362. mov ds:byte ptr[5+edi],al
  363. add ebx,ebp
  364. mov al,ds:byte ptr[esi]
  365. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  366. add edx,ds:dword ptr[tstep]
  367. sbb ecx,ecx
  368. mov ds:byte ptr[6+edi],al
  369. add ebx,ebp
  370. mov al,ds:byte ptr[esi]
  371. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  372. add edx,ds:dword ptr[tstep]
  373. sbb ecx,ecx
  374. mov ds:byte ptr[7+edi],al
  375. add ebx,ebp
  376. mov al,ds:byte ptr[esi]
  377. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  ; al now holds the texel for pixel 8; it is stored after the FDIV setup
  378. ;
  379. ; start FDIV for end of next segment in flight, so it can overlap
  380. ;
  ; Runs halfway through the 16-pixel segment. FP stack on entry:
  ; 1/z | t/z | s/z. ecx is reloaded with the pixels remaining after this
  ; segment and leaves holding that count when it exceeds 16, otherwise
  ; count-1; it is written back to counttemp at LFDIVInFlight2.
  381. mov ecx,ds:dword ptr[counttemp]
  382. cmp ecx,16 ; more than one segment after this?
  383. ja LSetupNotLast2 ; yes
  384. dec ecx
  385. jz LFDIVInFlight2 ; if only one pixel, no need to start an FDIV
  386. mov ds:dword ptr[spancountminus1],ecx
  ; the stack comments below show only the newly pushed values;
  ; 1/z | t/z | s/z remain underneath them
  387. fild ds:dword ptr[spancountminus1]
  388. fld ds:dword ptr[_d_zistepu] ; C(d_zistepu) | spancountminus1
  389. fmul st(0),st(1) ; C(d_zistepu)*scm1 | scm1
  390. fld ds:dword ptr[_d_tdivzstepu] ; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
  391. fmul st(0),st(2) ; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
  392. fxch st(1) ; C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
  ; folded into 1/z
  393. faddp st(3),st(0) ; C(d_tdivzstepu)*scm1 | scm1
  394. fxch st(1) ; scm1 | C(d_tdivzstepu)*scm1
  395. fmul ds:dword ptr[_d_sdivzstepu] ; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
  396. fxch st(1) ; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
  ; folded into t/z
  397. faddp st(3),st(0) ; C(d_sdivzstepu)*scm1
  398. fld ds:dword ptr[fp_64k] ; 64k | C(d_sdivzstepu)*scm1
  399. fxch st(1) ; C(d_sdivzstepu)*scm1 | 64k
  ; folded into s/z
  400. faddp st(4),st(0) ; 64k
  401. fdiv st(0),st(1) ; this is what we've gone to all this trouble to
  402. ; overlap
  403. jmp LFDIVInFlight2
  404. align 4
  405. LSetupNotLast2:
  ; more than 16 pixels follow this segment: advance by the 16-pixel steps
  406. fadd ds:dword ptr[zi16stepu]
  407. fxch st(2)
  408. fadd ds:dword ptr[sdivz16stepu]
  409. fxch st(2)
  410. fld ds:dword ptr[tdivz16stepu]
  411. faddp st(2),st(0)
  412. fld ds:dword ptr[fp_64k]
  413. fdiv st(0),st(1) ; z*64k = fp_64k / (1/z)
  414. ; this is what we've gone to all this trouble to
  415. ; overlap
  416. LFDIVInFlight2:
  ; Second half of the full segment: pixels 8..15, using the same
  ; add/sbb/add/adc stepping as the first half. ecx is saved to counttemp
  ; first because the SBBs reuse it as the carry mask.
  417. mov ds:dword ptr[counttemp],ecx
  418. add edx,ds:dword ptr[tstep]
  419. sbb ecx,ecx
  420. mov ds:byte ptr[8+edi],al
  421. add ebx,ebp
  422. mov al,ds:byte ptr[esi]
  423. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  424. add edx,ds:dword ptr[tstep]
  425. sbb ecx,ecx
  426. mov ds:byte ptr[9+edi],al
  427. add ebx,ebp
  428. mov al,ds:byte ptr[esi]
  429. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  430. add edx,ds:dword ptr[tstep]
  431. sbb ecx,ecx
  432. mov ds:byte ptr[10+edi],al
  433. add ebx,ebp
  434. mov al,ds:byte ptr[esi]
  435. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  436. add edx,ds:dword ptr[tstep]
  437. sbb ecx,ecx
  438. mov ds:byte ptr[11+edi],al
  439. add ebx,ebp
  440. mov al,ds:byte ptr[esi]
  441. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  442. add edx,ds:dword ptr[tstep]
  443. sbb ecx,ecx
  444. mov ds:byte ptr[12+edi],al
  445. add ebx,ebp
  446. mov al,ds:byte ptr[esi]
  447. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  448. add edx,ds:dword ptr[tstep]
  449. sbb ecx,ecx
  450. mov ds:byte ptr[13+edi],al
  451. add ebx,ebp
  452. mov al,ds:byte ptr[esi]
  453. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  454. add edx,ds:dword ptr[tstep]
  455. sbb ecx,ecx
  456. mov ds:byte ptr[14+edi],al
  457. add ebx,ebp
  458. mov al,ds:byte ptr[esi]
  459. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  ; advance pdest past this segment and make the segment-end values the
  ; start values of the next one (s = snext, t = tnext)
  460. add edi,16
  461. mov ds:dword ptr[tfracf],edx
  462. mov edx,ds:dword ptr[snext]
  463. mov ds:dword ptr[sfracf],ebx
  464. mov ebx,ds:dword ptr[tnext]
  465. mov ds:dword ptr[s],edx
  466. mov ds:dword ptr[t],ebx
  467. mov ecx,ds:dword ptr[counttemp] ; retrieve count
  468. ;
  469. ; determine whether last span or not
  470. ;
  471. cmp ecx,16 ; are there multiple segments remaining?
  ; pixel 15 of the segment just finished (edi has already been advanced by
  ; 16); the MOV leaves the flags of the compare intact for the JA
  472. mov ds:byte ptr[-1+edi],al
  473. ja LNotLastSegment ; yes
  474. ;
  475. ; last segment of scan
  476. ;
  477. LLastSegment:
  ; On entry ecx = (pixels left in the span) - 1, i.e. the number of steps
  ; still to take (0..15); esi = psource, edi = pdest. Unless ecx is 0, the
  ; FP stack is z*64k | 1/z | t/z | s/z.
  478. ;
  479. ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  480. ; get there. The number of pixels left is variable, and we want to land on the
  481. ; last pixel, not step one past it, so we can't run into arithmetic problems
  482. ;
  483. test ecx,ecx
  484. jz LNoSteps ; just draw the last pixel and we're done
  485. ; pick up after the FDIV that was left in flight previously
  486. fld st(0) ; duplicate it
  487. fmul st(0),st(4) ; s = s/z * z
  488. fxch st(1)
  489. fmul st(0),st(3) ; t = t/z * z
  490. fxch st(1)
  491. fistp ds:dword ptr[snext]
  492. fistp ds:dword ptr[tnext]
  493. mov al,ds:byte ptr[esi] ; load first texel in segment
  494. mov ebx,ds:dword ptr[_tadjust]
  495. mov ds:byte ptr[edi],al ; store first pixel in segment
  496. mov eax,ds:dword ptr[_sadjust]
  497. add eax,ds:dword ptr[snext]
  498. add ebx,ds:dword ptr[tnext]
  ; clamp the end-of-span s (eax) and t (ebx) to [4096, bbextents] and
  ; [4096, bbextentt]; the clamped values are supplied by stubs 4 and 5
  499. mov ebp,ds:dword ptr[_bbextents]
  500. mov edx,ds:dword ptr[_bbextentt]
  501. cmp eax,4096
  502. jl LClampLow4
  503. cmp eax,ebp
  504. ja LClampHigh4
  505. LClampReentry4:
  506. mov ds:dword ptr[snext],eax
  507. cmp ebx,4096
  508. jl LClampLow5
  509. cmp ebx,edx
  510. ja LClampHigh5
  511. LClampReentry5:
  512. cmp ecx,1 ; don't bother
  513. je LOnlyOneStep ; if two pixels in segment, there's only one step,
  514. ; of the segment length
  515. sub eax,ds:dword ptr[s]
  516. sub ebx,ds:dword ptr[t]
  517. add eax,eax ; convert to 15.17 format so multiply by 1.31
  518. add ebx,ebx ; reciprocal yields 16.48
  ; one-operand IMUL: edx:eax = eax * table entry; the step is taken from
  ; the high dword (edx). The table is indexed by the step count in ecx,
  ; whose smallest value here is 2, hence the -8 bias. The table itself is
  ; defined outside this view.
  519. imul ds:dword ptr[reciprocal_table_16-8+ecx*4] ; sstep = (snext - s) /
  520. ; (spancount-1)
  521. mov ebp,edx
  522. mov eax,ebx
  523. imul ds:dword ptr[reciprocal_table_16-8+ecx*4] ; tstep = (tnext - t) /
  524. ; (spancount-1)
  525. LSetEntryvec:
  ; here ebp = per-pixel sstep, edx = per-pixel tstep, ecx = step count
  526. ;
  527. ; set up advancetable
  528. ;
  529. mov ebx,ds:dword ptr[entryvec_table_16+ecx*4]
  530. mov eax,edx
  531. mov ds:dword ptr[jumptemp],ebx ; entry point for the indirect JMP below
  532. mov ecx,ebp
  533. sar edx,16 ; tstep >>= 16;
  534. mov ebx,ds:dword ptr[_cachewidth]
  535. sar ecx,16 ; sstep >>= 16;
  536. imul edx,ebx
  537. add edx,ecx ; add in sstep
  538. ; (tstep >> 16) * cachewidth + (sstep >> 16);
  539. mov ecx,ds:dword ptr[tfracf]
  540. mov ds:dword ptr[advancetable+4],edx ; advance base in t
  541. add edx,ebx ; ((tstep >> 16) + 1) * cachewidth +
  542. ; (sstep >> 16);
  543. shl ebp,16 ; left-justify sstep fractional part
  544. mov ebx,ds:dword ptr[sfracf]
  545. shl eax,16 ; left-justify tstep fractional part
  546. mov ds:dword ptr[advancetable],edx ; advance extra in t
  547. mov ds:dword ptr[tstep],eax
  ; take the first step here, so the entry code starts with esi already on
  ; the second texel; eax keeps the t fraction step for the entry code's
  ; first "add edx,eax"
  548. mov edx,ecx
  549. add edx,eax
  550. sbb ecx,ecx
  551. add ebx,ebp
  552. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  553. jmp dword ptr[jumptemp] ; jump to the number-of-pixels handler
  554. ;----------------------------------------
  555. LNoSteps:
  ; single pixel left: LEndSpan stores al at [15+edi], so edi is biased by
  ; -15 to make that store hit the current pixel
  556. mov al,ds:byte ptr[esi] ; load first texel in segment
  557. sub edi,15 ; adjust for hardwired offset
  558. jmp LEndSpan
  559. LOnlyOneStep:
  ; two pixels left: the single step is the whole clamped delta, so no
  ; reciprocal multiply is needed. Leaves ebp = sstep and edx = tstep, as
  ; LSetEntryvec expects.
  560. sub eax,ds:dword ptr[s]
  561. sub ebx,ds:dword ptr[t]
  562. mov ebp,eax
  563. mov edx,ebx
  564. jmp LSetEntryvec
  565. ;----------------------------------------
  ; Entry points for the last segment, reached through the indirect jump at
  ; the end of LSetEntryvec. EntryN_16 handles a segment of N pixels. The first
  ; pixel has already been stored and esi already points at the second texel.
  ; Each entry:
  ;   - biases edi by -(16-N) so the hardwired [k+edi] stores in the unrolled
  ;     run starting at Entry16_16 land on the right pixels
  ;   - takes one step (eax = t fraction step, ebp = s fraction step)
  ;   - for N >= 4, issues the next "add edx,tstep" and jumps to LEntryN_16,
  ;     which begins with the matching SBB; the carry flag is live across
  ;     the jump
  566. public Entry2_16, Entry3_16, Entry4_16, Entry5_16
  567. public Entry6_16, Entry7_16, Entry8_16, Entry9_16
  568. public Entry10_16, Entry11_16, Entry12_16, Entry13_16
  569. public Entry14_16, Entry15_16, Entry16_16
  570. Entry2_16:
  ; two pixels: no further step, just fetch the last texel
  571. sub edi,14 ; adjust for hardwired offsets
  572. mov al,ds:byte ptr[esi]
  573. jmp LEntry2_16
  574. ;----------------------------------------
  575. Entry3_16:
  ; three pixels: one more step, and no t add afterwards because
  ; LEntry3_16 takes no further step
  576. sub edi,13 ; adjust for hardwired offsets
  577. add edx,eax
  578. mov al,ds:byte ptr[esi]
  579. sbb ecx,ecx
  580. add ebx,ebp
  581. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  582. jmp LEntry3_16
  583. ;----------------------------------------
  584. Entry4_16:
  585. sub edi,12 ; adjust for hardwired offsets
  586. add edx,eax
  587. mov al,ds:byte ptr[esi]
  588. sbb ecx,ecx
  589. add ebx,ebp
  590. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  591. add edx,ds:dword ptr[tstep]
  592. jmp LEntry4_16
  593. ;----------------------------------------
  594. Entry5_16:
  595. sub edi,11 ; adjust for hardwired offsets
  596. add edx,eax
  597. mov al,ds:byte ptr[esi]
  598. sbb ecx,ecx
  599. add ebx,ebp
  600. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  601. add edx,ds:dword ptr[tstep]
  602. jmp LEntry5_16
  603. ;----------------------------------------
  604. Entry6_16:
  605. sub edi,10 ; adjust for hardwired offsets
  606. add edx,eax
  607. mov al,ds:byte ptr[esi]
  608. sbb ecx,ecx
  609. add ebx,ebp
  610. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  611. add edx,ds:dword ptr[tstep]
  612. jmp LEntry6_16
  613. ;----------------------------------------
  614. Entry7_16:
  615. sub edi,9 ; adjust for hardwired offsets
  616. add edx,eax
  617. mov al,ds:byte ptr[esi]
  618. sbb ecx,ecx
  619. add ebx,ebp
  620. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  621. add edx,ds:dword ptr[tstep]
  622. jmp LEntry7_16
  623. ;----------------------------------------
  624. Entry8_16:
  625. sub edi,8 ; adjust for hardwired offsets
  626. add edx,eax
  627. mov al,ds:byte ptr[esi]
  628. sbb ecx,ecx
  629. add ebx,ebp
  630. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  631. add edx,ds:dword ptr[tstep]
  632. jmp LEntry8_16
  633. ;----------------------------------------
  634. Entry9_16:
  635. sub edi,7 ; adjust for hardwired offsets
  636. add edx,eax
  637. mov al,ds:byte ptr[esi]
  638. sbb ecx,ecx
  639. add ebx,ebp
  640. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  641. add edx,ds:dword ptr[tstep]
  642. jmp LEntry9_16
  643. ;----------------------------------------
  644. Entry10_16:
  645. sub edi,6 ; adjust for hardwired offsets
  646. add edx,eax
  647. mov al,ds:byte ptr[esi]
  648. sbb ecx,ecx
  649. add ebx,ebp
  650. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  651. add edx,ds:dword ptr[tstep]
  652. jmp LEntry10_16
  653. ;----------------------------------------
  654. Entry11_16:
  655. sub edi,5 ; adjust for hardwired offsets
  656. add edx,eax
  657. mov al,ds:byte ptr[esi]
  658. sbb ecx,ecx
  659. add ebx,ebp
  660. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  661. add edx,ds:dword ptr[tstep]
  662. jmp LEntry11_16
  663. ;----------------------------------------
  664. Entry12_16:
  665. sub edi,4 ; adjust for hardwired offsets
  666. add edx,eax
  667. mov al,ds:byte ptr[esi]
  668. sbb ecx,ecx
  669. add ebx,ebp
  670. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  671. add edx,ds:dword ptr[tstep]
  672. jmp LEntry12_16
  673. ;----------------------------------------
  674. Entry13_16:
  675. sub edi,3 ; adjust for hardwired offsets
  676. add edx,eax
  677. mov al,ds:byte ptr[esi]
  678. sbb ecx,ecx
  679. add ebx,ebp
  680. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  681. add edx,ds:dword ptr[tstep]
  682. jmp LEntry13_16
  683. ;----------------------------------------
  684. Entry14_16:
  685. sub edi,2 ; adjust for hardwired offsets
  686. add edx,eax
  687. mov al,ds:byte ptr[esi]
  688. sbb ecx,ecx
  689. add ebx,ebp
  690. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  691. add edx,ds:dword ptr[tstep]
  692. jmp LEntry14_16
  693. ;----------------------------------------
  694. Entry15_16:
  695. dec edi ; adjust for hardwired offsets
  696. add edx,eax
  697. mov al,ds:byte ptr[esi]
  698. sbb ecx,ecx
  699. add ebx,ebp
  700. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  701. add edx,ds:dword ptr[tstep]
  702. jmp LEntry15_16
  703. ;----------------------------------------
  704. Entry16_16:
  ; Full 16-pixel last segment, and the unrolled run that the shorter entry
  ; points jump into. No edi bias is needed here: the stores below use the
  ; offsets 1..15 directly. Every LEntryN_16 label sits right after an
  ; "add edx,tstep", so whoever jumps there must arrive with the carry flag
  ; from that same add.
  705. add edx,eax
  706. mov al,ds:byte ptr[esi]
  707. sbb ecx,ecx
  708. add ebx,ebp
  709. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  710. add edx,ds:dword ptr[tstep]
  711. sbb ecx,ecx
  712. mov ds:byte ptr[1+edi],al
  713. add ebx,ebp
  714. mov al,ds:byte ptr[esi]
  715. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  716. add edx,ds:dword ptr[tstep]
  717. LEntry15_16:
  718. sbb ecx,ecx
  719. mov ds:byte ptr[2+edi],al
  720. add ebx,ebp
  721. mov al,ds:byte ptr[esi]
  722. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  723. add edx,ds:dword ptr[tstep]
  724. LEntry14_16:
  725. sbb ecx,ecx
  726. mov ds:byte ptr[3+edi],al
  727. add ebx,ebp
  728. mov al,ds:byte ptr[esi]
  729. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  730. add edx,ds:dword ptr[tstep]
  731. LEntry13_16:
  732. sbb ecx,ecx
  733. mov ds:byte ptr[4+edi],al
  734. add ebx,ebp
  735. mov al,ds:byte ptr[esi]
  736. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  737. add edx,ds:dword ptr[tstep]
  738. LEntry12_16:
  739. sbb ecx,ecx
  740. mov ds:byte ptr[5+edi],al
  741. add ebx,ebp
  742. mov al,ds:byte ptr[esi]
  743. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  744. add edx,ds:dword ptr[tstep]
  745. LEntry11_16:
  746. sbb ecx,ecx
  747. mov ds:byte ptr[6+edi],al
  748. add ebx,ebp
  749. mov al,ds:byte ptr[esi]
  750. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  751. add edx,ds:dword ptr[tstep]
  752. LEntry10_16:
  753. sbb ecx,ecx
  754. mov ds:byte ptr[7+edi],al
  755. add ebx,ebp
  756. mov al,ds:byte ptr[esi]
  757. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  758. add edx,ds:dword ptr[tstep]
  759. LEntry9_16:
  760. sbb ecx,ecx
  761. mov ds:byte ptr[8+edi],al
  762. add ebx,ebp
  763. mov al,ds:byte ptr[esi]
  764. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  765. add edx,ds:dword ptr[tstep]
  766. LEntry8_16:
  767. sbb ecx,ecx
  768. mov ds:byte ptr[9+edi],al
  769. add ebx,ebp
  770. mov al,ds:byte ptr[esi]
  771. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  772. add edx,ds:dword ptr[tstep]
  773. LEntry7_16:
  774. sbb ecx,ecx
  775. mov ds:byte ptr[10+edi],al
  776. add ebx,ebp
  777. mov al,ds:byte ptr[esi]
  778. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  779. add edx,ds:dword ptr[tstep]
  780. LEntry6_16:
  781. sbb ecx,ecx
  782. mov ds:byte ptr[11+edi],al
  783. add ebx,ebp
  784. mov al,ds:byte ptr[esi]
  785. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  786. add edx,ds:dword ptr[tstep]
  787. LEntry5_16:
  788. sbb ecx,ecx
  789. mov ds:byte ptr[12+edi],al
  790. add ebx,ebp
  791. mov al,ds:byte ptr[esi]
  792. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  793. add edx,ds:dword ptr[tstep]
  794. LEntry4_16:
  795. sbb ecx,ecx
  796. mov ds:byte ptr[13+edi],al
  797. add ebx,ebp
  798. mov al,ds:byte ptr[esi]
  799. adc esi,ds:dword ptr[advancetable+4+ecx*4]
  800. LEntry3_16:
  ; no stepping left: store the second-to-last pixel and fetch the last texel
  801. mov ds:byte ptr[14+edi],al
  802. mov al,ds:byte ptr[esi]
  803. LEntry2_16:
  804. LEndSpan:
  ; al holds the span's last texel; it is stored at [15+edi] below
  805. ;
  806. ; clear s/z, t/z, 1/z from FP stack
  807. ;
  808. fstp st(0)
  809. fstp st(0)
  810. fstp st(0)
  811. mov ebx,ds:dword ptr[pspantemp] ; restore spans pointer
  812. mov ebx,ds:dword ptr[espan_t_pnext+ebx] ; point to next span
  813. test ebx,ebx ; any more spans?
  ; the MOV does not change the flags, so the JNZ still sees the result of
  ; the TEST
  814. mov ds:byte ptr[15+edi],al
  815. jnz LSpanLoop ; more spans
  816. pop ebx ; restore register variables
  817. pop esi
  818. pop edi
  819. pop ebp ; restore the caller's stack frame
  820. ret
  821. ;----------------------------------------------------------------------
  822. ; 8-bpp horizontal span z drawing code for polygons, with no transparency.
  823. ;
  824. ; Assumes there is at least one span in pzspans, and that every span
  825. ; contains at least one pixel
  826. ;----------------------------------------------------------------------
  ;----------------------------------------------------------------------
  ; Near-clamp stubs for _D_DrawZSpans.
  ;
  ; Reached from the span set-up code when the compare against
  ; float_point5 reports that the span's starting 1/z is greater
  ; (z nearer than 2, per the comment at the compare).
  ;
  ; In:   st(0) = starting 1/z of the span (no longer needed)
  ;       ecx   = pixel count, edi = destination, [esp] = saved span ptr
  ; Out:  st(0) popped
  ;       edx   = 40000000h, the fixed-point starting value; its upper
  ;               16 bits (4000h) are what the drawing loop stores
  ;       ebx   = 0, so the per-pixel step leaves edx unchanged and the
  ;               whole span is filled with the same value
  ; Note: ebx is left at zero when control returns to the drawing path.
  ;----------------------------------------------------------------------

  ; clamp entry for the path that steps with "add edx,ebx"
  LClamp:
          fstp    st(0)                   ; discard the out-of-range 1/z
          mov     edx,40000000h           ; clamped starting value
          xor     ebx,ebx                 ; flat span: step = 0
          jmp     LZDraw

  ; clamp entry for the path that steps with "sub edx,ebx"
  LClampNeg:
          fstp    st(0)                   ; discard the out-of-range 1/z
          mov     edx,40000000h           ; clamped starting value
          xor     ebx,ebx                 ; flat span: step = 0
          jmp     LZDrawNeg
  ;----------------------------------------------------------------------
  ; _D_DrawZSpans
  ;
  ; Fills the z-buffer for a linked list of horizontal spans.
  ; Presumably called from C as  void D_DrawZSpans (espan_t *pzspans).
  ;
  ; In:    [esp+4] on entry = pointer to the first span (non-null, every
  ;        span at least one pixel long -- see the banner above)
  ; Out:   nothing is set up as a return value
  ; Saves: ebp, edi, esi, ebx (pushed/popped here)
  ; Uses:  eax, ecx, edx, flags, x87 stack (left balanced), and the
  ;        memory temporaries izi and izistep
  ;
  ; For each span:
  ;   zi    = d_ziorigin + v*d_zistepv + u*d_zistepu
  ;   pdest = d_pzbuffer + v*d_zrowbytes + u*2
  ;   izi   = zi * Float2ToThe31nd, converted to a 32-bit integer
  ;   each pixel stores the upper 16 bits of izi, then izi steps by
  ;   izistep. Spans whose zi compares greater than float_point5 are
  ;   diverted to LClamp / LClampNeg, which supply edx and ebx themselves.
  ;
  ; Two copies of the drawing code exist. The first scales the step by
  ; Float2ToThe31nd and adds it; the second scales it by
  ; FloatMinus2ToThe31nd and subtracts it.
  ; NOTE(review): the selector below is "jz", so the second copy is
  ; chosen only when the raw bits of d_zistepu are all zero. The label
  ; name suggests a sign test was intended -- confirm before changing.
  ;----------------------------------------------------------------------
  pzspans equ     4+16                    ; return address + four pushes

          public  _D_DrawZSpans
  _D_DrawZSpans:
          push    ebp                     ; preserve caller's stack frame
          push    edi
          push    esi                     ; preserve register variables
          push    ebx

          fld     ds:dword ptr[_d_zistepu]
          mov     eax,ds:dword ptr[_d_zistepu]
          mov     esi,ds:dword ptr[pzspans+esp]  ; esi = first span
          test    eax,eax
          jz      LFNegSpan

          fmul    ds:dword ptr[Float2ToThe31nd]
          fistp   ds:dword ptr[izistep]   ; note: we are relying on FP
                                          ; exceptions being turned off
                                          ; here to avoid range problems

  LFSpanLoop:
  ; set up the initial 1/z value
          fild    ds:dword ptr[espan_t_v+esi]
          fild    ds:dword ptr[espan_t_u+esi]
          mov     ecx,ds:dword ptr[espan_t_v+esi]
          mov     edi,ds:dword ptr[_d_pzbuffer]
          fmul    ds:dword ptr[_d_zistepu]
          fxch    st(1)
          fmul    ds:dword ptr[_d_zistepv]
          fxch    st(1)
          fadd    ds:dword ptr[_d_ziorigin]
          imul    ecx,ds:dword ptr[_d_zrowbytes] ; ecx = v * row size
          faddp   st(1),st(0)             ; st(0) = zi

  ; clamp if z is nearer than 2 (1/z > 0.5)
          fcom    ds:dword ptr[float_point5]
          add     edi,ecx
          mov     edx,ds:dword ptr[espan_t_u+esi]
          add     edx,edx                 ; u * 2: z entries are words
          mov     ecx,ds:dword ptr[espan_t_count+esi]
          add     edi,edx                 ; pdest = &pdestspan[scans->u];
          push    esi                     ; preserve spans pointer
          fnstsw  ax
          test    ah,045h                 ; C3, C2, C0 all clear => zi > 0.5
          jz      LClamp

          fmul    ds:dword ptr[Float2ToThe31nd]
          fistp   ds:dword ptr[izi]       ; note: we are relying on FP
                                          ; exceptions being turned off
                                          ; here to avoid problems when
                                          ; the span is closer than
                                          ; 1/(2**31)

  ; Reload the step for every unclamped span. LClamp zeroes ebx to
  ; flat-fill a clamped span and never restores it, so a step loaded
  ; only once before the loop would stay zero for every span that
  ; follows the first clamped one.
          mov     ebx,ds:dword ptr[izistep]
          mov     edx,ds:dword ptr[izi]

  ; at this point:
  ; ebx = izistep (0 when arriving from LClamp)
  ; ecx = count
  ; edx = izi
  ; edi = pdest
  ; [esp] = span pointer
  LZDraw:

  ; do a single pixel up front, if necessary to dword align the destination
          test    edi,2
          jz      LFMiddle
          mov     eax,edx
          add     edx,ebx
          shr     eax,16
          dec     ecx
          mov     ds:word ptr[edi],ax
          add     edi,2

  ; do the middle two aligned dwords (four pixels) at a time
  LFMiddle:
          push    ecx                     ; keep count for the odd-pixel test
          shr     ecx,1                   ; count / 2 = dwords to write
          jz      LFLast                  ; no aligned dwords to do
          shr     ecx,1                   ; (count / 2) / 2; CF = odd dword
          jnc     LFMiddleLoop            ; even number of aligned dwords to do

  ; odd dword: write one pair of pixels before entering the paired loop
          mov     eax,edx
          add     edx,ebx
          shr     eax,16                  ; low word  = first pixel
          mov     esi,edx
          add     edx,ebx
          and     esi,0FFFF0000h          ; high word = second pixel
          or      eax,esi
          mov     ds:dword ptr[edi],eax
          add     edi,4
          and     ecx,ecx
          jz      LFLast

  LFMiddleLoop:
          mov     eax,edx
          add     edx,ebx
          shr     eax,16
          mov     esi,edx
          add     edx,ebx
          and     esi,0FFFF0000h
          or      eax,esi
          mov     ebp,edx
          mov     ds:dword ptr[edi],eax
          add     edx,ebx
          shr     ebp,16
          mov     esi,edx
          add     edx,ebx
          and     esi,0FFFF0000h
          or      ebp,esi
          mov     ds:dword ptr[4+edi],ebp ; FIXME: eliminate register contention
          add     edi,8
          dec     ecx
          jnz     LFMiddleLoop

  LFLast:
          pop     ecx                     ; retrieve count
          pop     esi                     ; retrieve span pointer

  ; do the last, unaligned pixel, if there is one
          and     ecx,1                   ; is there an odd pixel left to do?
          jz      LFSpanDone              ; no
          shr     edx,16
          mov     ds:word ptr[edi],dx     ; do the final pixel's z

  LFSpanDone:
          mov     esi,ds:dword ptr[espan_t_pnext+esi]
          test    esi,esi
          jnz     LFSpanLoop
          jmp     LFDone

  ;----------------------------------------------------------------------
  ; Second copy: step is stored negated and subtracted per pixel.
  ;----------------------------------------------------------------------
  LFNegSpan:
          fmul    ds:dword ptr[FloatMinus2ToThe31nd]
          fistp   ds:dword ptr[izistep]   ; note: we are relying on FP
                                          ; exceptions being turned off
                                          ; here to avoid range problems

  LFNegSpanLoop:
  ; set up the initial 1/z value
          fild    ds:dword ptr[espan_t_v+esi]
          fild    ds:dword ptr[espan_t_u+esi]
          mov     ecx,ds:dword ptr[espan_t_v+esi]
          mov     edi,ds:dword ptr[_d_pzbuffer]
          fmul    ds:dword ptr[_d_zistepu]
          fxch    st(1)
          fmul    ds:dword ptr[_d_zistepv]
          fxch    st(1)
          fadd    ds:dword ptr[_d_ziorigin]
          imul    ecx,ds:dword ptr[_d_zrowbytes] ; ecx = v * row size
          faddp   st(1),st(0)             ; st(0) = zi

  ; clamp if z is nearer than 2 (1/z > 0.5)
          fcom    ds:dword ptr[float_point5]
          add     edi,ecx
          mov     edx,ds:dword ptr[espan_t_u+esi]
          add     edx,edx                 ; u * 2: z entries are words
          mov     ecx,ds:dword ptr[espan_t_count+esi]
          add     edi,edx                 ; pdest = &pdestspan[scans->u];
          push    esi                     ; preserve spans pointer
          fnstsw  ax
          test    ah,045h                 ; C3, C2, C0 all clear => zi > 0.5
          jz      LClampNeg

          fmul    ds:dword ptr[Float2ToThe31nd]
          fistp   ds:dword ptr[izi]       ; note: we are relying on FP
                                          ; exceptions being turned off
                                          ; here to avoid problems when
                                          ; the span is closer than
                                          ; 1/(2**31)

  ; Reload the step for every unclamped span; LClampNeg leaves ebx at
  ; zero after a clamped span (same reasoning as in the first copy).
          mov     ebx,ds:dword ptr[izistep]
          mov     edx,ds:dword ptr[izi]

  ; at this point:
  ; ebx = izistep (negated; 0 when arriving from LClampNeg)
  ; ecx = count
  ; edx = izi
  ; edi = pdest
  ; [esp] = span pointer
  LZDrawNeg:

  ; do a single pixel up front, if necessary to dword align the destination
          test    edi,2
          jz      LFNegMiddle
          mov     eax,edx
          sub     edx,ebx
          shr     eax,16
          dec     ecx
          mov     ds:word ptr[edi],ax
          add     edi,2

  ; do the middle two aligned dwords (four pixels) at a time
  LFNegMiddle:
          push    ecx                     ; keep count for the odd-pixel test
          shr     ecx,1                   ; count / 2 = dwords to write
          jz      LFNegLast               ; no aligned dwords to do
          shr     ecx,1                   ; (count / 2) / 2; CF = odd dword
          jnc     LFNegMiddleLoop         ; even number of aligned dwords to do

  ; odd dword: write one pair of pixels before entering the paired loop
          mov     eax,edx
          sub     edx,ebx
          shr     eax,16                  ; low word  = first pixel
          mov     esi,edx
          sub     edx,ebx
          and     esi,0FFFF0000h          ; high word = second pixel
          or      eax,esi
          mov     ds:dword ptr[edi],eax
          add     edi,4
          and     ecx,ecx
          jz      LFNegLast

  LFNegMiddleLoop:
          mov     eax,edx
          sub     edx,ebx
          shr     eax,16
          mov     esi,edx
          sub     edx,ebx
          and     esi,0FFFF0000h
          or      eax,esi
          mov     ebp,edx
          mov     ds:dword ptr[edi],eax
          sub     edx,ebx
          shr     ebp,16
          mov     esi,edx
          sub     edx,ebx
          and     esi,0FFFF0000h
          or      ebp,esi
          mov     ds:dword ptr[4+edi],ebp ; FIXME: eliminate register contention
          add     edi,8
          dec     ecx
          jnz     LFNegMiddleLoop

  LFNegLast:
          pop     ecx                     ; retrieve count
          pop     esi                     ; retrieve span pointer

  ; do the last, unaligned pixel, if there is one
          and     ecx,1                   ; is there an odd pixel left to do?
          jz      LFNegSpanDone           ; no
          shr     edx,16
          mov     ds:word ptr[edi],dx     ; do the final pixel's z

  LFNegSpanDone:
          mov     esi,ds:dword ptr[espan_t_pnext+esi]
          test    esi,esi
          jnz     LFNegSpanLoop

  LFDone:
          pop     ebx                     ; restore register variables
          pop     esi
          pop     edi
          pop     ebp                     ; restore the caller's stack frame
          ret
  1055. _TEXT ENDS
  1056. endif ;id386
  1057. END