// r_spr8.s (19 KB)
  1. //
  2. // d_spr8.s
  3. // x86 assembly-language horizontal 8-bpp transparent span-drawing code.
  4. //
  5. #include "qasm.h"
  6. #if id386
  7. //----------------------------------------------------------------------
  8. // 8-bpp horizontal span drawing code for polygons, with transparency.
  9. //----------------------------------------------------------------------
  10. .text
  11. // out-of-line, rarely-needed clamping code
  12. LClampHigh0:
  13. movl C(bbextents),%esi
  14. jmp LClampReentry0
  15. LClampHighOrLow0:
  16. jg LClampHigh0
  17. xorl %esi,%esi
  18. jmp LClampReentry0
  19. LClampHigh1:
  20. movl C(bbextentt),%edx
  21. jmp LClampReentry1
  22. LClampHighOrLow1:
  23. jg LClampHigh1
  24. xorl %edx,%edx
  25. jmp LClampReentry1
  26. LClampLow2:
  27. movl $2048,%ebp
  28. jmp LClampReentry2
  29. LClampHigh2:
  30. movl C(bbextents),%ebp
  31. jmp LClampReentry2
  32. LClampLow3:
  33. movl $2048,%ecx
  34. jmp LClampReentry3
  35. LClampHigh3:
  36. movl C(bbextentt),%ecx
  37. jmp LClampReentry3
  38. LClampLow4:
  39. movl $2048,%eax
  40. jmp LClampReentry4
  41. LClampHigh4:
  42. movl C(bbextents),%eax
  43. jmp LClampReentry4
  44. LClampLow5:
  45. movl $2048,%ebx
  46. jmp LClampReentry5
  47. LClampHigh5:
  48. movl C(bbextentt),%ebx
  49. jmp LClampReentry5
  50. #define pspans 4+16
  51. .align 4
  52. .globl C(D_SpriteDrawSpans)
  53. C(D_SpriteDrawSpans):
  54. pushl %ebp // preserve caller's stack frame
  55. pushl %edi
  56. pushl %esi // preserve register variables
  57. pushl %ebx
  58. //
  59. // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
  60. // and span list pointers, and 1/z step in 0.32 fixed-point
  61. //
  62. // FIXME: any overlap from rearranging?
  63. flds C(d_sdivzstepu)
  64. fmuls fp_8
  65. movl C(cacheblock),%edx
  66. flds C(d_tdivzstepu)
  67. fmuls fp_8
  68. movl pspans(%esp),%ebx // point to the first span descriptor
  69. flds C(d_zistepu)
  70. fmuls fp_8
  71. movl %edx,pbase // pbase = cacheblock
  72. flds C(d_zistepu)
  73. fmuls fp_64kx64k
  74. fxch %st(3)
  75. fstps sdivz8stepu
  76. fstps zi8stepu
  77. fstps tdivz8stepu
  78. fistpl izistep
  79. movl izistep,%eax
  80. rorl $16,%eax // put upper 16 bits in low word
  81. movl sspan_t_count(%ebx),%ecx
  82. movl %eax,izistep
  83. cmpl $0,%ecx
  84. jle LNextSpan
  85. LSpanLoop:
  86. //
  87. // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
  88. // initial s and t values
  89. //
  90. // FIXME: pipeline FILD?
  91. fildl sspan_t_v(%ebx)
  92. fildl sspan_t_u(%ebx)
  93. fld %st(1) // dv | du | dv
  94. fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
  95. fld %st(1) // du | dv*d_sdivzstepv | du | dv
  96. fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  97. fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  98. fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
  99. // dv*d_sdivzstepv | du | dv
  100. fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
  101. // dv*d_sdivzstepv | du | dv
  102. faddp %st(0),%st(2) // du*d_tdivzstepu |
  103. // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
  104. fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
  105. // du*d_tdivzstepu | du | dv
  106. fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
  107. // du*d_tdivzstepu | du | dv
  108. fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
  109. // du*d_sdivzstepu + dv*d_sdivzstepv |
  110. // du*d_tdivzstepu | du | dv
  111. fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
  112. // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
  113. fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
  114. // du*d_sdivzstepu; stays in %st(2) at end
  115. fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
  116. // s/z
  117. fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
  118. // du*d_tdivzstepu | du | s/z
  119. fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
  120. // du*d_tdivzstepu | du | s/z
  121. faddp %st(0),%st(2) // dv*d_zistepv |
  122. // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
  123. fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
  124. // dv*d_zistepv | s/z
  125. fmuls C(d_zistepu) // du*d_zistepu |
  126. // dv*d_tdivzstepv + du*d_tdivzstepu |
  127. // dv*d_zistepv | s/z
  128. fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
  129. // du*d_zistepu | dv*d_zistepv | s/z
  130. fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
  131. // du*d_tdivzstepu; stays in %st(1) at end
  132. fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
  133. faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
  134. flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
  135. fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
  136. fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
  137. // du*d_zistepu; stays in %st(0) at end
  138. // 1/z | fp_64k | t/z | s/z
  139. fld %st(0) // FIXME: get rid of stall on FMUL?
  140. fmuls fp_64kx64k
  141. fxch %st(1)
  142. //
  143. // calculate and clamp s & t
  144. //
  145. fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
  146. fxch %st(1)
  147. fistpl izi // 0.32 fixed-point 1/z
  148. movl izi,%ebp
  149. //
  150. // set pz to point to the first z-buffer pixel in the span
  151. //
  152. rorl $16,%ebp // put upper 16 bits in low word
  153. movl sspan_t_v(%ebx),%eax
  154. movl %ebp,izi
  155. movl sspan_t_u(%ebx),%ebp
  156. imull C(d_zrowbytes)
  157. shll $1,%ebp // a word per pixel
  158. addl C(d_pzbuffer),%eax
  159. addl %ebp,%eax
  160. movl %eax,pz
  161. //
  162. // point %edi to the first pixel in the span
  163. //
  164. movl C(d_viewbuffer),%ebp
  165. movl sspan_t_v(%ebx),%eax
  166. pushl %ebx // preserve spans pointer
  167. movl C(tadjust),%edx
  168. movl C(sadjust),%esi
  169. movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
  170. addl %ebp,%edi
  171. movl sspan_t_u(%ebx),%ebp
  172. addl %ebp,%edi // pdest = &pdestspan[scans->u];
  173. //
  174. // now start the FDIV for the end of the span
  175. //
  176. cmpl $8,%ecx
  177. ja LSetupNotLast1
  178. decl %ecx
  179. jz LCleanup1 // if only one pixel, no need to start an FDIV
  180. movl %ecx,spancountminus1
  181. // finish up the s and t calcs
  182. fxch %st(1) // z*64k | 1/z | t/z | s/z
  183. fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
  184. fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
  185. fxch %st(1) // z*64k | s | 1/z | t/z | s/z
  186. fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
  187. fxch %st(1) // s | t | 1/z | t/z | s/z
  188. fistpl s // 1/z | t | t/z | s/z
  189. fistpl t // 1/z | t/z | s/z
  190. fildl spancountminus1
  191. flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
  192. flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
  193. fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
  194. fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
  195. fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
  196. fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
  197. fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
  198. // _d_tdivzstepu*scm1
  199. fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
  200. // _d_tdivzstepu*scm1
  201. faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
  202. fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
  203. faddp %st(0),%st(3) // _d_sdivzstepu*scm1
  204. faddp %st(0),%st(3)
  205. flds fp_64k
  206. fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
  207. // overlap
  208. jmp LFDIVInFlight1
  209. LCleanup1:
  210. // finish up the s and t calcs
  211. fxch %st(1) // z*64k | 1/z | t/z | s/z
  212. fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
  213. fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
  214. fxch %st(1) // z*64k | s | 1/z | t/z | s/z
  215. fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
  216. fxch %st(1) // s | t | 1/z | t/z | s/z
  217. fistpl s // 1/z | t | t/z | s/z
  218. fistpl t // 1/z | t/z | s/z
  219. jmp LFDIVInFlight1
  220. .align 4
  221. LSetupNotLast1:
  222. // finish up the s and t calcs
  223. fxch %st(1) // z*64k | 1/z | t/z | s/z
  224. fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
  225. fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
  226. fxch %st(1) // z*64k | s | 1/z | t/z | s/z
  227. fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
  228. fxch %st(1) // s | t | 1/z | t/z | s/z
  229. fistpl s // 1/z | t | t/z | s/z
  230. fistpl t // 1/z | t/z | s/z
  231. fadds zi8stepu
  232. fxch %st(2)
  233. fadds sdivz8stepu
  234. fxch %st(2)
  235. flds tdivz8stepu
  236. faddp %st(0),%st(2)
  237. flds fp_64k
  238. fdiv %st(1),%st(0) // z = 1/1/z
  239. // this is what we've gone to all this trouble to
  240. // overlap
  241. LFDIVInFlight1:
  242. addl s,%esi
  243. addl t,%edx
  244. movl C(bbextents),%ebx
  245. movl C(bbextentt),%ebp
  246. cmpl %ebx,%esi
  247. ja LClampHighOrLow0
  248. LClampReentry0:
  249. movl %esi,s
  250. movl pbase,%ebx
  251. shll $16,%esi
  252. cmpl %ebp,%edx
  253. movl %esi,sfracf
  254. ja LClampHighOrLow1
  255. LClampReentry1:
  256. movl %edx,t
  257. movl s,%esi // sfrac = scans->sfrac;
  258. shll $16,%edx
  259. movl t,%eax // tfrac = scans->tfrac;
  260. sarl $16,%esi
  261. movl %edx,tfracf
  262. //
  263. // calculate the texture starting address
  264. //
  265. sarl $16,%eax
  266. addl %ebx,%esi
  267. imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
  268. addl %eax,%esi // psource = pbase + (sfrac >> 16) +
  269. // ((tfrac >> 16) * cachewidth);
  270. //
  271. // determine whether last span or not
  272. //
  273. cmpl $8,%ecx
  274. jna LLastSegment
  275. //
  276. // not the last segment; do full 8-wide segment
  277. //
  278. LNotLastSegment:
  279. //
  280. // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  281. // get there
  282. //
  283. // pick up after the FDIV that was left in flight previously
  284. fld %st(0) // duplicate it
  285. fmul %st(4),%st(0) // s = s/z * z
  286. fxch %st(1)
  287. fmul %st(3),%st(0) // t = t/z * z
  288. fxch %st(1)
  289. fistpl snext
  290. fistpl tnext
  291. movl snext,%eax
  292. movl tnext,%edx
  293. subl $8,%ecx // count off this segments' pixels
  294. movl C(sadjust),%ebp
  295. pushl %ecx // remember count of remaining pixels
  296. movl C(tadjust),%ecx
  297. addl %eax,%ebp
  298. addl %edx,%ecx
  299. movl C(bbextents),%eax
  300. movl C(bbextentt),%edx
  301. cmpl $2048,%ebp
  302. jl LClampLow2
  303. cmpl %eax,%ebp
  304. ja LClampHigh2
  305. LClampReentry2:
  306. cmpl $2048,%ecx
  307. jl LClampLow3
  308. cmpl %edx,%ecx
  309. ja LClampHigh3
  310. LClampReentry3:
  311. movl %ebp,snext
  312. movl %ecx,tnext
  313. subl s,%ebp
  314. subl t,%ecx
  315. //
  316. // set up advancetable
  317. //
  318. movl %ecx,%eax
  319. movl %ebp,%edx
  320. sarl $19,%edx // sstep >>= 16;
  321. movl C(cachewidth),%ebx
  322. sarl $19,%eax // tstep >>= 16;
  323. jz LIsZero
  324. imull %ebx,%eax // (tstep >> 16) * cachewidth;
  325. LIsZero:
  326. addl %edx,%eax // add in sstep
  327. // (tstep >> 16) * cachewidth + (sstep >> 16);
  328. movl tfracf,%edx
  329. movl %eax,advancetable+4 // advance base in t
  330. addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
  331. // (sstep >> 16);
  332. shll $13,%ebp // left-justify sstep fractional part
  333. movl %ebp,sstep
  334. movl sfracf,%ebx
  335. shll $13,%ecx // left-justify tstep fractional part
  336. movl %eax,advancetable // advance extra in t
  337. movl %ecx,tstep
  338. movl pz,%ecx
  339. movl izi,%ebp
  340. cmpw (%ecx),%bp
  341. jl Lp1
  342. movb (%esi),%al // get first source texel
  343. cmpb $(TRANSPARENT_COLOR),%al
  344. jz Lp1
  345. movw %bp,(%ecx)
  346. movb %al,(%edi) // store first dest pixel
  347. Lp1:
  348. addl izistep,%ebp
  349. adcl $0,%ebp
  350. addl tstep,%edx // advance tfrac fractional part by tstep frac
  351. sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
  352. addl sstep,%ebx // advance sfrac fractional part by sstep frac
  353. adcl advancetable+4(,%eax,4),%esi // point to next source texel
  354. cmpw 2(%ecx),%bp
  355. jl Lp2
  356. movb (%esi),%al
  357. cmpb $(TRANSPARENT_COLOR),%al
  358. jz Lp2
  359. movw %bp,2(%ecx)
  360. movb %al,1(%edi)
  361. Lp2:
  362. addl izistep,%ebp
  363. adcl $0,%ebp
  364. addl tstep,%edx
  365. sbbl %eax,%eax
  366. addl sstep,%ebx
  367. adcl advancetable+4(,%eax,4),%esi
  368. cmpw 4(%ecx),%bp
  369. jl Lp3
  370. movb (%esi),%al
  371. cmpb $(TRANSPARENT_COLOR),%al
  372. jz Lp3
  373. movw %bp,4(%ecx)
  374. movb %al,2(%edi)
  375. Lp3:
  376. addl izistep,%ebp
  377. adcl $0,%ebp
  378. addl tstep,%edx
  379. sbbl %eax,%eax
  380. addl sstep,%ebx
  381. adcl advancetable+4(,%eax,4),%esi
  382. cmpw 6(%ecx),%bp
  383. jl Lp4
  384. movb (%esi),%al
  385. cmpb $(TRANSPARENT_COLOR),%al
  386. jz Lp4
  387. movw %bp,6(%ecx)
  388. movb %al,3(%edi)
  389. Lp4:
  390. addl izistep,%ebp
  391. adcl $0,%ebp
  392. addl tstep,%edx
  393. sbbl %eax,%eax
  394. addl sstep,%ebx
  395. adcl advancetable+4(,%eax,4),%esi
  396. cmpw 8(%ecx),%bp
  397. jl Lp5
  398. movb (%esi),%al
  399. cmpb $(TRANSPARENT_COLOR),%al
  400. jz Lp5
  401. movw %bp,8(%ecx)
  402. movb %al,4(%edi)
  403. Lp5:
  404. addl izistep,%ebp
  405. adcl $0,%ebp
  406. addl tstep,%edx
  407. sbbl %eax,%eax
  408. addl sstep,%ebx
  409. adcl advancetable+4(,%eax,4),%esi
  410. //
  411. // start FDIV for end of next segment in flight, so it can overlap
  412. //
  413. popl %eax
  414. cmpl $8,%eax // more than one segment after this?
  415. ja LSetupNotLast2 // yes
  416. decl %eax
  417. jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
  418. movl %eax,spancountminus1
  419. fildl spancountminus1
  420. flds C(d_zistepu) // _d_zistepu | spancountminus1
  421. fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
  422. flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
  423. fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
  424. fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
  425. faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
  426. fxch %st(1) // scm1 | _d_tdivzstepu*scm1
  427. fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
  428. fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
  429. faddp %st(0),%st(3) // _d_sdivzstepu*scm1
  430. flds fp_64k // 64k | _d_sdivzstepu*scm1
  431. fxch %st(1) // _d_sdivzstepu*scm1 | 64k
  432. faddp %st(0),%st(4) // 64k
  433. fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
  434. // overlap
  435. jmp LFDIVInFlight2
  436. .align 4
  437. LSetupNotLast2:
  438. fadds zi8stepu
  439. fxch %st(2)
  440. fadds sdivz8stepu
  441. fxch %st(2)
  442. flds tdivz8stepu
  443. faddp %st(0),%st(2)
  444. flds fp_64k
  445. fdiv %st(1),%st(0) // z = 1/1/z
  446. // this is what we've gone to all this trouble to
  447. // overlap
  448. LFDIVInFlight2:
  449. pushl %eax
  450. cmpw 10(%ecx),%bp
  451. jl Lp6
  452. movb (%esi),%al
  453. cmpb $(TRANSPARENT_COLOR),%al
  454. jz Lp6
  455. movw %bp,10(%ecx)
  456. movb %al,5(%edi)
  457. Lp6:
  458. addl izistep,%ebp
  459. adcl $0,%ebp
  460. addl tstep,%edx
  461. sbbl %eax,%eax
  462. addl sstep,%ebx
  463. adcl advancetable+4(,%eax,4),%esi
  464. cmpw 12(%ecx),%bp
  465. jl Lp7
  466. movb (%esi),%al
  467. cmpb $(TRANSPARENT_COLOR),%al
  468. jz Lp7
  469. movw %bp,12(%ecx)
  470. movb %al,6(%edi)
  471. Lp7:
  472. addl izistep,%ebp
  473. adcl $0,%ebp
  474. addl tstep,%edx
  475. sbbl %eax,%eax
  476. addl sstep,%ebx
  477. adcl advancetable+4(,%eax,4),%esi
  478. cmpw 14(%ecx),%bp
  479. jl Lp8
  480. movb (%esi),%al
  481. cmpb $(TRANSPARENT_COLOR),%al
  482. jz Lp8
  483. movw %bp,14(%ecx)
  484. movb %al,7(%edi)
  485. Lp8:
  486. addl izistep,%ebp
  487. adcl $0,%ebp
  488. addl tstep,%edx
  489. sbbl %eax,%eax
  490. addl sstep,%ebx
  491. adcl advancetable+4(,%eax,4),%esi
  492. addl $8,%edi
  493. addl $16,%ecx
  494. movl %edx,tfracf
  495. movl snext,%edx
  496. movl %ebx,sfracf
  497. movl tnext,%ebx
  498. movl %edx,s
  499. movl %ebx,t
  500. movl %ecx,pz
  501. movl %ebp,izi
  502. popl %ecx // retrieve count
  503. //
  504. // determine whether last span or not
  505. //
  506. cmpl $8,%ecx // are there multiple segments remaining?
  507. ja LNotLastSegment // yes
  508. //
  509. // last segment of scan
  510. //
  511. LLastSegment:
  512. //
  513. // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  514. // get there. The number of pixels left is variable, and we want to land on the
  515. // last pixel, not step one past it, so we can't run into arithmetic problems
  516. //
  517. testl %ecx,%ecx
  518. jz LNoSteps // just draw the last pixel and we're done
  519. // pick up after the FDIV that was left in flight previously
  520. fld %st(0) // duplicate it
  521. fmul %st(4),%st(0) // s = s/z * z
  522. fxch %st(1)
  523. fmul %st(3),%st(0) // t = t/z * z
  524. fxch %st(1)
  525. fistpl snext
  526. fistpl tnext
  527. movl C(tadjust),%ebx
  528. movl C(sadjust),%eax
  529. addl snext,%eax
  530. addl tnext,%ebx
  531. movl C(bbextents),%ebp
  532. movl C(bbextentt),%edx
  533. cmpl $2048,%eax
  534. jl LClampLow4
  535. cmpl %ebp,%eax
  536. ja LClampHigh4
  537. LClampReentry4:
  538. movl %eax,snext
  539. cmpl $2048,%ebx
  540. jl LClampLow5
  541. cmpl %edx,%ebx
  542. ja LClampHigh5
  543. LClampReentry5:
  544. cmpl $1,%ecx // don't bother
  545. je LOnlyOneStep // if two pixels in segment, there's only one step,
  546. // of the segment length
  547. subl s,%eax
  548. subl t,%ebx
  549. addl %eax,%eax // convert to 15.17 format so multiply by 1.31
  550. addl %ebx,%ebx // reciprocal yields 16.48
  551. imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
  552. movl %edx,%ebp
  553. movl %ebx,%eax
  554. imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
  555. LSetEntryvec:
  556. //
  557. // set up advancetable
  558. //
  559. movl spr8entryvec_table(,%ecx,4),%ebx
  560. movl %edx,%eax
  561. pushl %ebx // entry point into code for RET later
  562. movl %ebp,%ecx
  563. sarl $16,%ecx // sstep >>= 16;
  564. movl C(cachewidth),%ebx
  565. sarl $16,%edx // tstep >>= 16;
  566. jz LIsZeroLast
  567. imull %ebx,%edx // (tstep >> 16) * cachewidth;
  568. LIsZeroLast:
  569. addl %ecx,%edx // add in sstep
  570. // (tstep >> 16) * cachewidth + (sstep >> 16);
  571. movl tfracf,%ecx
  572. movl %edx,advancetable+4 // advance base in t
  573. addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
  574. // (sstep >> 16);
  575. shll $16,%ebp // left-justify sstep fractional part
  576. movl sfracf,%ebx
  577. shll $16,%eax // left-justify tstep fractional part
  578. movl %edx,advancetable // advance extra in t
  579. movl %eax,tstep
  580. movl %ebp,sstep
  581. movl %ecx,%edx
  582. movl pz,%ecx
  583. movl izi,%ebp
  584. ret // jump to the number-of-pixels handler
  585. //----------------------------------------
  586. LNoSteps:
  587. movl pz,%ecx
  588. subl $7,%edi // adjust for hardwired offset
  589. subl $14,%ecx
  590. jmp LEndSpan
  591. LOnlyOneStep:
  592. subl s,%eax
  593. subl t,%ebx
  594. movl %eax,%ebp
  595. movl %ebx,%edx
  596. jmp LSetEntryvec
  597. //----------------------------------------
  598. .globl Spr8Entry2_8
  599. Spr8Entry2_8:
  600. subl $6,%edi // adjust for hardwired offsets
  601. subl $12,%ecx
  602. movb (%esi),%al
  603. jmp LLEntry2_8
  604. //----------------------------------------
  605. .globl Spr8Entry3_8
  606. Spr8Entry3_8:
  607. subl $5,%edi // adjust for hardwired offsets
  608. subl $10,%ecx
  609. jmp LLEntry3_8
  610. //----------------------------------------
  611. .globl Spr8Entry4_8
  612. Spr8Entry4_8:
  613. subl $4,%edi // adjust for hardwired offsets
  614. subl $8,%ecx
  615. jmp LLEntry4_8
  616. //----------------------------------------
  617. .globl Spr8Entry5_8
  618. Spr8Entry5_8:
  619. subl $3,%edi // adjust for hardwired offsets
  620. subl $6,%ecx
  621. jmp LLEntry5_8
  622. //----------------------------------------
  623. .globl Spr8Entry6_8
  624. Spr8Entry6_8:
  625. subl $2,%edi // adjust for hardwired offsets
  626. subl $4,%ecx
  627. jmp LLEntry6_8
  628. //----------------------------------------
  629. .globl Spr8Entry7_8
  630. Spr8Entry7_8:
  631. decl %edi // adjust for hardwired offsets
  632. subl $2,%ecx
  633. jmp LLEntry7_8
  634. //----------------------------------------
  635. .globl Spr8Entry8_8
  636. Spr8Entry8_8:
  637. cmpw (%ecx),%bp
  638. jl Lp9
  639. movb (%esi),%al
  640. cmpb $(TRANSPARENT_COLOR),%al
  641. jz Lp9
  642. movw %bp,(%ecx)
  643. movb %al,(%edi)
  644. Lp9:
  645. addl izistep,%ebp
  646. adcl $0,%ebp
  647. addl tstep,%edx
  648. sbbl %eax,%eax
  649. addl sstep,%ebx
  650. adcl advancetable+4(,%eax,4),%esi
  651. LLEntry7_8:
  652. cmpw 2(%ecx),%bp
  653. jl Lp10
  654. movb (%esi),%al
  655. cmpb $(TRANSPARENT_COLOR),%al
  656. jz Lp10
  657. movw %bp,2(%ecx)
  658. movb %al,1(%edi)
  659. Lp10:
  660. addl izistep,%ebp
  661. adcl $0,%ebp
  662. addl tstep,%edx
  663. sbbl %eax,%eax
  664. addl sstep,%ebx
  665. adcl advancetable+4(,%eax,4),%esi
  666. LLEntry6_8:
  667. cmpw 4(%ecx),%bp
  668. jl Lp11
  669. movb (%esi),%al
  670. cmpb $(TRANSPARENT_COLOR),%al
  671. jz Lp11
  672. movw %bp,4(%ecx)
  673. movb %al,2(%edi)
  674. Lp11:
  675. addl izistep,%ebp
  676. adcl $0,%ebp
  677. addl tstep,%edx
  678. sbbl %eax,%eax
  679. addl sstep,%ebx
  680. adcl advancetable+4(,%eax,4),%esi
  681. LLEntry5_8:
  682. cmpw 6(%ecx),%bp
  683. jl Lp12
  684. movb (%esi),%al
  685. cmpb $(TRANSPARENT_COLOR),%al
  686. jz Lp12
  687. movw %bp,6(%ecx)
  688. movb %al,3(%edi)
  689. Lp12:
  690. addl izistep,%ebp
  691. adcl $0,%ebp
  692. addl tstep,%edx
  693. sbbl %eax,%eax
  694. addl sstep,%ebx
  695. adcl advancetable+4(,%eax,4),%esi
  696. LLEntry4_8:
  697. cmpw 8(%ecx),%bp
  698. jl Lp13
  699. movb (%esi),%al
  700. cmpb $(TRANSPARENT_COLOR),%al
  701. jz Lp13
  702. movw %bp,8(%ecx)
  703. movb %al,4(%edi)
  704. Lp13:
  705. addl izistep,%ebp
  706. adcl $0,%ebp
  707. addl tstep,%edx
  708. sbbl %eax,%eax
  709. addl sstep,%ebx
  710. adcl advancetable+4(,%eax,4),%esi
  711. LLEntry3_8:
  712. cmpw 10(%ecx),%bp
  713. jl Lp14
  714. movb (%esi),%al
  715. cmpb $(TRANSPARENT_COLOR),%al
  716. jz Lp14
  717. movw %bp,10(%ecx)
  718. movb %al,5(%edi)
  719. Lp14:
  720. addl izistep,%ebp
  721. adcl $0,%ebp
  722. addl tstep,%edx
  723. sbbl %eax,%eax
  724. addl sstep,%ebx
  725. adcl advancetable+4(,%eax,4),%esi
  726. LLEntry2_8:
  727. cmpw 12(%ecx),%bp
  728. jl Lp15
  729. movb (%esi),%al
  730. cmpb $(TRANSPARENT_COLOR),%al
  731. jz Lp15
  732. movw %bp,12(%ecx)
  733. movb %al,6(%edi)
  734. Lp15:
  735. addl izistep,%ebp
  736. adcl $0,%ebp
  737. addl tstep,%edx
  738. sbbl %eax,%eax
  739. addl sstep,%ebx
  740. adcl advancetable+4(,%eax,4),%esi
  741. LEndSpan:
  742. cmpw 14(%ecx),%bp
  743. jl Lp16
  744. movb (%esi),%al // load first texel in segment
  745. cmpb $(TRANSPARENT_COLOR),%al
  746. jz Lp16
  747. movw %bp,14(%ecx)
  748. movb %al,7(%edi)
  749. Lp16:
  750. //
  751. // clear s/z, t/z, 1/z from FP stack
  752. //
  753. fstp %st(0)
  754. fstp %st(0)
  755. fstp %st(0)
  756. popl %ebx // restore spans pointer
  757. LNextSpan:
  758. addl $(sspan_t_size),%ebx // point to next span
  759. movl sspan_t_count(%ebx),%ecx
  760. cmpl $0,%ecx // any more spans?
  761. jg LSpanLoop // yes
  762. jz LNextSpan // yes, but this one's empty
  763. popl %ebx // restore register variables
  764. popl %esi
  765. popl %edi
  766. popl %ebp // restore the caller's stack frame
  767. ret
  768. #endif // id386