d_polysa.s 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251
  1. //
  2. // d_polysa.s
  3. // x86 assembly-language polygon model drawing code
  4. //
  5. #include "qasm.h"
  6. #include "d_ifacea.h"
  7. #if id386
  8. // !!! if this is changed, it must be changed in d_polyse.c too !!!
  // DPS_MAXSPANS: MAXHEIGHT spans plus one terminating entry (see the note
  // after the define).
  // NOTE(review): the expansion is not parenthesized, so it is only safe in
  // contexts where operator precedence cannot regroup it. The only use visible
  // in this part of the file is the commented-out SPAN_SIZE formula.
  9. #define DPS_MAXSPANS MAXHEIGHT+1
  10. // 1 extra for spanpackage that marks end
  11. //#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
  // Hard-coded replacement for the formula above.
  // NOTE(review): presumably 1024 stands in for MAXHEIGHT and 32 for
  // spanpackage_t_size -- confirm against the headers before changing either.
  // The expansion as a whole is not parenthesized either.
  12. #define SPAN_SIZE (1024+1+1+1)*32
  13. .data
  14. .align 4
  // Single-precision scratch values. R_PolysetCalcGradients (currently under
  // "#if 0") stores the edge deltas here with fstps and re-reads them with
  // fmuls.
  15. p10_minus_p20: .single 0
  16. p01_minus_p21: .single 0
  // temp0, temp1 and Ltemp are not referenced in the portion of the file
  // visible here.
  17. temp0: .single 0
  18. temp1: .single 0
  19. Ltemp: .single 0
  // Jump table used by R_PolysetDrawSpans8_Opaque via
  // "jmp aff8entryvec_table(,%eax,4)" with %eax = (-count) & 7, so entry k
  // enters the draw sequence at label LDraw(8-k): 0 -> LDraw8, 1 -> LDraw7,
  // ..., 7 -> LDraw1.
  20. aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5
  21. .long LDraw4, LDraw3, LDraw2, LDraw1
  // Copy of C(r_zistepx) rotated right by 16 bits. It is written once at the
  // top of R_PolysetDrawSpans8_Opaque and added to the 1/z accumulator in %ebp
  // for every pixel.
  22. lzistepx: .long 0
  23. .text
  // External routines; the declarations are skipped when NeXT is defined.
  // Neither symbol is referenced in the portion of the file visible here.
  24. #ifndef NeXT
  25. .extern C(D_PolysetSetEdgeTable)
  26. .extern C(D_RasterizeAliasPolySmooth)
  27. #endif
  28. //----------------------------------------------------------------------
  29. // affine triangle gradient calculation code
  30. //----------------------------------------------------------------------
  // R_PolysetCalcGradients -- DISABLED: everything down to the matching
  // #endif is inside "#if 0" and is therefore not assembled.
  //
  // Purpose: from the three vertices C(r_p0), C(r_p1), C(r_p2) (arrays of
  // 32-bit ints) compute the integer per-x and per-y steps of four
  // interpolants and store them in globals:
  //   index 4 (light) -> C(r_lstepx),  C(r_lstepy)   (converted under ceil_cw)
  //   index 2 (s)     -> C(r_sstepx),  C(r_sstepy)
  //   index 3 (t)     -> C(r_tstepx),  C(r_tstepy)
  //   index 5 (1/z)   -> C(r_zistepx), C(r_zistepy)
  // and then derive C(a_sstepxfrac), C(a_tstepxfrac) and C(a_ststepxwhole).
  //
  // In:    4(%esp) = skin width (the only stack argument; see "skinwidth")
  //        C(d_xdenom) = single-precision value used as xstepdenominv
  // Out:   the globals listed above; no register result
  // Clobb: %eax, %ecx, %edx, flags, x87 registers. The x87 control word is
  //        left set to single_cw on return.
  //
  // The "a | b | c" comments show the x87 register stack after each
  // instruction, with %st(0) leftmost.
  // NOTE(review): according to those stack comments, "fsubp %st(0),%st(i)"
  // leaves st(0) - st(i) and "fsubrp %st(0),%st(i)" leaves st(i) - st(0).
  // That matches GNU as's historical AT&T operand-order quirk for
  // fsub/fsubr with an %st(i) destination -- confirm with the assembler in use
  // before re-enabling this code.
  31. #if 0
  // Offset of the single stack argument at function entry (return address
  // only; nothing is pushed by this routine).
  32. #define skinwidth 4+0
  33. .globl C(R_PolysetCalcGradients)
  34. C(R_PolysetCalcGradients):
  35. // p00_minus_p20 = r_p0[0] - r_p2[0];
  36. // p01_minus_p21 = r_p0[1] - r_p2[1];
  37. // p10_minus_p20 = r_p1[0] - r_p2[0];
  38. // p11_minus_p21 = r_p1[1] - r_p2[1];
  39. //
  40. // xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
  41. // p00_minus_p20 * p11_minus_p21);
  42. //
  43. // ystepdenominv = -xstepdenominv;
  // NOTE: the assembly below does not perform the division shown in the C
  // reference above. It loads C(d_xdenom) and from then on treats that value
  // as xstepdenominv -- presumably the caller precomputes it; verify.
  44. fildl C(r_p0)+0 // r_p0[0]
  45. fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
  46. fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
  47. fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
  48. fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
  49. fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
  50. // r_p2[0] | r_p0[0]
  51. fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
  52. // r_p2[0] | r_p0[0]
  53. fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
  54. // r_p2[0] | r_p0[0]
  55. fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
  56. // r_p2[0] | r_p0[0]
  57. fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
  58. // r_p1[1] | r_p2[0] | r_p0[0]
  59. fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
  60. // r_p1[1] | r_p2[0] | p10_minus_p20
  61. fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
  62. // p00_minus_p20 | p10_minus_p20
  63. fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
  64. // p00_minus_p20 | p10_minus_p20
  65. fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
  66. // p00_minus_p20 | p10_minus_p20
  67. fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
  68. // p00_minus_p20 | p10_minus_p20
  69. flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
  70. // p00_minus_p20 | p10_minus_p20
  71. fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
  72. // p00_minus_p20 | d_xdenom
  // Two of the four deltas are spilled to memory so that the remaining two
  // plus the reciprocal(s) can stay on the x87 stack for the rest of the
  // routine.
  73. fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
  74. // p00_minus_p20 | d_xdenom
  75. fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
  76. fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
  77. //// ceil () for light so positive steps are exaggerated, negative steps
  78. //// diminished, pushing us away from underflow toward overflow. Underflow is
  79. //// very visible, overflow is very unlikely, because of ambient lighting
  80. // t0 = r_p0[4] - r_p2[4];
  81. // t1 = r_p1[4] - r_p2[4];
  82. fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
  83. // p11_minus_p21
  84. fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
  85. // p00_minus_p20 | p11_minus_p21
  86. fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
  87. // p00_minus_p20 | p11_minus_p21
  88. fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
  89. // p00_minus_p20 | p11_minus_p21
  90. fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
  91. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  92. fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
  93. // p00_minus_p20 | p11_minus_p21
  94. fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
  95. // p11_minus_p21
  96. // r_lstepx = (int)
  97. // ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
  98. // r_lstepy = (int)
  99. // ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
  100. fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
  101. // p11_minus_p21
  102. fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
  103. // p00_minus_p20 | p11_minus_p21
  104. fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
  105. // p00_minus_p20 | p11_minus_p21
  106. fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
  107. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  108. fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
  109. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  110. fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
  111. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  112. fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  113. // t0*p11_minus_p21 | xstepdenominv |
  114. // p00_minus_p20 | p11_minus_p21
  115. fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  116. // t0*p11_minus_p21 | xstepdenominv |
  117. // p00_minus_p20 | p11_minus_p21
  118. fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
  119. // t1*p01_minus_p21 | t0*p11_minus_p21 |
  120. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  121. fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
  122. // t1*p00_minus_p20 | t0*p11_minus_p21 |
  123. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  124. fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
  125. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  126. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  127. fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
  128. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  129. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  // ystepdenominv is created here (xstepdenominv * float_minus_1) and then
  // stays on the stack, next to xstepdenominv, for the s, t and 1/z passes.
  130. fld %st(2) // xstepdenominv |
  131. // t1*p00_minus_p20 - t0*p10_minus_p20 |
  132. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  133. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  134. fmuls float_minus_1 // ystepdenominv |
  135. // t1*p00_minus_p20 - t0*p10_minus_p20 |
  136. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  137. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  138. fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
  139. // t1*p00_minus_p20 - t0*p10_minus_p20 |
  140. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  141. // p11_minus_p21
  142. fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  143. // xstepdenominv |
  144. // t1*p00_minus_p20 - t0*p10_minus_p20 |
  145. // ystepdenominv | xstepdenominv |
  146. // p00_minus_p20 | p11_minus_p21
  147. fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
  148. // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  149. // xstepdenominv | ystepdenominv |
  150. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  151. fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  152. // ystepdenominv |
  153. // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  154. // xstepdenominv | ystepdenominv |
  155. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  // Only the two light steps are converted while ceil_cw is loaded; the
  // control word is switched to single_cw immediately afterwards and is not
  // changed again in this routine. Both control words are defined elsewhere.
  156. fldcw ceil_cw
  157. fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
  158. // p00_minus_p20 | p11_minus_p21
  159. fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
  160. // p11_minus_p21
  161. fldcw single_cw
  162. // t0 = r_p0[2] - r_p2[2];
  163. // t1 = r_p1[2] - r_p2[2];
  164. fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
  165. // p00_minus_p20 | p11_minus_p21
  166. fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
  167. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  168. fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
  169. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  170. fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
  171. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  172. fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
  173. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  174. // p11_minus_p21
  175. fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
  176. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  177. fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
  178. // p00_minus_p20 | p11_minus_p21
  179. // r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
  180. // xstepdenominv);
  181. // r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
  182. // ystepdenominv);
  183. fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
  // p00_minus_p20 | p11_minus_p21
  184. fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
  185. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  186. fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
  187. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  188. fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
  189. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  190. // p11_minus_p21
  191. fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
  192. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  193. // p11_minus_p21
  194. fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
  195. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  196. // p11_minus_p21
  197. fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  198. // t0*p11_minus_p21 | ystepdenominv |
  199. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  200. fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  201. // t0*p11_minus_p21 | ystepdenominv |
  202. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  203. fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
  204. // t1*p01_minus_p21 | t0*p11_minus_p21 |
  205. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  206. // p11_minus_p21
  207. fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
  208. // t1*p00_minus_p20 | t0*p11_minus_p21 |
  209. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  210. // p11_minus_p21
  211. fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
  212. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  213. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  214. // p11_minus_p21
  215. fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
  216. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  217. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  218. // p11_minus_p21
  219. fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  220. // ystepdenominv |
  221. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  222. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  223. // p11_minus_p21
  224. fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
  225. // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  226. // ystepdenominv | ystepdenominv |
  227. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  228. fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  229. // xstepdenominv |
  230. // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  231. // ystepdenominv | ystepdenominv |
  232. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  233. fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  234. // ystepdenominv |
  235. // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  236. // xstepdenominv | ystepdenominv |
  237. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  238. fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
  239. // p00_minus_p20 | p11_minus_p21
  240. fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
  241. // p11_minus_p21
  242. // t0 = r_p0[3] - r_p2[3];
  243. // t1 = r_p1[3] - r_p2[3];
  244. fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
  245. // p00_minus_p20 | p11_minus_p21
  246. fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
  247. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  248. fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
  249. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  250. fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
  251. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  252. fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
  253. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  254. // p11_minus_p21
  255. fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
  256. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  257. fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
  258. // p00_minus_p20 | p11_minus_p21
  259. // r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
  260. // xstepdenominv);
  261. // r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
  262. // ystepdenominv);
  263. fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
  264. // p00_minus_p20 | p11_minus_p21
  265. fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
  266. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  267. fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
  268. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  269. fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
  270. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  271. // p11_minus_p21
  272. fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
  273. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  274. // p11_minus_p21
  275. fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
  276. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  277. // p11_minus_p21
  278. fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  279. // t0*p11_minus_p21 | ystepdenominv |
  280. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  281. fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  282. // t0*p11_minus_p21 | ystepdenominv |
  283. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  284. fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
  285. // t1*p01_minus_p21 | t0*p11_minus_p21 |
  286. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  287. // p11_minus_p21
  288. fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
  289. // t1*p00_minus_p20 | t0*p11_minus_p21 |
  290. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  291. // p11_minus_p21
  292. fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
  293. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  294. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  295. // p11_minus_p21
  296. fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
  297. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  298. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  299. // p11_minus_p21
  300. fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  301. // ystepdenominv |
  302. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  303. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  304. // p11_minus_p21
  305. fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
  306. // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  307. // ystepdenominv | ystepdenominv |
  308. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  309. fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  310. // xstepdenominv |
  311. // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  312. // ystepdenominv | ystepdenominv |
  313. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  314. fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  315. // ystepdenominv |
  316. // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  317. // xstepdenominv | ystepdenominv |
  318. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  319. fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
  320. // p00_minus_p20 | p11_minus_p21
  321. fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
  322. // p11_minus_p21
  323. // t0 = r_p0[5] - r_p2[5];
  324. // t1 = r_p1[5] - r_p2[5];
  325. fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
  326. // p00_minus_p20 | p11_minus_p21
  327. fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
  328. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  329. fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
  330. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  331. fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
  332. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  333. fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
  334. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  335. // p11_minus_p21
  336. fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
  337. // xstepdenominv | p00_minus_p20 | p11_minus_p21
  338. fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
  339. // p00_minus_p20 | p11_minus_p21
  340. // r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
  341. // xstepdenominv);
  342. // r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
  343. // ystepdenominv);
  // This is the last interpolant, so unlike the passes above it uses the
  // popping forms (fmulp/fsubp) and overwrites the retained p00_minus_p20,
  // p11_minus_p21 and reciprocal slots, draining the x87 stack by the end.
  344. fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
  345. // p00_minus_p20 | p11_minus_p21
  346. fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
  347. // p00_minus_p20 | t0*p11_minus_p21
  348. fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
  349. // p00_minus_p20 | t0*p11_minus_p21
  350. fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
  351. // p00_minus_p20 | t0*p11_minus_p21
  352. fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
  353. // xstepdenominv | p00_minus_p20 |
  354. // t0*p11_minus_p21
  355. fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
  356. // xstepdenominv | p00_minus_p20 |
  357. // t0*p11_minus_p21
  358. fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  359. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  360. // t0*p11_minus_p21
  361. fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  362. // ystepdenominv | xstepdenominv | p00_minus_p20 |
  363. // t0*p11_minus_p21
  364. fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
  365. // ystepdenominv | xstepdenominv |
  366. // t1*p00_minus_p20 | t0*p11_minus_p21
  367. fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
  368. // ystepdenominv | xstepdenominv |
  369. // t1*p00_minus_p20 | t0*p10_minus_p20
  370. fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
  371. // ystepdenominv | xstepdenominv |
  372. // t1*p00_minus_p20 | t0*p10_minus_p20
  373. fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
  374. // xstepdenominv |
  375. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  376. // t0*p10_minus_p20
  377. fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
  378. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  379. // t1*p00_minus_p20 - t0*p10_minus_p20
  380. fxch %st(1) // xstepdenominv | ystepdenominv |
  381. // t1*p01_minus_p21 - t0*p11_minus_p21 |
  382. // t1*p00_minus_p20 - t0*p10_minus_p20
  383. fmulp %st(0),%st(2) // ystepdenominv |
  384. // (t1*p01_minus_p21 - t0*p11_minus_p21) *
  385. // xstepdenominv |
  386. // t1*p00_minus_p20 - t0*p10_minus_p20
  387. fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
  388. // xstepdenominv |
  389. // (t1*p00_minus_p20 - t0*p10_minus_p20) *
  390. // ystepdenominv
  391. fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
  392. // ystepdenominv
  393. fistpl C(r_zistepy)
  394. // a_sstepxfrac = r_sstepx << 16;
  395. // a_tstepxfrac = r_tstepx << 16;
  396. //
  397. // a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
  398. // (r_sstepx >> 16);
  // Integer tail: the low 16 bits of each step are moved to the top of a
  // dword (the "frac" values) and the arithmetically shifted high parts are
  // combined into a_ststepxwhole.
  399. movl C(r_sstepx),%eax
  400. movl C(r_tstepx),%edx
  401. shll $16,%eax
  402. shll $16,%edx
  403. movl %eax,C(a_sstepxfrac)
  404. movl %edx,C(a_tstepxfrac)
  405. movl C(r_sstepx),%ecx
  406. movl C(r_tstepx),%eax
  407. sarl $16,%ecx // ecx = r_sstepx >> 16 (sign-preserving)
  408. sarl $16,%eax // eax = r_tstepx >> 16 (sign-preserving)
  // One-operand imull: edx:eax = eax * the dword at skinwidth(%esp), so %edx
  // is overwritten. NOTE: the multiplier is the stack argument, whereas the C
  // reference above reads r_affinetridesc.skinwidth.
  409. imull skinwidth(%esp)
  410. addl %ecx,%eax
  411. movl %eax,C(a_ststepxwhole)
  412. ret
  413. #endif
  414. //----------------------------------------------------------------------
  415. // recursive subdivision affine triangle drawing code
  416. //
  417. // not C-callable because of stdcall return
  418. //----------------------------------------------------------------------
  // D_PolysetRecursiveTriangle (lp1, lp2, lp3)
  //
  // Purpose: if every edge of the triangle spans at most one unit in both
  // element [0] and element [1], return. Otherwise split one edge at its
  // midpoint, optionally plot the midpoint (z-tested), and recurse on the two
  // halves (lp3, lp1, new) and (lp3, new, lp2).
  //
  // In:    three stack arguments, each a pointer to a vertex record of 32-bit
  //        ints. Elements read here: [0],[1] (used as column/row indices into
  //        zspantable and d_scantable), [2],[3] (shifted right 16 to index
  //        skintable), [5] (shifted right 16 and compared with the z-buffer).
  //        Element [4] is never read or written by this routine.
  // Out:   no register result; may write C(d_viewbuffer) and the z-buffer.
  // Stack: the callee removes its 12 bytes of arguments ("ret $12"); each
  //        recursion level uses 24 bytes for the new vertex in addition to
  //        saved registers, arguments and return address.
  // Saved: %ebp, %esi, %edi, %ebx.  Clobb: %eax, %ecx, %edx, flags.
  //
  // The lpN offsets below are relative to %esp AFTER the four register pushes
  // in the prologue (4 for the return address + 16 for the saved registers).
  // Once LSplit has lowered %esp they are no longer valid and are not used.
  419. #define lp1 4+16
  420. #define lp2 8+16
  421. #define lp3 12+16
  422. .globl C(D_PolysetRecursiveTriangle)
  423. C(D_PolysetRecursiveTriangle):
  424. pushl %ebp // preserve caller stack frame pointer
  425. pushl %esi // preserve register variables
  426. pushl %edi
  427. pushl %ebx
  428. // int *temp;
  429. // int d;
  430. // int new[6];
  431. // int i;
  432. // int z;
  433. // short *zbuf;
  // Register roles from here on: %ebx = lp1, %esi = lp2, %edi = lp3.
  434. movl lp2(%esp),%esi
  435. movl lp1(%esp),%ebx
  436. movl lp3(%esp),%edi
  437. // d = lp2[0] - lp1[0];
  438. // if (d < -1 || d > 1)
  439. // goto split;
  // Range-check idiom used for all six tests: after "incl", an UNSIGNED
  // compare of d+1 against 2 is "above" exactly when d < -1 or d > 1, so one
  // cmpl/ja pair replaces two signed tests. The loads and subtraction for the
  // following [1] test are interleaved with the current one.
  440. movl 0(%esi),%eax
  441. movl 0(%ebx),%edx
  442. movl 4(%esi),%ebp
  443. subl %edx,%eax
  444. movl 4(%ebx),%ecx
  445. subl %ecx,%ebp
  446. incl %eax
  447. cmpl $2,%eax
  448. ja LSplit
  449. // d = lp2[1] - lp1[1];
  450. // if (d < -1 || d > 1)
  451. // goto split;
  452. movl 0(%edi),%eax // preload lp3[0] for the next test
  453. incl %ebp
  454. cmpl $2,%ebp
  455. ja LSplit
  456. // d = lp3[0] - lp2[0];
  457. // if (d < -1 || d > 1)
  458. // goto split2;
  459. movl 0(%esi),%edx
  460. movl 4(%edi),%ebp
  461. subl %edx,%eax
  462. movl 4(%esi),%ecx
  463. subl %ecx,%ebp
  464. incl %eax
  465. cmpl $2,%eax
  466. ja LSplit2
  467. // d = lp3[1] - lp2[1];
  468. // if (d < -1 || d > 1)
  469. // goto split2;
  470. movl 0(%ebx),%eax // preload lp1[0] for the next test
  471. incl %ebp
  472. cmpl $2,%ebp
  473. ja LSplit2
  474. // d = lp1[0] - lp3[0];
  475. // if (d < -1 || d > 1)
  476. // goto split3;
  477. movl 0(%edi),%edx
  478. movl 4(%ebx),%ebp
  479. subl %edx,%eax
  480. movl 4(%edi),%ecx
  481. subl %ecx,%ebp
  482. incl %eax
  483. incl %ebp
  484. movl %ebx,%edx // temp = lp1, consumed by LSplit3 below
  485. cmpl $2,%eax
  486. ja LSplit3
  487. // d = lp1[1] - lp3[1];
  488. // if (d < -1 || d > 1)
  489. // {
  490. //split3:
  491. // temp = lp1;
  492. // lp1 = lp3;
  493. // lp3 = lp2;
  494. // lp2 = temp;
  495. // goto split;
  496. // }
  497. //
  498. // return; // entire tri is filled
  499. //
  500. cmpl $2,%ebp
  501. jna LDone
  // Rotate the vertices so that the over-long edge (old lp3 -> old lp1)
  // becomes lp1 -> lp2, the edge LSplit subdivides. %edx holds the old lp1.
  502. LSplit3:
  503. movl %edi,%ebx
  504. movl %esi,%edi
  505. movl %edx,%esi
  506. jmp LSplit
  507. //split2:
  508. LSplit2:
  509. // temp = lp1;
  510. // lp1 = lp2;
  511. // lp2 = lp3;
  512. // lp3 = temp;
  513. movl %ebx,%eax
  514. movl %esi,%ebx
  515. movl %edi,%esi
  516. movl %eax,%edi
  517. //split:
  518. LSplit:
  519. subl $24,%esp // allocate space for a new vertex
  520. //// split this edge
  521. // new[0] = (lp1[0] + lp2[0]) >> 1;
  522. // new[1] = (lp1[1] + lp2[1]) >> 1;
  523. // new[2] = (lp1[2] + lp2[2]) >> 1;
  524. // new[3] = (lp1[3] + lp2[3]) >> 1;
  525. // new[5] = (lp1[5] + lp2[5]) >> 1;
  // The five averages are computed in an interleaved order ([2], [3], [5],
  // [0], [1]). new[4] (offset 16) is left uninitialized. On exit from this
  // sequence %ecx still holds lp1[0] and %eax holds lp1[1]; both are reused
  // by the leading-edge test that follows.
  526. movl 8(%ebx),%eax
  527. movl 8(%esi),%edx
  528. movl 12(%ebx),%ecx
  529. addl %edx,%eax
  530. movl 12(%esi),%edx
  531. sarl $1,%eax
  532. addl %edx,%ecx
  533. movl %eax,8(%esp) // new[2]
  534. movl 20(%ebx),%eax
  535. sarl $1,%ecx
  536. movl 20(%esi),%edx
  537. movl %ecx,12(%esp) // new[3]
  538. addl %edx,%eax
  539. movl 0(%ebx),%ecx // ecx = lp1[0], kept for the tie-break compare
  540. movl 0(%esi),%edx
  541. sarl $1,%eax
  542. addl %ecx,%edx
  543. movl %eax,20(%esp) // new[5]
  544. movl 4(%ebx),%eax // eax = lp1[1], kept for the leading-edge compare
  545. sarl $1,%edx
  546. movl 4(%esi),%ebp
  547. movl %edx,0(%esp) // new[0]
  548. addl %eax,%ebp
  549. sarl $1,%ebp
  550. movl %ebp,4(%esp) // new[1]
  551. //// draw the point if splitting a leading edge
  552. // if (lp2[1] > lp1[1])
  553. // goto nodraw;
  554. cmpl %eax,4(%esi)
  555. jg LNoDraw
  556. // if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
  557. // goto nodraw;
  // The jnz below still tests the flags of the cmpl above: the intervening
  // movl does not modify flags. Not-equal here means lp2[1] < lp1[1].
  558. movl 0(%esi),%edx
  559. jnz LDraw
  560. cmpl %ecx,%edx
  561. jl LNoDraw
  562. LDraw:
  563. // z = new[5] >> 16;
  564. movl 20(%esp),%edx
  565. movl 4(%esp),%ecx
  566. sarl $16,%edx
  567. movl 0(%esp),%ebp
  568. // zbuf = zspantable[new[1]] + new[0];
  569. movl C(zspantable)(,%ecx,4),%eax
  570. // if (z >= *zbuf)
  571. // {
  // 16-bit signed compare of z (%dx) with the z-buffer entry; the pixel is
  // skipped when z is less than the stored value.
  572. cmpw (%eax,%ebp,2),%dx
  573. jnge LNoDraw
  574. // int pix;
  575. //
  576. // *zbuf = z;
  577. movw %dx,(%eax,%ebp,2)
  578. // pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
  579. movl 12(%esp),%eax
  580. sarl $16,%eax
  581. movl 8(%esp),%edx
  582. sarl $16,%edx
  583. subl %ecx,%ecx // ecx = 0 so the byte loaded into %cl is zero-extended
  584. movl C(skintable)(,%eax,4),%eax
  585. movl 4(%esp),%ebp
  586. movb (%eax,%edx,),%cl
  587. movl C(d_pcolormap),%edx
  588. movb (%edx,%ecx,),%dl
  589. movl 0(%esp),%ecx
  590. // d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
  591. movl C(d_scantable)(,%ebp,4),%eax
  592. addl %eax,%ecx
  593. movl C(d_viewbuffer),%eax
  594. movb %dl,(%eax,%ecx,1)
  595. // }
  596. //
  597. //nodraw:
  598. LNoDraw:
  599. //// recursively continue
  600. // D_PolysetRecursiveTriangle (lp3, lp1, new);
  // Arguments are pushed right to left. "pushl %esp" passes the address of
  // new[]: at this point %esp points at new[0].
  // NOTE(review): this relies on PUSH ESP storing the pre-decrement value of
  // %esp, which is the documented behaviour on 286 and later processors.
  601. pushl %esp
  602. pushl %ebx
  603. pushl %edi
  604. call C(D_PolysetRecursiveTriangle)
  605. // D_PolysetRecursiveTriangle (lp3, new, lp2);
  // The callee has already popped its arguments (ret $12), so %esp points at
  // new[] again. %esi/%edi are still lp2/lp3 because the callee saves and
  // restores them; %ebx (lp1) is no longer needed and is reused for &new.
  606. movl %esp,%ebx
  607. pushl %esi
  608. pushl %ebx
  609. pushl %edi
  610. call C(D_PolysetRecursiveTriangle)
  611. addl $24,%esp // release the new vertex
  612. LDone:
  613. popl %ebx // restore register variables
  614. popl %edi
  615. popl %esi
  616. popl %ebp // restore caller stack frame pointer
  617. ret $12 // callee pops the three pointer arguments
  618. //----------------------------------------------------------------------
  619. // 8-bpp horizontal span drawing code for affine polygons, with smooth
  620. // shading and no transparency
  621. //----------------------------------------------------------------------
  622. #define pspans 4+8
  623. .globl C(D_PolysetAff8Start)
  624. C(D_PolysetAff8Start):
  625. .globl C(R_PolysetDrawSpans8_Opaque)
  626. C(R_PolysetDrawSpans8_Opaque):
  627. pushl %esi // preserve register variables
  628. pushl %ebx
  629. movl pspans(%esp),%esi // point to the first span descriptor
  630. movl C(r_zistepx),%ecx
  631. pushl %ebp // preserve caller's stack frame
  632. pushl %edi
  633. rorl $16,%ecx // put high 16 bits of 1/z step in low word
  634. movl spanpackage_t_count(%esi),%edx
  635. movl %ecx,lzistepx
  636. LSpanLoop:
  637. // lcount = d_aspancount - pspanpackage->count;
  638. //
  639. // errorterm += erroradjustup;
  640. // if (errorterm >= 0)
  641. // {
  642. // d_aspancount += d_countextrastep;
  643. // errorterm -= erroradjustdown;
  644. // }
  645. // else
  646. // {
  647. // d_aspancount += ubasestep;
  648. // }
  649. movl C(d_aspancount),%eax
  650. subl %edx,%eax
  651. movl C(erroradjustup),%edx
  652. movl C(errorterm),%ebx
  653. addl %edx,%ebx
  654. js LNoTurnover
  655. movl C(erroradjustdown),%edx
  656. movl C(d_countextrastep),%edi
  657. subl %edx,%ebx
  658. movl C(d_aspancount),%ebp
  659. movl %ebx,C(errorterm)
  660. addl %edi,%ebp
  661. movl %ebp,C(d_aspancount)
  662. jmp LRightEdgeStepped
  663. LNoTurnover:
  664. movl C(d_aspancount),%edi
  665. movl C(ubasestep),%edx
  666. movl %ebx,C(errorterm)
  667. addl %edx,%edi
  668. movl %edi,C(d_aspancount)
  669. LRightEdgeStepped:
  670. cmpl $1,%eax
  671. jl LNextSpan
  672. jz LExactlyOneLong
  673. //
  674. // set up advancetable
  675. //
  676. movl C(a_ststepxwhole),%ecx
  677. movl C(r_affinetridesc)+atd_skinwidth,%edx
  678. movl %ecx,advancetable+4 // advance base in t
  679. addl %edx,%ecx
  680. movl %ecx,advancetable // advance extra in t
  681. movl C(a_tstepxfrac),%ecx
  682. movw C(r_lstepx),%cx
  683. movl %eax,%edx // count
  684. movl %ecx,tstep
  685. addl $7,%edx
  686. shrl $3,%edx // count of full and partial loops
  687. movl spanpackage_t_sfrac(%esi),%ebx
  688. movw %dx,%bx
  689. movl spanpackage_t_pz(%esi),%ecx
  690. negl %eax
  691. movl spanpackage_t_pdest(%esi),%edi
  692. andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
  693. subl %eax,%edi // compensate for hardwired offsets
  694. subl %eax,%ecx
  695. subl %eax,%ecx
  696. movl spanpackage_t_tfrac(%esi),%edx
  697. movw spanpackage_t_light(%esi),%dx
  698. movl spanpackage_t_zi(%esi),%ebp
  699. rorl $16,%ebp // put high 16 bits of 1/z in low word
  700. pushl %esi
  701. movl spanpackage_t_ptex(%esi),%esi
  702. jmp aff8entryvec_table(,%eax,4)
  703. // %bx = count of full and partial loops
  704. // %ebx high word = sfrac
  705. // %ecx = pz
  706. // %dx = light
  707. // %edx high word = tfrac
  708. // %esi = ptex
  709. // %edi = pdest
  710. // %ebp = 1/z
  711. // tstep low word = C(r_lstepx)
  712. // tstep high word = C(a_tstepxfrac)
  713. // C(a_sstepxfrac) low word = 0
  714. // C(a_sstepxfrac) high word = C(a_sstepxfrac)
  715. LDrawLoop:
  716. // FIXME: do we need to clamp light? We may need at least a buffer bit to
  717. // keep it from poking into tfrac and causing problems
  718. LDraw8:
  719. cmpw (%ecx),%bp
  720. jl Lp1
  721. xorl %eax,%eax
  722. movb %dh,%ah
  723. movb (%esi),%al
  724. movw %bp,(%ecx)
  725. movb 0x12345678(%eax),%al
  726. LPatch8:
  727. movb %al,(%edi)
  728. Lp1:
  729. addl tstep,%edx
  730. sbbl %eax,%eax
  731. addl lzistepx,%ebp
  732. adcl $0,%ebp
  733. addl C(a_sstepxfrac),%ebx
  734. adcl advancetable+4(,%eax,4),%esi
  735. LDraw7:
  736. cmpw 2(%ecx),%bp
  737. jl Lp2
  738. xorl %eax,%eax
  739. movb %dh,%ah
  740. movb (%esi),%al
  741. movw %bp,2(%ecx)
  742. movb 0x12345678(%eax),%al
  743. LPatch7:
  744. movb %al,1(%edi)
  745. Lp2:
  746. addl tstep,%edx
  747. sbbl %eax,%eax
  748. addl lzistepx,%ebp
  749. adcl $0,%ebp
  750. addl C(a_sstepxfrac),%ebx
  751. adcl advancetable+4(,%eax,4),%esi
  752. LDraw6:
  753. cmpw 4(%ecx),%bp
  754. jl Lp3
  755. xorl %eax,%eax
  756. movb %dh,%ah
  757. movb (%esi),%al
  758. movw %bp,4(%ecx)
  759. movb 0x12345678(%eax),%al
  760. LPatch6:
  761. movb %al,2(%edi)
  762. Lp3:
  763. addl tstep,%edx
  764. sbbl %eax,%eax
  765. addl lzistepx,%ebp
  766. adcl $0,%ebp
  767. addl C(a_sstepxfrac),%ebx
  768. adcl advancetable+4(,%eax,4),%esi
  769. LDraw5:
  770. cmpw 6(%ecx),%bp
  771. jl Lp4
  772. xorl %eax,%eax
  773. movb %dh,%ah
  774. movb (%esi),%al
  775. movw %bp,6(%ecx)
  776. movb 0x12345678(%eax),%al
  777. LPatch5:
  778. movb %al,3(%edi)
  779. Lp4:
  780. addl tstep,%edx
  781. sbbl %eax,%eax
  782. addl lzistepx,%ebp
  783. adcl $0,%ebp
  784. addl C(a_sstepxfrac),%ebx
  785. adcl advancetable+4(,%eax,4),%esi
  786. LDraw4:
  787. cmpw 8(%ecx),%bp
  788. jl Lp5
  789. xorl %eax,%eax
  790. movb %dh,%ah
  791. movb (%esi),%al
  792. movw %bp,8(%ecx)
  793. movb 0x12345678(%eax),%al
  794. LPatch4:
  795. movb %al,4(%edi)
  796. Lp5:
  797. addl tstep,%edx
  798. sbbl %eax,%eax
  799. addl lzistepx,%ebp
  800. adcl $0,%ebp
  801. addl C(a_sstepxfrac),%ebx
  802. adcl advancetable+4(,%eax,4),%esi
  803. LDraw3:
  804. cmpw 10(%ecx),%bp
  805. jl Lp6
  806. xorl %eax,%eax
  807. movb %dh,%ah
  808. movb (%esi),%al
  809. movw %bp,10(%ecx)
  810. movb 0x12345678(%eax),%al
  811. LPatch3:
  812. movb %al,5(%edi)
  813. Lp6:
  814. addl tstep,%edx
  815. sbbl %eax,%eax
  816. addl lzistepx,%ebp
  817. adcl $0,%ebp
  818. addl C(a_sstepxfrac),%ebx
  819. adcl advancetable+4(,%eax,4),%esi
  820. LDraw2:
  821. cmpw 12(%ecx),%bp
  822. jl Lp7
  823. xorl %eax,%eax
  824. movb %dh,%ah
  825. movb (%esi),%al
  826. movw %bp,12(%ecx)
  827. movb 0x12345678(%eax),%al
  828. LPatch2:
  829. movb %al,6(%edi)
  830. Lp7:
  831. addl tstep,%edx
  832. sbbl %eax,%eax
  833. addl lzistepx,%ebp
  834. adcl $0,%ebp
  835. addl C(a_sstepxfrac),%ebx
  836. adcl advancetable+4(,%eax,4),%esi
  837. LDraw1:
  838. cmpw 14(%ecx),%bp
  839. jl Lp8
  840. xorl %eax,%eax
  841. movb %dh,%ah
  842. movb (%esi),%al
  843. movw %bp,14(%ecx)
  844. movb 0x12345678(%eax),%al
  845. LPatch1:
  846. movb %al,7(%edi)
  847. Lp8:
  848. addl tstep,%edx
  849. sbbl %eax,%eax
  850. addl lzistepx,%ebp
  851. adcl $0,%ebp
  852. addl C(a_sstepxfrac),%ebx
  853. adcl advancetable+4(,%eax,4),%esi
  854. addl $8,%edi
  855. addl $16,%ecx
  856. decw %bx
  857. jnz LDrawLoop
  858. popl %esi // restore spans pointer
  859. LNextSpan:
  860. addl $(spanpackage_t_size),%esi // point to next span
  861. LNextSpanESISet:
  862. movl spanpackage_t_count(%esi),%edx
  863. cmpl $-999999,%edx // any more spans?
  864. jnz LSpanLoop // yes
  865. popl %edi
  866. popl %ebp // restore the caller's stack frame
  867. popl %ebx // restore register variables
  868. popl %esi
  869. ret
  870. // draw a one-long span
  871. LExactlyOneLong:
  872. movl spanpackage_t_pz(%esi),%ecx
  873. movl spanpackage_t_zi(%esi),%ebp
  874. rorl $16,%ebp // put high 16 bits of 1/z in low word
  875. movl spanpackage_t_ptex(%esi),%ebx
  876. cmpw (%ecx),%bp
  877. jl LNextSpan
  878. xorl %eax,%eax
  879. movl spanpackage_t_pdest(%esi),%edi
  880. movb spanpackage_t_light+1(%esi),%ah
  881. addl $(spanpackage_t_size),%esi // point to next span
  882. movb (%ebx),%al
  883. movw %bp,(%ecx)
  884. movb 0x12345678(%eax),%al
  885. LPatch9:
  886. movb %al,(%edi)
  887. jmp LNextSpanESISet
  888. .globl C(D_PolysetAff8End)
  889. C(D_PolysetAff8End):
  890. .extern C(alias_colormap)
  891. // #define pcolormap 4
  //----------------------------------------------------------------------
  // D_Aff8Patch
  //
  // Copies the current value of C(alias_colormap) into the nine patch
  // sites of the 8-bpp affine span drawer.  Every LPatchN label directly
  // follows a
  //         movb 0x12345678(%eax),%al
  // instruction, so LPatchN-4 addresses that instruction's 32-bit
  // displacement; overwriting it replaces the 0x12345678 placeholder with
  // the colormap address.
  //
  // In:       nothing
  // Out:      nothing
  // Clobbers: %eax
  //
  // NOTE(review): this stores into the instruction stream, so the code
  // holding the LPatch labels must be writable when this runs; nothing in
  // this routine arranges that -- confirm the caller does.
  //----------------------------------------------------------------------
  .globl C(D_Aff8Patch)
  C(D_Aff8Patch):
          movl    C(alias_colormap),%eax  // value written to every site

  // unrolled draw loop: sites listed in the order they appear in the code
          movl    %eax,LPatch8-4
          movl    %eax,LPatch7-4
          movl    %eax,LPatch6-4
          movl    %eax,LPatch5-4
          movl    %eax,LPatch4-4
          movl    %eax,LPatch3-4
          movl    %eax,LPatch2-4
          movl    %eax,LPatch1-4

  // one-pixel-span special case
          movl    %eax,LPatch9-4

          ret
  905. //----------------------------------------------------------------------
  906. // Alias model triangle left-edge scanning code
  907. //----------------------------------------------------------------------
  // R_PolysetScanLeftEdge
  //
  // Walks down a triangle's left edge, emitting one spanpackage per
  // iteration at C(d_pedgespanpackage) and stepping the edge state with an
  // error-term (base step / extra step) scheme.
  //
  // In:  one stack argument, "height".  After the four pushes below it sits
  //      at 4+16(%esp): 16 bytes of saved registers plus the 4-byte return
  //      address.  Only its low 16 bits are used.
  // Out: no return value is set up.
  //
  // Globals written back:     d_pedgespanpackage, errorterm, d_pdest, d_pz,
  //                           d_aspancount.
  // Globals NOT written back: d_ptex, d_sfrac, d_tfrac, d_light, d_zi are
  //                           loaded once into registers and only ever stored
  //                           into the span packages, never back to the
  //                           globals.
  //
  // %ebp, %esi, %edi and %ebx are saved on entry and restored on exit;
  // %eax, %ecx and %edx are clobbered.
  //
  // The loop counter shares %ecx with d_sfrac: sfrac occupies the high word
  // and the count the low word.
  // NOTE(review): this presumably relies on d_sfrac and the two sfrac steps
  // having zero low words, so that OR-ing in / adding to %ecx never disturbs
  // the count -- confirm against the code that sets them up.
  // NOTE(review): the loop is do/while shaped and the count is not checked
  // on entry; a height whose low 16 bits are zero would make the first decl
  // borrow from the sfrac word and the loop would keep running until the low
  // word reaches zero again -- confirm callers never pass such a height.
  908. #define height 4+16
  909. .globl C(R_PolysetScanLeftEdge)
  910. C(R_PolysetScanLeftEdge):
  911. pushl %ebp // preserve caller stack frame pointer
  912. pushl %esi // preserve register variables
  913. pushl %edi
  914. pushl %ebx
  915. movl height(%esp),%eax
  916. movl C(d_sfrac),%ecx
  917. andl $0xFFFF,%eax // keep only the low 16 bits of height
  918. movl C(d_ptex),%ebx
  919. orl %eax,%ecx // pack the count into the low word of %ecx
  920. movl C(d_pedgespanpackage),%esi
  921. movl C(d_tfrac),%edx
  922. movl C(d_light),%edi
  923. movl C(d_zi),%ebp
  924. // %eax: scratch
  925. // %ebx: d_ptex
  926. // %ecx: d_sfrac in high word, count in low word
  927. // %edx: d_tfrac
  928. // %esi: d_pedgespanpackage, errorterm, scratch alternately
  929. // %edi: d_light
  930. // %ebp: d_zi
  931. // do
  932. // {
  933. LScanLoop:
  934. // d_pedgespanpackage->ptex = ptex;
  935. // d_pedgespanpackage->pdest = d_pdest;
  936. // d_pedgespanpackage->pz = d_pz;
  937. // d_pedgespanpackage->count = d_aspancount;
  938. // d_pedgespanpackage->light = d_light;
  939. // d_pedgespanpackage->zi = d_zi;
  940. // d_pedgespanpackage->sfrac = d_sfrac << 16;
  941. // d_pedgespanpackage->tfrac = d_tfrac << 16;
  942. movl %ebx,spanpackage_t_ptex(%esi)
  943. movl C(d_pdest),%eax
  944. movl %eax,spanpackage_t_pdest(%esi)
  945. movl C(d_pz),%eax
  946. movl %eax,spanpackage_t_pz(%esi)
  947. movl C(d_aspancount),%eax
  948. movl %eax,spanpackage_t_count(%esi)
  949. movl %edi,spanpackage_t_light(%esi)
  950. movl %ebp,spanpackage_t_zi(%esi)
  951. movl %ecx,spanpackage_t_sfrac(%esi) // low word of the stored value is the remaining count, not fraction bits
  952. movl %edx,spanpackage_t_tfrac(%esi)
  953. // pretouch the next cache line
  954. movb spanpackage_t_size(%esi),%al // loaded byte is discarded; %eax is reloaded below
  955. // d_pedgespanpackage++;
  956. addl $(spanpackage_t_size),%esi
  957. movl C(erroradjustup),%eax
  958. movl %esi,C(d_pedgespanpackage) // saved here because %esi is reused for errorterm next
  959. // errorterm += erroradjustup;
  960. movl C(errorterm),%esi
  961. addl %eax,%esi // sets the sign flag that the js below tests
  962. movl C(d_pdest),%eax // movl leaves the flags from the addl intact
  963. // if (errorterm >= 0)
  964. // {
  965. js LNoLeftEdgeTurnover
  966. // errorterm -= erroradjustdown;
  967. // d_pdest += d_pdestextrastep;
  968. subl C(erroradjustdown),%esi
  969. addl C(d_pdestextrastep),%eax
  970. movl %esi,C(errorterm)
  971. movl %eax,C(d_pdest)
  972. // d_pz += d_pzextrastep;
  973. // d_aspancount += d_countextrastep;
  974. // d_ptex += d_ptexextrastep;
  975. // d_sfrac += d_sfracextrastep;
  976. // d_ptex += d_sfrac >> 16;
  977. // d_sfrac &= 0xFFFF;
  978. // d_tfrac += d_tfracextrastep;
  979. movl C(d_pz),%eax
  980. movl C(d_aspancount),%esi
  981. addl C(d_pzextrastep),%eax
  982. addl C(d_sfracextrastep),%ecx // carry out of the sfrac add ...
  983. adcl C(d_ptexextrastep),%ebx // ... is folded into ptex by this adcl
  984. addl C(d_countextrastep),%esi
  985. movl %eax,C(d_pz)
  986. movl C(d_tfracextrastep),%eax
  987. movl %esi,C(d_aspancount)
  988. addl %eax,%edx // carry out of the tfrac add is tested by the jnc below
  989. // if (d_tfrac & 0x10000)
  990. // {
  991. jnc LSkip1
  992. // d_ptex += r_affinetridesc.skinwidth;
  993. // d_tfrac &= 0xFFFF;
  994. addl C(r_affinetridesc)+atd_skinwidth,%ebx
  995. // }
  996. LSkip1:
  997. // d_light += d_lightextrastep;
  998. // d_zi += d_ziextrastep;
  999. addl C(d_lightextrastep),%edi
  1000. addl C(d_ziextrastep),%ebp
  1001. // }
  1002. movl C(d_pedgespanpackage),%esi // reload the package pointer for the next iteration
  1003. decl %ecx // count lives in the low word of %ecx
  1004. testl $0xFFFF,%ecx // loop until the low-word count reaches zero
  1005. jnz LScanLoop
  // restore the saved registers in reverse push order and return
  1006. popl %ebx
  1007. popl %edi
  1008. popl %esi
  1009. popl %ebp
  1010. ret
  1011. // else
  1012. // {
  1013. LNoLeftEdgeTurnover:
  // entered with %esi = errorterm + erroradjustup (negative) and
  // %eax = d_pdest
  1014. movl %esi,C(errorterm)
  1015. // d_pdest += d_pdestbasestep;
  1016. addl C(d_pdestbasestep),%eax
  1017. movl %eax,C(d_pdest)
  1018. // d_pz += d_pzbasestep;
  1019. // d_aspancount += ubasestep;
  1020. // d_ptex += d_ptexbasestep;
  1021. // d_sfrac += d_sfracbasestep;
  1022. // d_ptex += d_sfrac >> 16;
  1023. // d_sfrac &= 0xFFFF;
  1024. movl C(d_pz),%eax
  1025. movl C(d_aspancount),%esi
  1026. addl C(d_pzbasestep),%eax
  1027. addl C(d_sfracbasestep),%ecx // carry out of the sfrac add ...
  1028. adcl C(d_ptexbasestep),%ebx // ... is folded into ptex by this adcl
  1029. addl C(ubasestep),%esi
  1030. movl %eax,C(d_pz)
  1031. movl %esi,C(d_aspancount)
  1032. // d_tfrac += d_tfracbasestep;
  1033. movl C(d_tfracbasestep),%esi
  1034. addl %esi,%edx // carry out of the tfrac add is tested by the jnc below
  1035. // if (d_tfrac & 0x10000)
  1036. // {
  1037. jnc LSkip2
  1038. // d_ptex += r_affinetridesc.skinwidth;
  1039. // d_tfrac &= 0xFFFF;
  1040. addl C(r_affinetridesc)+atd_skinwidth,%ebx
  1041. // }
  1042. LSkip2:
  1043. // d_light += d_lightbasestep;
  1044. // d_zi += d_zibasestep;
  1045. addl C(d_lightbasestep),%edi
  1046. addl C(d_zibasestep),%ebp
  1047. // }
  1048. // } while (--height);
  1049. movl C(d_pedgespanpackage),%esi // reload the package pointer for the next iteration
  1050. decl %ecx // count lives in the low word of %ecx
  1051. testl $0xFFFF,%ecx // loop until the low-word count reaches zero
  1052. jnz LScanLoop
  // restore the saved registers in reverse push order and return
  1053. popl %ebx
  1054. popl %edi
  1055. popl %esi
  1056. popl %ebp
  1057. ret
  1058. #endif // id386