TMAP_16.ASM 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
  2. ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
  3. ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
  4. ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
  5. ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
  6. ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
  7. ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
  8. ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
  9. ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  10. ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
  11. ;
  12. ; $Source: f:/miner/source/texmap/rcs/tmap_16.asm $
  13. ; $Revision: 1.4 $
  14. ; $Author: mike $
  15. ; $Date: 1994/11/30 00:56:32 $
  16. ;
  17. ; 16 bits per pixel texture mapper
  18. ;
  19. ; $Log: tmap_16.asm $
  20. ; Revision 1.4 1994/11/30 00:56:32 mike
  21. ; optimization.
  22. ;
  23. ; Revision 1.3 1994/11/12 16:39:16 mike
  24. ; jae to ja.
  25. ;
  26. ; Revision 1.2 1993/11/22 10:23:49 mike
  27. ; *** empty log message ***
  28. ;
  29. ; Revision 1.1 1993/09/08 17:29:17 mike
  30. ; Initial revision
  31. ;
  32. ;
  33. ;
  34. .386 ; allow 80386 instructions and 32-bit registers, both used by the code below
  35. public asm_tmap_scanline_lin_16_
  ; NOTE(review): num_iters, write_buffer, _bytes_per_row and the _window_* symbols used by the
  ; code segment are not declared anywhere in this file; presumably this include supplies them -- confirm.
  36. include tmap_inc.asm
  37. _DATA SEGMENT DWORD PUBLIC USE32 'DATA'
  ; Globals defined in other modules. asm_tmap_scanline_lin_16_ takes no register arguments and
  ; reads all of its inputs from these 32-bit variables.
  38. extrn _fx_u:dword ; initial u texture coordinate (fixed point)
  39. extrn _fx_v:dword ; initial v texture coordinate (fixed point)
  40. extrn _fx_du_dx:dword ; amount added to u for each pixel written
  41. extrn _fx_dv_dx:dword ; amount added to v for each pixel written
  42. extrn _fx_y:dword ; scanline row; the code uses it unshifted, as a row number compared against _window_bottom
  43. extrn _fx_xleft:dword ; left x of the span (fixed point; integer part taken with sar 16)
  44. extrn _fx_xright:dword ; right x of the span (fixed point; integer part taken with sar 16)
  45. extrn _pixptr:dword ; address of the source texture data
  46. extrn _x:dword ; declared but not referenced by the code in this file
  47. extrn _loop_count:dword ; scratch: written with the clipped pixel count, then read back to compute the loop entry
  48. _DATA ENDS
  49. DGROUP GROUP _DATA
  50. _TEXT SEGMENT PARA PUBLIC USE32 'CODE'
  51. ASSUME DS:_DATA
  52. ASSUME CS:_TEXT
  53. ; --------------------------------------------------------------------------------------------------
  ; asm_tmap_scanline_lin_16_
  ; Writes one horizontal span of texture-mapped pixels, 16 bits (2 bytes) per pixel, stepping u and v
  ; by a constant amount per pixel. No register arguments: every input is read from the globals
  ; listed below. pusha/popa bracket the routine, so the general registers are unchanged on return
  ; (flags are not preserved); _loop_count is overwritten as a side effect.
  54. ; Enter (the coordinates are read through the _fx_-prefixed externs, e.g. _fx_xleft):
  55. ; _xleft fixed point left x coordinate (integer part is taken with sar 16)
  56. ; _xright fixed point right x coordinate (integer part is taken with sar 16)
  57. ; _y y coordinate; NOTE(review): described as fixed point, but the code uses _fx_y unshifted as a row number -- confirm
  58. ; _pixptr address of source pixel map (read as 16-bit texels by the inner loop)
  59. ; _u fixed point initial u coordinate
  60. ; _v fixed point initial v coordinate
  61. ; _du_dx fixed point du/dx
  62. ; _dv_dx fixed point dv/dx
  ; Reference pseudocode. Earlier text divided u and v by z and stepped z; the code below never
  ; divides by or steps a z value, so only what is implemented is shown:
  63. ; for (x = (int) xleft; x <= (int) xright; x++) {
  64. ; _setcolor(read_pixel_from_tmap(srcb,((int) u) & 63,((int) v) & 63));
  65. ; _setpixel(x,y);
  66. ;
  67. ; u += du_dx;
  68. ; v += dv_dx;
  69. ; (no z step in this routine)
  70. ; }
  71. align 4
  72. asm_tmap_scanline_lin_16_:
  73. pusha ; save all general registers; restored by the popa at _none_to_do
  74. ; Setup for loop: _loop_count = (int) xright - (int) xleft after clipping; the unrolled loop
  ; below is entered so that _loop_count+1 pixels are written (matching the <= in the pseudocode)
  75. ; esi source pixel pointer = pixptr
  76. ; edi initial write address = write_buffer + (y-_window_top)*_bytes_per_row + 2*(x-_window_left)
  77. ; set esi = pointer to start of texture map data
  78. mov esi,_pixptr
  79. ; set edi = address of first pixel to modify
  80. mov edi,_fx_y
  81. cmp edi,_window_bottom
  82. ja _none_to_do ; unsigned test: y above _window_bottom, or negative y (huge as unsigned), draws nothing
  ; NOTE(review): there is no matching test against _window_top; a y below it would make the
  ; subtraction wrap -- confirm callers never pass such a row.
  83. sub edi,_window_top ; row relative to the top of the window
  84. imul edi,_bytes_per_row ; byte offset of that row
  85. mov eax,_fx_xleft
  86. sar eax,16 ; eax = integer part of xleft, sign preserved
  87. jns eax_ok ; non-negative: use as is
  88. sub eax,eax ; negative: clamp the starting column to 0
  89. eax_ok:
  90. sub eax,_window_left ; column relative to the window's left edge
  91. add edi,eax
  92. add edi,eax ; add again because it's 2 bytes/pixel
  93. add edi,write_buffer ; plus write_buffer (declared outside this file)
  94. ; set _loop_count = # of iterations
  95. mov eax,_fx_xright
  96. sar eax,16 ; eax = integer part of xright
  97. cmp eax,_window_right
  98. jb eax_ok1 ; unsigned: kept only when below _window_right
  99. mov eax,_window_right ; otherwise clamp to _window_right
  100. eax_ok1: cmp eax,_window_left
  101. ja eax_ok2 ; unsigned: kept only when above _window_left
  102. mov eax,_window_left ; otherwise clamp to _window_left
  103. eax_ok2:
  104. mov ebx,_fx_xleft
  105. sar ebx,16 ; ebx = integer part of xleft, re-read WITHOUT the clamp to 0 applied for edi above
  ; NOTE(review): for a negative xleft the count computed here is measured from xleft, while
  ; writing starts at column 0 -- confirm that is intended or that callers never pass xleft < 0.
  106. sub eax,ebx ; eax = clipped right - left
  107. js _none_to_do ; negative count: nothing to draw
  108. cmp eax,_window_width
  109. jbe _ok_to_do
  110. mov eax,_window_width ; cap the count at _window_width
  111. _ok_to_do:
  112. mov _loop_count,eax
  113. ; edi destination pixel pointer
  114. mov ebx,_fx_u
  115. mov ecx,_fx_du_dx
  116. mov edx,_fx_dv_dx
  117. mov ebp,_fx_v
  ; Shift all four values left by 10 so that bits 16..21 of each original value sit in the top
  ; 6 bits of its register, where the inner loop picks them up with shr 26 / shld 6.
  118. shl ebx,10
  119. shl ebp,10
  120. shl edx,10
  121. shl ecx,10
  122. ; eax work
  123. ; ebx u
  124. ; ecx du_dx
  125. ; edx dv_dx
  126. ; ebp v
  127. ; esi read address
  128. ; edi write address
  ; Computed entry into the unrolled loop: skip the first (num_iters-1 - _loop_count) copies of the
  ; body so that the remaining _loop_count+1 copies execute, then fall through to _none_to_do.
  ; NOTE(review): this requires _loop_count <= num_iters-1; the only cap visible in this file is
  ; _window_width -- confirm num_iters is large enough.
  129. _size = (_end1 - _start1)/num_iters ; bytes per unrolled copy; relies on every copy assembling to the same length
  130. mov eax,num_iters-1
  131. sub eax,_loop_count ; eax = number of copies to skip
  132. imul eax,eax,dword ptr _size ; ... converted to a byte offset
  133. add eax,offset _start1 ; ... made into an address inside the unrolled code
  134. jmp eax
  135. align 4
  136. _start1:
  ; NOTE: the notes below quote instructions (mov al,[esi+eax], sub eax,eax, mov [edi],al) that do
  ; not appear in the 16-bit loop in this file; presumably they were written for a byte-per-pixel version.
  137. ; "OPTIMIZATIONS" maybe not worth making
  138. ; Getting rid of the esi from the mov al,[esi+eax] instruction.
  139. ; This would require moving into eax at the top of the loop, rather than doing the sub eax,eax.
  140. ; You would have to align your bitmaps so that the two shlds would create the proper base address.
  141. ; In other words, your bitmap data would have to begin at 4096x (for 64x64 bitmaps).
  142. ; I did timings without converting the sub to a mov eax,esi and setting esi to the proper value.
  143. ; There was a speedup of about 1% to 1.5% without converting the sub to a mov.
  144. ; Getting rid of the edi by doing a mov nnnn[edi],al instead of mov [edi],al.
  145. ; The problem with this is you would have a dword offset for nnnn. My timings indicate it is slower. (I think.)
  146. ; Combining u,v and du,dv into single longwords.
  147. ; The problem with this is you then must do a 16 bit operation to extract them, and you don't have enough
  148. ; instructions to separate a destination operand from being used by the next instruction. It shaves out one
  149. ; register instruction (an add reg,reg), but adds a 16 bit operation, and the setup is more complicated.
  150. ; usage:
  151. ; eax work
  152. ; ebx u coordinate
  153. ; ecx delta u
  154. ; edx delta v
  155. ; ebp v coordinate
  156. ; esi pointer to source bitmap
  157. ; edi write address
  158. rept num_iters
  159. mov eax,ebp ; eax = current v accumulator
  160. add ebp,edx ; update v coordinate
  161. shr eax,26 ; keep only the top 6 bits: eax = 6-bit v index
  162. shld eax,ebx,6 ; shift in u coordinate while shifting up v coordinate: eax = v*64 + u
  163. add ebx,ecx ; update u coordinate
  164. mov ax,[esi+2*eax] ; get 16-bit pixel from source bitmap (index scaled by 2 bytes per texel)
  165. mov [edi],ax ; store it at the write address
  166. add edi,2 ; advance the write address by one 16-bit pixel
  167. ; inner loop if bitmaps are 256x256
  168. ; your register usage is bogus, and you must clear ecx
  169. ; fix your setup
  170. ; this is only about 10% faster in the inner loop
  171. ; this method would adapt to writing two pixels at a time better than
  172. ; the 64x64 method because you wouldn't run out of registers
  173. ; Note that this method assumes that both dv_dx and du_dx are in edx.
  174. ; edx = vi|vf|ui|uf
  175. ; where each field is 8 bits, vi = integer v coordinate, vf = fractional v coordinate, etc.
  176. ;** add ebx,edx
  177. ;** mov cl,bh
  178. ;** shld cx,bx,8
  179. ;** mov al,[esi+ecx]
  180. ;** mov [edi],al
  181. ;** inc edi
  182. endm
  183. _end1:
  184. _none_to_do: popa ; restore the registers saved by pusha (also the exit for the clipped-out cases)
  185. ret
  186. _TEXT ends
  187. end
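  188. ; This is the inner loop to write two pixels at a time.
  189. ; This is about 2.5% faster overall (on Mike's 66 MHz 80486 DX2, VLB).
  190. ; You must write code to even-align edi, do half as many iterations, and write
  191. ; the beginning and ending extra pixels, if necessary.
  ; Note: as written below it loads one byte per texel into dl and dh and stores them as a single
  ; word, i.e. two 8-bit pixels; it would need reworking for the 16-bit texels used in this file.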
  192. ; sub eax,eax ; clear for
  193. ; shld eax,ebp,6 ; shift in v coordinate
  194. ; add ebp,_fx_dv_dx ; update v coordinate
  195. ; shld eax,ebx,6 ; shift in u coordinate while shifting up v coordinate
  196. ; add ebx,ecx ; update u coordinate
  197. ; mov dl,[esi+eax] ; get pixel from source bitmap
  198. ;
  199. ; sub eax,eax ; clear for
  200. ; shld eax,ebp,6 ; shift in v coordinate
  201. ; add ebp,_fx_dv_dx ; update v coordinate
  202. ; shld eax,ebx,6 ; shift in u coordinate while shifting up v coordinate
  203. ; add ebx,ecx ; update u coordinate
  204. ; mov dh,[esi+eax] ; get pixel from source bitmap
  205. ;
  206. ; mov [edi],dx
  207. ; add edi,2