123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
- ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
- ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
- ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
- ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
- ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
- ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
- ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
- ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
- ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
- ;
- ; $Source: f:/miner/source/texmap/rcs/tmap_16.asm $
- ; $Revision: 1.4 $
- ; $Author: mike $
- ; $Date: 1994/11/30 00:56:32 $
- ;
- ; 16 bits per pixel texture mapper
- ;
- ; $Log: tmap_16.asm $
- ; Revision 1.4 1994/11/30 00:56:32 mike
- ; optimization.
- ;
- ; Revision 1.3 1994/11/12 16:39:16 mike
- ; jae to ja.
- ;
- ; Revision 1.2 1993/11/22 10:23:49 mike
- ; *** empty log message ***
- ;
- ; Revision 1.1 1993/09/08 17:29:17 mike
- ; Initial revision
- ;
- ;
- ;
- .386 ; enable the 80386 instruction set (32-bit registers and shld are used below)
- public asm_tmap_scanline_lin_16_ ; entry point exported to other modules
- include tmap_inc.asm
- _DATA SEGMENT DWORD PUBLIC USE32 'DATA' ; NOTE(review): num_iters, write_buffer, _bytes_per_row and the _window_* names used by the code are not declared here (presumably tmap_inc.asm -- confirm)
- extrn _fx_u:dword ; initial u texture coordinate, read by the routine
- extrn _fx_v:dword ; initial v texture coordinate, read by the routine
- extrn _fx_du_dx:dword ; u step per pixel
- extrn _fx_dv_dx:dword ; v step per pixel
- extrn _fx_y:dword ; scanline row; compared against _window_bottom without any shift
- extrn _fx_xleft:dword ; left x; integer part is taken with sar 16
- extrn _fx_xright:dword ; right x; integer part is taken with sar 16
- extrn _pixptr:dword ; address of the source texture data
- extrn _x:dword ; declared but not referenced by the code in this file
- extrn _loop_count:dword ; written by the routine: clamped (int) xright - (int) xleft
- _DATA ENDS
- DGROUP GROUP _DATA
- _TEXT SEGMENT PARA PUBLIC USE32 'CODE'
- ASSUME DS:_DATA
- ASSUME CS:_TEXT
- ; --------------------------------------------------------------------------------------------------
- ; asm_tmap_scanline_lin_16_: draws one scanline of a linearly stepped texture map, 16 bits per pixel. All inputs are globals.
- ; Enter: _fx_xleft fixed point left x coordinate (integer part taken with sar 16)
- ; _fx_xright fixed point right x coordinate (integer part taken with sar 16)
- ; _fx_y y coordinate; the code uses it directly as a row number (no shift is applied)
- ; _pixptr address of source pixel map (indexed below as v6*64+u6 texels of 2 bytes each)
- ; _fx_u fixed point initial u coordinate
- ; _fx_v fixed point initial v coordinate
- ; _fx_du_dx fixed point du/dx
- ; _fx_dv_dx fixed point dv/dx
- ; Exit: _loop_count holds the clamped (int) xright - (int) xleft if anything was drawn; registers saved by pusha are restored by popa
- ; for (x = (int) xleft; x <= (int) xright; x++) {
- ; dest[x] = srcb[(((int) v) & 63)*64 + (((int) u) & 63)]; /* one 16 bit texel */
- ;
- ; u += du_dx;
- ; v += dv_dx;
- ; }
- ; There is no z term or per-pixel divide in this routine: u and v are stepped linearly.
- align 4
- asm_tmap_scanline_lin_16_:
- pusha ; save registers; ebp and the other general registers are reused as loop variables
- ; Setup for loop: _loop_count = (int) xright - (int) xleft after clamping; the unrolled loop draws _loop_count+1 pixels
- ; esi source pixel pointer = pixptr
- ; edi initial write pointer = write_buffer + (y - _window_top)*_bytes_per_row + 2*(x - _window_left)
- ; set esi = pointer to start of texture map data
- mov esi,_pixptr
- ; set edi = address of first pixel to modify
- mov edi,_fx_y
- cmp edi,_window_bottom
- ja _none_to_do ; unsigned compare: rows past _window_bottom, and negative rows, are skipped
- sub edi,_window_top
- imul edi,_bytes_per_row ; edi = byte offset of the row
- mov eax,_fx_xleft
- sar eax,16 ; integer part of xleft; sar sets the sign flag tested by jns
- jns eax_ok
- sub eax,eax ; negative xleft: start at column 0
- eax_ok:
- sub eax,_window_left
- add edi,eax
- add edi,eax ; add again because it's 2 bytes/pixel
- add edi,write_buffer ; NOTE(review): write_buffer is not declared in this file -- confirm where it comes from
- ; set _loop_count = number of pixels to draw minus one
- mov eax,_fx_xright
- sar eax,16 ; integer part of xright
- cmp eax,_window_right
- jb eax_ok1 ; unsigned: xright is kept only if below _window_right (a negative xright also fails this test)
- mov eax,_window_right
- eax_ok1: cmp eax,_window_left
- ja eax_ok2
- mov eax,_window_left ; right edge is never left of _window_left
- eax_ok2:
- mov ebx,_fx_xleft
- sar ebx,16 ; integer xleft, not clamped here (the start address above clamped negatives to 0)
- sub eax,ebx ; eax = right - left
- js _none_to_do ; negative span: nothing to draw
- cmp eax,_window_width
- jbe _ok_to_do
- mov eax,_window_width ; cap the count at _window_width (so at most _window_width+1 pixels are written)
- _ok_to_do:
- mov _loop_count,eax
- ; Load u, v and their steps, shifted left by 10 so the 6 bits used for texture indexing are the top bits of each register
- mov ebx,_fx_u
- mov ecx,_fx_du_dx
- mov edx,_fx_dv_dx
- mov ebp,_fx_v
- shl ebx,10 ; top 6 bits of ebx are now bits 16..21 of u
- shl ebp,10 ; same for v
- shl edx,10 ; steps get the same shift so the adds in the loop stay consistent
- shl ecx,10
- ; eax work
- ; ebx u << 10
- ; ecx du_dx << 10
- ; edx dv_dx << 10
- ; ebp v << 10
- ; esi read address (texture base)
- ; edi write address
- _size = (_end1 - _start1)/num_iters ; bytes of code per unrolled iteration
- mov eax,num_iters-1
- sub eax,_loop_count ; number of unrolled iterations to skip
- imul eax,eax,dword ptr _size ; byte offset of the first iteration to execute
- add eax,offset _start1 ; NOTE(review): assumes _loop_count <= num_iters-1, otherwise the target lies before _start1 -- num_iters is not defined in this file, confirm
- jmp eax ; enter the unrolled loop so that _loop_count+1 iterations remain
- align 4
- _start1:
- ; "OPTIMIZATIONS" maybe not worth making (these notes mention mov al,[esi+eax] and sub eax,eax, which are not the instructions used in the loop below)
- ; Getting rid of the esi from the mov al,[esi+eax] instruction.
- ; This would require moving into eax at the top of the loop, rather than doing the sub eax,eax.
- ; You would have to align your bitmaps so that the two shlds would create the proper base address.
- ; In other words, your bitmap data would have to begin at 4096x (for 64x64 bitmaps).
- ; I did timings without converting the sub to a mov eax,esi and setting esi to the proper value.
- ; There was a speedup of about 1% to 1.5% without converting the sub to a mov.
- ; Getting rid of the edi by doing a mov nnnn[edi],al instead of mov [edi],al.
- ; The problem with this is you would have a dword offset for nnnn. My timings indicate it is slower. (I think.)
- ; Combining u,v and du,dv into single longwords.
- ; The problem with this is you then must do a 16 bit operation to extract them, and you don't have enough
- ; instructions to separate a destination operand from being used by the next instruction. It shaves out one
- ; register instruction (an add reg,reg), but adds a 16 bit operation, and the setup is more complicated.
- ; usage:
- ; eax work (texel index, then the pixel in ax)
- ; ebx u coordinate (shifted left 10)
- ; ecx delta u (shifted left 10)
- ; edx delta v (shifted left 10)
- ; ebp v coordinate (shifted left 10)
- ; esi pointer to source bitmap
- ; edi write address
- rept num_iters
- mov eax,ebp ; eax = current v (overwrites whatever eax held)
- add ebp,edx ; update v coordinate
- shr eax,26 ; keep only the top 6 bits of v
- shld eax,ebx,6 ; shift in u coordinate while shifting up v coordinate: eax = v6*64 + u6
- add ebx,ecx ; update u coordinate
- mov ax,[esi+2*eax] ; get 16 bit pixel from source bitmap (index scaled by 2 bytes per texel)
- mov [edi],ax ; write the pixel
- add edi,2 ; advance one 16 bit pixel
- ; inner loop if bitmaps are 256x256 (commented out; it moves bytes through al)
- ; your register usage is bogus, and you must clear ecx
- ; fix your setup
- ; this is only about 10% faster in the inner loop
- ; this method would adapt to writing two pixels at a time better than
- ; the 64x64 method because you wouldn't run out of registers
- ; Note that this method assumes that both dv_dx and du_dx are in edx.
- ; edx = vi|vf|ui|uf
- ; where each field is 8 bits, vi = integer v coordinate, vf = fractional v coordinate, etc.
- ;** add ebx,edx
- ;** mov cl,bh
- ;** shld cx,bx,8
- ;** mov al,[esi+ecx]
- ;** mov [edi],al
- ;** inc edi
- endm
- _end1:
- _none_to_do: popa ; restore the registers saved by pusha
- ret
- _TEXT ends
- end
- ; This is the inner loop to write two pixels at a time (it loads byte texels into dl and dh, i.e. 8 bits per pixel).
- ; It is about 2.5% faster overall (on Mike's 66 MHz 80486 DX2, VLB).
- ; You must write code to align edi to an even address, do half as many iterations, and write
- ; the extra pixels at the beginning and end, if necessary.
- ; sub eax,eax ; clear for
- ; shld eax,ebp,6 ; shift in v coordinate
- ; add ebp,_fx_dv_dx ; update v coordinate
- ; shld eax,ebx,6 ; shift in u coordinate while shifting up v coordinate
- ; add ebx,ecx ; update u coordinate
- ; mov dl,[esi+eax] ; get pixel from source bitmap
- ;
- ; sub eax,eax ; clear for
- ; shld eax,ebp,6 ; shift in v coordinate
- ; add ebp,_fx_dv_dx ; update v coordinate
- ; shld eax,ebx,6 ; shift in u coordinate while shifting up v coordinate
- ; add ebx,ecx ; update u coordinate
- ; mov dh,[esi+eax] ; get pixel from source bitmap
- ;
- ; mov [edi],dx
- ; add edi,2
|