12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081 |
- ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
- ; *
- ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
- ; *
- ; * Copyright (C) 1995-2003 Mark Adler
- ; * For conditions of distribution and use, see copyright notice in zlib.h
- ; *
- ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
- ; * Please use the copyright conditions above.
- ; *
- ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
- ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
- ; * the moment. I have successfully compiled and tested this code with gcc2.96,
- ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
- ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
- ; * enabled. I will attempt to merge the MMX code into this version. Newer
- ; * versions of this and inffast.S can be found at
- ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
- ; *
- ; * 2005 : modification by Gilles Vollant
- ; */
- ; For Visual C++ 4.x and higher and ML 6.x and higher
- ; ml.exe is in directory \MASM611C of Win95 DDK
- ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
- ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
- ;
- ;
- ; compile with command line option
- ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
- ; if you define NO_GUNZIP (see inflate.h), compile with
- ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
- ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
- ; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with the addition of dmax and head
- ; in inflate_state in inflate.h)
- ; zlib1222sup is the adjustment added to every inflate_state field offset
- ; from wsize onward (see the *_state equates).
- zlib1222sup equ 8
- ; Mode numbers stored into state->mode on exit.  The smaller numbering is
- ; selected only when NO_GUNZIP is defined and GUNZIP is not.
- IFDEF GUNZIP
- INFLATE_MODE_TYPE equ 11
- INFLATE_MODE_BAD equ 26
- ELSEIFNDEF NO_GUNZIP
- INFLATE_MODE_TYPE equ 11
- INFLATE_MODE_BAD equ 26
- ELSE
- INFLATE_MODE_TYPE equ 3
- INFLATE_MODE_BAD equ 17
- ENDIF
- ; 75 "inffast.S"
- ;FILE "inffast.S"
- ;;;GLOBAL _inflate_fast
- ;;;SECTION .text
- .586p ; allow Pentium-level instructions (cpuid is used by the CPU probe)
- .mmx ; allow MMX mnemonics (movd/movq/psrlq/... used by the MMX loop)
- name inflate_fast_x86 ; module name directive
- .MODEL FLAT ; 32-bit flat memory model
- _DATA segment ; NOTE(review): no matching "_DATA ends" is visible before _TEXT opens -- confirm this nesting is intended
- inflate_fast_use_mmx: ; decoder selector: 2 = MMX loop, above 2 = integer loop, below 2 = CPU not probed yet
- dd 1 ; starts below 2, so the first call runs the CPUID probe
- _TEXT segment
- ; NUL-terminated strings placed in the _TEXT segment, each starting on a
- ; 4-byte boundary.  The three labelled ones are the messages stored into
- ; the stream on a decoding error.
- ALIGN 4
- db 'Fast decoding Code from Chris Anderson', 0 ; unlabelled credit string
- ALIGN 4
- invalid_literal_length_code_msg:
- db 'invalid literal/length code', 0
- ALIGN 4
- invalid_distance_code_msg:
- db 'invalid distance code', 0
- ALIGN 4
- invalid_distance_too_far_msg:
- db 'invalid distance too far back', 0
- ALIGN 4
- ; inflate_fast_mask[n] = (1 shl n) - 1 for n = 0..32, i.e. a mask of the
- ; low n bits.  The MMX decoder indexes it with a bit count to isolate
- ; extra bits and second-level table indices.
- inflate_fast_mask:
- dd 000000000h, 000000001h, 000000003h, 000000007h ; n =  0.. 3
- dd 00000000Fh, 00000001Fh, 00000003Fh, 00000007Fh ; n =  4.. 7
- dd 0000000FFh, 0000001FFh, 0000003FFh, 0000007FFh ; n =  8..11
- dd 000000FFFh, 000001FFFh, 000003FFFh, 000007FFFh ; n = 12..15
- dd 00000FFFFh, 00001FFFFh, 00003FFFFh, 00007FFFFh ; n = 16..19
- dd 0000FFFFFh, 0001FFFFFh, 0003FFFFFh, 0007FFFFFh ; n = 20..23
- dd 000FFFFFFh, 001FFFFFFh, 003FFFFFFh, 007FFFFFFh ; n = 24..27
- dd 00FFFFFFFh, 01FFFFFFFh, 03FFFFFFFh, 07FFFFFFFh ; n = 28..31
- dd 0FFFFFFFFh ; n = 32
- ; Byte offsets of the inflate_state fields used by this routine.  Every
- ; field from wsize onward is shifted by zlib1222sup.
- mode_state equ 0 ;/* state->mode */
- wsize_state equ (zlib1222sup+32) ;/* state->wsize */
- write_state equ (zlib1222sup+40) ;/* state->write */
- window_state equ (zlib1222sup+44) ;/* state->window */
- hold_state equ (zlib1222sup+48) ;/* state->hold */
- bits_state equ (zlib1222sup+52) ;/* state->bits */
- lencode_state equ (zlib1222sup+68) ;/* state->lencode */
- distcode_state equ (zlib1222sup+72) ;/* state->distcode */
- lenbits_state equ (zlib1222sup+76) ;/* state->lenbits */
- distbits_state equ (zlib1222sup+80) ;/* state->distbits */
- ;;SECTION .text
- ; 205 "inffast.S"
- ;GLOBAL inflate_fast_use_mmx
- ;SECTION .data
- ; GLOBAL inflate_fast_use_mmx:object
- ;.size inflate_fast_use_mmx, 4
- ; 226 "inffast.S"
- ;SECTION .text
- ALIGN 4
- _inflate_fast proc near ; on entry: [esp+4] = stream pointer, [esp+8] = second dword argument
- .FPO (16, 4, 0, 0, 1, 0) ; frame-pointer-omission debug record; the leading 16 matches the 16 dwords reserved below
- push edi ; save the registers that L_done restores
- push esi
- push ebp
- push ebx
- pushfd ; save EFLAGS so the caller's direction flag comes back with popfd
- sub esp,64 ; 64-byte scratch frame; the arguments are now at [esp+88] and [esp+92]
- cld ; string instructions advance upward
- mov esi, [esp+88] ; esi = stream pointer (argument 1)
- mov edi, [esi+28] ; edi = stream+28: state pointer (the *_state offsets apply to it)
- mov edx, [esi+4] ; edx = stream+4: input byte count
- mov eax, [esi+0] ; eax = stream+0: input pointer
- add edx,eax
- sub edx,11 ; edx = input pointer + count - 11 ("last")
- mov [esp+44],eax ; [esp+44] = input pointer
- mov [esp+20],edx ; [esp+20] = last; the loops continue only while it is above esi
- mov ebp, [esp+92] ; ebp = argument 2 (presumably the output space at the start of inflate -- confirm with caller)
- mov ecx, [esi+16] ; ecx = stream+16: output byte count
- mov ebx, [esi+12] ; ebx = stream+12: output pointer
- sub ebp,ecx
- neg ebp
- add ebp,ebx ; ebp = output pointer - (argument 2 - output count) ("beg")
- sub ecx,257
- add ecx,ebx ; ecx = output pointer + output count - 257 ("end")
- mov [esp+60],ebx ; [esp+60] = output pointer
- mov [esp+40],ebp ; [esp+40] = beg; matches reaching below it go through the window code
- mov [esp+16],ecx ; [esp+16] = end; the loops continue only while it is above edi
- ; 285 "inffast.S" -- cache the decoding tables, index masks, window fields and bit buffer
- mov eax, [edi+lencode_state] ; edi = state pointer here
- mov ecx, [edi+distcode_state]
- mov [esp+8],eax ; [esp+8] = state->lencode (literal/length table)
- mov [esp+12],ecx ; [esp+12] = state->distcode (distance table)
- mov eax,1
- mov ecx, [edi+lenbits_state]
- shl eax,cl
- dec eax ; eax = (1 << lenbits) - 1
- mov [esp+0],eax ; [esp+0] = index mask for the literal/length table
- mov eax,1
- mov ecx, [edi+distbits_state]
- shl eax,cl
- dec eax ; eax = (1 << distbits) - 1
- mov [esp+4],eax ; [esp+4] = index mask for the distance table
- mov eax, [edi+wsize_state]
- mov ecx, [edi+write_state]
- mov edx, [edi+window_state]
- mov [esp+52],eax ; [esp+52] = state->wsize
- mov [esp+48],ecx ; [esp+48] = state->write
- mov [esp+56],edx ; [esp+56] = state->window
- mov ebp, [edi+hold_state] ; ebp = bit buffer (state->hold)
- mov ebx, [edi+bits_state] ; ebx = number of bits held in ebp (state->bits)
- ; 321 "inffast.S" -- stage short input in a stack buffer, otherwise align the input pointer
- mov esi, [esp+44] ; esi = input pointer
- mov ecx, [esp+20] ; ecx = last (input pointer + count - 11)
- cmp ecx,esi
- ja L_align_long ; last above input pointer: read the caller's buffer in place
- add ecx,11
- sub ecx,esi ; ecx = number of input bytes available
- mov eax,12
- sub eax,ecx ; eax = padding needed to fill the 12-byte scratch buffer
- lea edi, [esp+28] ; scratch buffer occupies [esp+28]..[esp+39]
- rep movsb ; copy the available input into it
- mov ecx,eax
- xor eax,eax
- rep stosb ; zero-fill the rest of the scratch buffer
- lea esi, [esp+28] ; decode from the scratch buffer
- mov [esp+20],esi ; last = scratch address; the write-back code compares against it to detect this path
- jmp L_is_aligned
- L_align_long: ; feed single bytes into the bit buffer until esi is 4-byte aligned
- test esi,3
- jz L_is_aligned
- xor eax,eax
- mov al, [esi] ; eax = next input byte
- inc esi
- mov ecx,ebx ; shift count = bits already held
- add ebx,8 ; eight more bits held
- shl eax,cl
- or ebp,eax ; append the byte above the bits already in ebp
- jmp L_align_long
- L_is_aligned:
- mov edi, [esp+60] ; edi = output pointer
- ; 366 "inffast.S" -- choose the decoder from inflate_fast_use_mmx, probing the CPU when it is below 2
- L_check_mmx:
- cmp dword ptr [inflate_fast_use_mmx],2
- je L_init_mmx ; 2: MMX decoder
- ja L_do_loop ; above 2: integer decoder
- push eax ; keep the live decoder registers; cpuid overwrites eax, ebx, ecx and edx
- push ebx
- push ecx
- push edx
- pushfd
- mov eax, [esp] ; eax = current EFLAGS
- xor dword ptr [esp],0200000h ; toggle bit 21 (the ID flag) in the saved copy
- popfd ; try to load the modified flags
- pushfd
- pop edx ; edx = EFLAGS as actually stored
- xor edx,eax ; non-zero only if the ID flag could be changed
- jz L_dont_use_mmx ; flag did not change: cpuid is not available
- xor eax,eax
- cpuid ; leaf 0: vendor string in ebx, edx, ecx
- cmp ebx,0756e6547h ; "Genu"
- jne L_dont_use_mmx
- cmp ecx,06c65746eh ; "ntel"
- jne L_dont_use_mmx
- cmp edx,049656e69h ; "ineI"
- jne L_dont_use_mmx
- mov eax,1
- cpuid ; leaf 1: signature in eax, feature flags in edx
- shr eax,8
- and eax,15 ; eax = family field
- cmp eax,6
- jne L_dont_use_mmx ; only family 6 selects the MMX decoder
- test edx,0800000h ; feature bit 23 = MMX
- jnz L_use_mmx
- jmp L_dont_use_mmx
- L_use_mmx:
- mov dword ptr [inflate_fast_use_mmx],2 ; remember: use the MMX decoder
- jmp L_check_mmx_pop
- L_dont_use_mmx:
- mov dword ptr [inflate_fast_use_mmx],3 ; remember: use the integer decoder
- L_check_mmx_pop:
- pop edx ; restore the decoder registers
- pop ecx
- pop ebx
- pop eax
- jmp L_check_mmx ; dispatch again now that the selector is 2 or 3
- ; 426 "inffast.S" -- integer decoder: ebp = bit buffer, bl = bits held, esi = input, edi = output
- ALIGN 4
- L_do_loop:
- ; 437 "inffast.S"
- cmp bl,15
- ja L_get_length_code ; more than 15 bits held: no refill needed
- xor eax,eax
- lodsw ; eax = next 16 input bits, esi += 2
- mov cl,bl
- add bl,16
- shl eax,cl
- or ebp,eax ; append them above the bits already held
- L_get_length_code:
- mov edx, [esp+0] ; literal/length index mask
- mov ecx, [esp+8] ; literal/length table
- and edx,ebp
- mov eax, [ecx+edx*4] ; eax = table entry: al = op, ah = bits to consume, upper 16 bits = value
- L_dolen:
- mov cl,ah
- sub bl,ah
- shr ebp,cl ; consume the code's bits
- test al,al
- jnz L_test_for_length_base ; op != 0: not a literal
- shr eax,16
- stosb ; op == 0: store the literal byte
- L_while_test:
- cmp [esp+16],edi
- jbe L_break_loop ; output pointer reached the [esp+16] limit
- cmp [esp+20],esi
- ja L_do_loop ; input pointer still below the [esp+20] limit: keep decoding
- jmp L_break_loop
- L_test_for_length_base:
- ; 502 "inffast.S"
- mov edx,eax
- shr edx,16 ; edx = entry value (length base or sub-table base)
- mov cl,al
- test al,16
- jz L_test_for_second_level_length ; op bit 16 clear: not a length base
- and cl,15 ; cl = number of extra length bits
- jz L_save_len
- cmp bl,cl
- jae L_add_bits_to_len ; enough bits held for the extra bits
- mov ch,cl ; park the extra-bit count while cl is used as shift count
- xor eax,eax
- lodsw
- mov cl,bl
- add bl,16
- shl eax,cl
- or ebp,eax ; refill with 16 more bits
- mov cl,ch
- L_add_bits_to_len:
- mov eax,1
- shl eax,cl
- dec eax ; eax = (1 << extra) - 1
- sub bl,cl
- and eax,ebp ; eax = extra bits value
- shr ebp,cl ; consume the extra bits
- add edx,eax ; edx = match length
- L_save_len:
- mov [esp+24],edx ; [esp+24] = match length
- ; Integer decoder, distance half of a match.  On entry [esp+24] holds the
- ; match length, ebp/bl are the bit buffer and its bit count, esi/edi are the
- ; input and output pointers.  Table entries are laid out as al = op,
- ; ah = bits to consume, upper 16 bits = value.
- L_decode_distance:
- ; 549 "inffast.S"
- cmp bl,15
- ja L_get_distance_code ; more than 15 bits held: no refill needed
- xor eax,eax
- lodsw ; eax = next 16 input bits, esi += 2
- mov cl,bl
- add bl,16
- shl eax,cl
- or ebp,eax ; append them above the bits already held
- L_get_distance_code:
- mov edx, [esp+4] ; distance index mask
- mov ecx, [esp+12] ; distance table
- and edx,ebp
- mov eax, [ecx+edx*4] ; eax = distance table entry
- L_dodist:
- mov edx,eax
- shr edx,16 ; edx = entry value (distance base or sub-table base)
- mov cl,ah
- sub bl,ah
- shr ebp,cl ; consume the code's bits
- ; 584 "inffast.S"
- mov cl,al
- test al,16
- jz L_test_for_second_level_dist ; op bit 16 clear: not a distance base
- and cl,15 ; cl = number of extra distance bits
- jz L_check_dist_one ; no extra bits: try the distance-1 shortcut
- cmp bl,cl
- jae L_add_bits_to_dist ; enough bits held for the extra bits
- mov ch,cl ; park the extra-bit count while cl is used as shift count
- xor eax,eax
- lodsw
- mov cl,bl
- add bl,16
- shl eax,cl
- or ebp,eax ; refill with 16 more bits
- mov cl,ch
- L_add_bits_to_dist:
- mov eax,1
- shl eax,cl
- dec eax ; eax = (1 << extra) - 1
- sub bl,cl
- and eax,ebp ; eax = extra bits value
- shr ebp,cl ; consume the extra bits
- add edx,eax ; edx = match distance
- ; Execution falls straight into L_check_window; the former
- ; "jmp L_check_window" here only jumped to the next instruction.
- L_check_window:
- ; 625 "inffast.S"
- mov [esp+44],esi ; park the input pointer; esi becomes the copy source
- mov eax,edi
- sub eax, [esp+40] ; eax = output pointer - beg
- cmp eax,edx
- jb L_clip_window ; distance reaches below beg: source is in the window
- mov ecx, [esp+24] ; ecx = match length
- mov esi,edi
- sub esi,edx ; esi = output pointer - distance
- sub ecx,3 ; the first three bytes are copied explicitly
- mov al, [esi]
- mov [edi],al
- mov al, [esi+1]
- mov dl, [esi+2]
- add esi,3
- mov [edi+1],al
- mov [edi+2],dl
- add edi,3
- rep movsb ; copy the remaining length - 3 bytes
- mov esi, [esp+44] ; restore the input pointer
- jmp L_while_test
- ALIGN 4
- L_check_dist_one: ; reached when the distance code has no extra bits; edx = distance
- cmp edx,1
- jne L_check_window ; distance other than 1: general copy path
- cmp [esp+40],edi
- je L_check_window ; output pointer equals the [esp+40] start marker: general path
- dec edi ; point at the previous output byte
- mov ecx, [esp+24] ; ecx = match length
- mov al, [edi] ; al = byte to repeat
- sub ecx,3 ; three copies are stored explicitly (assumes length >= 3 -- TODO confirm)
- mov [edi+1],al
- mov [edi+2],al
- mov [edi+3],al
- add edi,4 ; edi = original output pointer + 3
- rep stosb ; fill the remaining length - 3 bytes with al
- jmp L_while_test
- ALIGN 4
- L_test_for_second_level_length: ; length entry without op bit 16; al = cl = op, edx = entry value
- test al,64
- jnz L_test_for_end_of_block ; op bit 64 set: end of block or invalid code
- mov eax,1
- shl eax,cl
- dec eax ; eax = (1 << op) - 1
- and eax,ebp ; index bits read from the bit buffer (not consumed here)
- add eax,edx ; add the base carried in the first-level entry
- mov edx, [esp+8] ; literal/length table
- mov eax, [edx+eax*4] ; eax = second-level entry
- jmp L_dolen ; process it like a first-level entry
- ALIGN 4
- L_test_for_second_level_dist: ; distance entry without op bit 16; al = cl = op, edx = entry value
- test al,64
- jnz L_invalid_distance_code ; op bit 64 set: invalid distance code
- mov eax,1
- shl eax,cl
- dec eax ; eax = (1 << op) - 1
- and eax,ebp ; index bits read from the bit buffer (not consumed here)
- add eax,edx ; add the base carried in the first-level entry
- mov edx, [esp+12] ; distance table
- mov eax, [edx+eax*4] ; eax = second-level entry
- jmp L_dodist ; process it like a first-level entry
- ALIGN 4
- ; Integer decoder: the match starts below beg, so part of it comes from the
- ; sliding window.  On entry eax = output pointer - beg, edx = distance,
- ; edi = output pointer, [esp+24] = match length.  On exit to L_do_copy1,
- ; eax holds the byte count still to copy and esi the source.
- L_clip_window:
- ; 721 "inffast.S"
- mov ecx,eax
- mov eax, [esp+52] ; eax = state->wsize
- neg ecx ; ecx = -(output pointer - beg)
- mov esi, [esp+56] ; esi = state->window
- cmp eax,edx
- jb L_invalid_distance_too_far ; distance larger than the window size
- add ecx,edx ; ecx = distance - (output pointer - beg): bytes that lie in the window
- cmp dword ptr [esp+48],0
- jne L_wrap_around_window ; state->write != 0: data may wrap inside the window
- sub eax,ecx
- add esi,eax ; esi = window + wsize - ecx
- ; 749 "inffast.S"
- mov eax, [esp+24] ; eax = match length
- cmp eax,ecx
- jbe L_do_copy1 ; the whole match lies in the window
- sub eax,ecx ; eax = bytes left after the window part
- rep movsb ; copy the window part
- mov esi,edi
- sub esi,edx ; continue from output pointer - distance
- jmp L_do_copy1
- ; A second, label-less copy of the seven instructions above used to follow
- ; this jump.  It could never execute and has been removed.
- L_wrap_around_window: ; state->write != 0; ecx = bytes needed from the window, esi = window, edx = distance
- ; 793 "inffast.S"
- mov eax, [esp+48] ; eax = state->write
- cmp ecx,eax
- jbe L_contiguous_in_window ; needed bytes all lie below the write position
- add esi, [esp+52]
- add esi,eax
- sub esi,ecx ; esi = window + wsize + write - ecx
- sub ecx,eax ; ecx = bytes between esi and the end of the window
- mov eax, [esp+24] ; eax = match length
- cmp eax,ecx
- jbe L_do_copy1 ; match ends before the end of the window
- sub eax,ecx
- rep movsb ; copy up to the end of the window
- mov esi, [esp+56] ; wrap to the start of the window
- mov ecx, [esp+48] ; ecx = write: bytes available from the start
- cmp eax,ecx
- jbe L_do_copy1 ; remainder fits in that region
- sub eax,ecx
- rep movsb ; copy the region from the start of the window
- mov esi,edi
- sub esi,edx ; continue from output pointer - distance
- jmp L_do_copy1
- L_contiguous_in_window:
- ; 836 "inffast.S"
- add esi,eax
- sub esi,ecx ; esi = window + write - ecx
- mov eax, [esp+24] ; eax = match length
- cmp eax,ecx
- jbe L_do_copy1 ; whole match lies in the window
- sub eax,ecx
- rep movsb ; copy the window part
- mov esi,edi
- sub esi,edx ; continue from output pointer - distance
- L_do_copy1: ; eax = bytes still to copy from esi
- ; 862 "inffast.S"
- mov ecx,eax
- rep movsb
- mov esi, [esp+44] ; restore the input pointer parked at L_check_window
- jmp L_while_test
- ; 878 "inffast.S" -- move the decoder state into the registers used by the MMX loop
- ALIGN 4
- L_init_mmx:
- emms
- movd mm0,ebp ; mm0 = bit buffer
- mov ebp,ebx ; ebp = number of bits held
- ; 896 "inffast.S"
- movd mm4,dword ptr [esp+0] ; mm4 = literal/length index mask (working copy)
- movq mm3,mm4 ; mm3 = pristine copy used to reload mm4
- movd mm5,dword ptr [esp+4] ; mm5 = distance index mask (working copy)
- movq mm2,mm5 ; mm2 = pristine copy used to reload mm5
- pxor mm1,mm1 ; mm1 = bit count still to shift out of mm0 (none yet)
- mov ebx, [esp+8] ; ebx = literal/length table
- jmp L_do_loop_mmx
- ALIGN 4
- L_do_loop_mmx: ; MMX decoder: mm0 = bit buffer, mm1 = pending shift, ebp = bits held, ebx = literal/length table
- psrlq mm0,mm1 ; drop the bits consumed by the previous code
- cmp ebp,32
- ja L_get_length_code_mmx ; more than 32 bits held: no refill needed
- movd mm6,ebp
- movd mm7,dword ptr [esi] ; next 32 input bits
- add esi,4
- psllq mm7,mm6
- add ebp,32
- por mm0,mm7 ; append them above the bits already held
- L_get_length_code_mmx:
- pand mm4,mm0
- movd eax,mm4 ; eax = table index
- movq mm4,mm3 ; reload the mask for next time
- mov eax, [ebx+eax*4] ; eax = table entry: al = op, ah = bits to consume, upper 16 bits = value
- L_dolen_mmx:
- movzx ecx,ah
- movd mm1,ecx ; shift is deferred to the next psrlq mm0,mm1
- sub ebp,ecx
- test al,al
- jnz L_test_for_length_base_mmx ; op != 0: not a literal
- shr eax,16
- stosb ; op == 0: store the literal byte
- L_while_test_mmx:
- cmp [esp+16],edi
- jbe L_break_loop ; output pointer reached the [esp+16] limit
- cmp [esp+20],esi
- ja L_do_loop_mmx ; input pointer still below the [esp+20] limit: keep decoding
- jmp L_break_loop
- L_test_for_length_base_mmx:
- mov edx,eax
- shr edx,16 ; edx = entry value (length base or sub-table base)
- test al,16
- jz L_test_for_second_level_length_mmx ; op bit 16 clear: not a length base
- and eax,15 ; eax = number of extra length bits
- jz L_decode_distance_mmx ; none: edx already holds the match length
- psrlq mm0,mm1 ; apply the pending shift first
- movd mm1,eax ; the extra bits become the new pending shift
- movd ecx,mm0
- sub ebp,eax
- and ecx, [inflate_fast_mask+eax*4] ; ecx = extra bits value
- add edx,ecx ; edx = match length
- L_decode_distance_mmx: ; edx = match length, mm1 = pending shift
- psrlq mm0,mm1 ; drop the bits consumed so far
- cmp ebp,32
- ja L_get_dist_code_mmx ; more than 32 bits held: no refill needed
- movd mm6,ebp
- movd mm7,dword ptr [esi] ; next 32 input bits
- add esi,4
- psllq mm7,mm6
- add ebp,32
- por mm0,mm7 ; append them above the bits already held
- L_get_dist_code_mmx:
- mov ebx, [esp+12] ; ebx = distance table
- pand mm5,mm0
- movd eax,mm5 ; eax = table index
- movq mm5,mm2 ; reload the mask for next time
- mov eax, [ebx+eax*4] ; eax = distance table entry
- L_dodist_mmx:
- movzx ecx,ah ; ecx = bits to consume
- mov ebx,eax
- shr ebx,16 ; ebx = entry value (distance base or sub-table base)
- sub ebp,ecx
- movd mm1,ecx ; shift is deferred to the next psrlq mm0,mm1
- test al,16
- jz L_test_for_second_level_dist_mmx ; op bit 16 clear: not a distance base
- and eax,15 ; eax = number of extra distance bits
- jz L_check_dist_one_mmx ; none: try the distance-1 shortcut
- L_add_bits_to_dist_mmx:
- psrlq mm0,mm1 ; apply the pending shift first
- movd mm1,eax ; the extra bits become the new pending shift
- movd ecx,mm0
- sub ebp,eax
- and ecx, [inflate_fast_mask+eax*4] ; ecx = extra bits value
- add ebx,ecx ; ebx = match distance
- L_check_window_mmx:
- mov [esp+44],esi ; park the input pointer; esi becomes the copy source
- mov eax,edi
- sub eax, [esp+40] ; eax = output pointer - beg
- cmp eax,ebx
- jb L_clip_window_mmx ; distance reaches below beg: source is in the window
- mov ecx,edx ; ecx = match length
- mov esi,edi
- sub esi,ebx ; esi = output pointer - distance
- sub ecx,3 ; the first three bytes are copied explicitly
- mov al, [esi]
- mov [edi],al
- mov al, [esi+1]
- mov dl, [esi+2]
- add esi,3
- mov [edi+1],al
- mov [edi+2],dl
- add edi,3
- rep movsb ; copy the remaining length - 3 bytes
- mov esi, [esp+44] ; restore the input pointer
- mov ebx, [esp+8] ; ebx = literal/length table again
- jmp L_while_test_mmx
- ALIGN 4
- L_check_dist_one_mmx: ; reached when the distance code has no extra bits; ebx = distance, edx = match length
- cmp ebx,1
- jne L_check_window_mmx ; distance other than 1: general copy path
- cmp [esp+40],edi
- je L_check_window_mmx ; output pointer equals the [esp+40] start marker: general path
- dec edi ; point at the previous output byte
- mov ecx,edx ; ecx = match length
- mov al, [edi] ; al = byte to repeat
- sub ecx,3 ; three copies are stored explicitly (assumes length >= 3 -- TODO confirm)
- mov [edi+1],al
- mov [edi+2],al
- mov [edi+3],al
- add edi,4 ; edi = original output pointer + 3
- rep stosb ; fill the remaining length - 3 bytes with al
- mov ebx, [esp+8] ; ebx = literal/length table again
- jmp L_while_test_mmx
- ALIGN 4
- L_test_for_second_level_length_mmx: ; length entry without op bit 16; al = op, edx = entry value, ebx = literal/length table
- test al,64
- jnz L_test_for_end_of_block ; op bit 64 set: end of block or invalid code
- and eax,15 ; eax = number of index bits (op & 15)
- psrlq mm0,mm1 ; apply the pending shift
- movd ecx,mm0
- and ecx, [inflate_fast_mask+eax*4] ; ecx = index bits (not consumed here)
- add ecx,edx ; add the base carried in the first-level entry
- mov eax, [ebx+ecx*4] ; eax = second-level entry
- jmp L_dolen_mmx ; process it like a first-level entry
- ALIGN 4
- L_test_for_second_level_dist_mmx: ; distance entry without op bit 16; al = op, ebx = entry value
- test al,64
- jnz L_invalid_distance_code ; op bit 64 set: invalid distance code
- and eax,15 ; eax = number of index bits (op & 15)
- psrlq mm0,mm1 ; apply the pending shift
- movd ecx,mm0
- and ecx, [inflate_fast_mask+eax*4] ; ecx = index bits (not consumed here)
- mov eax, [esp+12] ; eax = distance table
- add ecx,ebx ; add the base carried in the first-level entry
- mov eax, [eax+ecx*4] ; eax = second-level entry
- jmp L_dodist_mmx ; process it like a first-level entry
- ALIGN 4
- ; MMX decoder: the match starts below beg, so part of it comes from the
- ; sliding window.  On entry eax = output pointer - beg, ebx = distance,
- ; edx = match length, edi = output pointer.  On exit to L_do_copy1_mmx,
- ; edx holds the byte count still to copy and esi the source.
- L_clip_window_mmx:
- mov ecx,eax
- mov eax, [esp+52] ; eax = state->wsize
- neg ecx ; ecx = -(output pointer - beg)
- mov esi, [esp+56] ; esi = state->window
- cmp eax,ebx
- jb L_invalid_distance_too_far ; distance larger than the window size
- add ecx,ebx ; ecx = distance - (output pointer - beg): bytes that lie in the window
- cmp dword ptr [esp+48],0
- jne L_wrap_around_window_mmx ; state->write != 0: data may wrap inside the window
- sub eax,ecx
- add esi,eax ; esi = window + wsize - ecx
- cmp edx,ecx
- jbe L_do_copy1_mmx ; the whole match lies in the window
- sub edx,ecx ; edx = bytes left after the window part
- rep movsb ; copy the window part
- mov esi,edi
- sub esi,ebx ; continue from output pointer - distance
- jmp L_do_copy1_mmx
- ; A second, label-less copy of the seven instructions above used to follow
- ; this jump.  It could never execute and has been removed.
- L_wrap_around_window_mmx: ; state->write != 0; ecx = bytes needed from the window, esi = window, ebx = distance, edx = match length
- mov eax, [esp+48] ; eax = state->write
- cmp ecx,eax
- jbe L_contiguous_in_window_mmx ; needed bytes all lie below the write position
- add esi, [esp+52]
- add esi,eax
- sub esi,ecx ; esi = window + wsize + write - ecx
- sub ecx,eax ; ecx = bytes between esi and the end of the window
- cmp edx,ecx
- jbe L_do_copy1_mmx ; match ends before the end of the window
- sub edx,ecx
- rep movsb ; copy up to the end of the window
- mov esi, [esp+56] ; wrap to the start of the window
- mov ecx, [esp+48] ; ecx = write: bytes available from the start
- cmp edx,ecx
- jbe L_do_copy1_mmx ; remainder fits in that region
- sub edx,ecx
- rep movsb ; copy the region from the start of the window
- mov esi,edi
- sub esi,ebx ; continue from output pointer - distance
- jmp L_do_copy1_mmx
- L_contiguous_in_window_mmx:
- add esi,eax
- sub esi,ecx ; esi = window + write - ecx
- cmp edx,ecx
- jbe L_do_copy1_mmx ; whole match lies in the window
- sub edx,ecx
- rep movsb ; copy the window part
- mov esi,edi
- sub esi,ebx ; continue from output pointer - distance
- L_do_copy1_mmx: ; edx = bytes still to copy from esi
- mov ecx,edx
- rep movsb
- mov esi, [esp+44] ; restore the input pointer parked at L_check_window_mmx
- mov ebx, [esp+8] ; ebx = literal/length table again
- jmp L_while_test_mmx
- ; 1174 "inffast.S" -- exits that change state->mode: ecx = message address or 0, edx = new mode
- L_invalid_distance_code:
- mov ecx, invalid_distance_code_msg
- mov edx,INFLATE_MODE_BAD
- jmp L_update_stream_state
- L_test_for_end_of_block: ; al = op of a literal/length entry with bit 64 set
- test al,32
- jz L_invalid_literal_length_code ; bit 32 clear: invalid code
- mov ecx,0 ; bit 32 set: end of block, no message
- mov edx,INFLATE_MODE_TYPE
- jmp L_update_stream_state
- L_invalid_literal_length_code:
- mov ecx, invalid_literal_length_code_msg
- mov edx,INFLATE_MODE_BAD
- jmp L_update_stream_state
- L_invalid_distance_too_far:
- mov esi, [esp+44] ; esi was loaded with the window pointer; restore the parked input pointer
- mov ecx, invalid_distance_too_far_msg
- mov edx,INFLATE_MODE_BAD
- jmp L_update_stream_state
- L_update_stream_state:
- mov eax, [esp+88] ; eax = stream pointer (argument 1)
- test ecx,ecx
- jz L_skip_msg ; no message to record
- mov [eax+24],ecx ; stream+24 = message address
- L_skip_msg:
- mov eax, [eax+28] ; eax = state pointer
- mov [eax+mode_state],edx ; state->mode = new mode
- jmp L_break_loop
- ALIGN 4
- L_break_loop: ; common exit: write the pointers, counts and bit buffer back to the stream and state
- ; 1243 "inffast.S"
- cmp dword ptr [inflate_fast_use_mmx],2
- jne L_update_next_in
- mov ebx,ebp ; MMX decoder keeps the bit count in ebp; move it to ebx
- L_update_next_in:
- ; 1266 "inffast.S"
- mov eax, [esp+88] ; eax = stream pointer
- mov ecx,ebx
- mov edx, [eax+28] ; edx = state pointer
- shr ecx,3 ; ecx = whole unused bytes in the bit buffer
- sub esi,ecx ; give those bytes back to the input
- shl ecx,3
- sub ebx,ecx ; ebx = leftover bit count (below 8)
- mov [eax+12],edi ; stream+12 = output pointer
- mov [edx+bits_state],ebx ; state->bits = leftover bit count
- mov ecx,ebx
- lea ebx, [esp+28]
- cmp [esp+20],ebx
- jne L_buf_not_used ; [esp+20] is not the scratch address: input was read in place
- sub esi,ebx ; esi = bytes consumed from the scratch buffer
- mov ebx, [eax+0]
- mov [esp+20],ebx
- add esi,ebx ; esi = original input pointer + bytes consumed
- mov ebx, [eax+4]
- sub ebx,11
- add [esp+20],ebx ; [esp+20] = original input pointer + original count - 11
- L_buf_not_used:
- mov [eax+0],esi ; stream+0 = input pointer
- mov ebx,1
- shl ebx,cl
- dec ebx ; ebx = (1 << leftover bits) - 1
- cmp dword ptr [inflate_fast_use_mmx],2
- jne L_update_hold
- psrlq mm0,mm1 ; MMX decoder: apply the pending shift
- movd ebp,mm0 ; ebp = low 32 bits of the bit buffer
- emms ; leave MMX state
- L_update_hold:
- and ebp,ebx ; keep only the leftover bits
- mov [edx+hold_state],ebp ; state->hold = leftover bits
- mov ebx, [esp+20] ; ebx = last
- cmp ebx,esi
- jbe L_last_is_smaller
- sub ebx,esi
- add ebx,11
- mov [eax+4],ebx ; stream+4 = (last - input pointer) + 11
- jmp L_fixup_out
- L_last_is_smaller:
- sub esi,ebx
- neg esi
- add esi,11
- mov [eax+4],esi ; stream+4 = 11 - (input pointer - last)
- L_fixup_out:
- mov ebx, [esp+16] ; ebx = end
- cmp ebx,edi
- jbe L_end_is_smaller
- sub ebx,edi
- add ebx,257
- mov [eax+16],ebx ; stream+16 = (end - output pointer) + 257
- jmp L_done
- L_end_is_smaller:
- sub edi,ebx
- neg edi
- add edi,257
- mov [eax+16],edi ; stream+16 = 257 - (output pointer - end)
- L_done: ; epilogue: undo the prologue in reverse order
- add esp,64 ; release the 64-byte scratch frame
- popfd ; restore the caller's EFLAGS, including the direction flag
- pop ebx
- pop ebp
- pop esi
- pop edi
- ret ; plain near return; the arguments stay on the stack
- _inflate_fast endp
- _TEXT ends
- end
|