- Patch downloaded from
- http://bugs.gentoo.org/show_bug.cgi?id=121871
- http://bugs.gentoo.org/attachment.cgi?id=98094
- --- libdv-0.104-old/libdv/asm_common.S
- +++ libdv-0.104/libdv/asm_common.S
- @@ -0,0 +1,29 @@
- +/* public domain, do what you want */
- +
- +#ifdef __PIC__
- +# define MUNG(sym) sym##@GOTOFF(%ebp)
- +# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
- +#else
- +# define MUNG(sym) sym
- +# define MUNG_ARR(sym, args...) sym(,##args)
- +#endif
- +
- +#ifdef __PIC__
- +# undef __i686 /* gcc define gets in our way */
- +# define LOAD_PIC_REG(reg) \
- + .ifndef __i686.get_pc_thunk.reg; \
- + .section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \
- + .global __i686.get_pc_thunk.reg; \
- + .hidden __i686.get_pc_thunk.reg; \
- + .type __i686.get_pc_thunk.reg,@function; \
- + __i686.get_pc_thunk.reg: \
- + movl (%esp), %e##reg; \
- + ret; \
- + .size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \
- + .previous; \
- + .endif; \
- + call __i686.get_pc_thunk.reg; \
- + addl $_GLOBAL_OFFSET_TABLE_, %e##reg
- +#else
- +# define LOAD_PIC_REG(reg)
- +#endif
- --- libdv-0.104-old/libdv/dct_block_mmx.S
- +++ libdv-0.104/libdv/dct_block_mmx.S
- @@ -53,19 +53,22 @@ scratch2: .quad 0
-
- .section .note.GNU-stack, "", @progbits
-
- +#include "asm_common.S"
- +
- .text
-
- .align 8
- .global _dv_dct_88_block_mmx
- .hidden _dv_dct_88_block_mmx
- .type _dv_dct_88_block_mmx,@function
- _dv_dct_88_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
-
- - movl 8(%ebp), %esi # source
- + LOAD_PIC_REG(bp)
- +
- + movl 12(%esp), %esi # source
-
- # column 0
- movq 16*0(%esi), %mm0 # v0
- @@ -86,22 +91,22 @@ _dv_dct_88_block_mmx:
-
- movq 16*3(%esi), %mm5 # v3
- movq 16*4(%esi), %mm7 # v4
- - movq %mm7, scratch1 # scratch1: v4 ;
- + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
- movq %mm5, %mm7 # duplicate v3
- - paddw scratch1, %mm5 # v03: v3+v4
- - psubw scratch1, %mm7 # v04: v3-v4
- - movq %mm5, scratch2 # scratch2: v03
- + paddw MUNG(scratch1), %mm5 # v03: v3+v4
- + psubw MUNG(scratch1), %mm7 # v04: v3-v4
- + movq %mm5, MUNG(scratch2) # scratch2: v03
- movq %mm0, %mm5 # mm5: v00
-
- - paddw scratch2, %mm0 # v10: v00+v03
- - psubw scratch2, %mm5 # v13: v00-v03
- - movq %mm3, scratch3 # scratch3: v02
- + paddw MUNG(scratch2), %mm0 # v10: v00+v03
- + psubw MUNG(scratch2), %mm5 # v13: v00-v03
- + movq %mm3, MUNG(scratch3) # scratch3: v02
- movq %mm1, %mm3 # duplicate v01
-
- - paddw scratch3, %mm1 # v11: v01+v02
- - psubw scratch3, %mm3 # v12: v01-v02
- + paddw MUNG(scratch3), %mm1 # v11: v01+v02
- + psubw MUNG(scratch3), %mm3 # v12: v01-v02
-
- - movq %mm6, scratch4 # scratch4: v05
- + movq %mm6, MUNG(scratch4) # scratch4: v05
- movq %mm0, %mm6 # duplicate v10
-
- paddw %mm1, %mm0 # v10+v11
- @@ -111,10 +116,10 @@ _dv_dct_88_block_mmx:
- movq %mm6, 16*4(%esi) # out4: v10-v11
-
- movq %mm4, %mm0 # mm0: v06
- - paddw scratch4, %mm4 # v15: v05+v06
- + paddw MUNG(scratch4), %mm4 # v15: v05+v06
- paddw %mm2, %mm0 # v16: v07+v06
-
- - pmulhw WA3, %mm4 # v35~: WA3*v15
- + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
- psllw $1, %mm4 # v35: compensate the coeefient scale
-
- movq %mm4, %mm6 # duplicate v35
- @@ -123,7 +128,7 @@ _dv_dct_88_block_mmx:
-
- paddw %mm5, %mm3 # v22: v12+v13
-
- - pmulhw WA1, %mm3 # v32~: WA1*v22
- + pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
- psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
- movq %mm5, %mm6 # duplicate v13
-
- @@ -134,13 +139,13 @@ _dv_dct_88_block_mmx:
- movq %mm6, 16*6(%esi) # out6: v13-v32
-
-
- - paddw scratch4, %mm7 # v14n: v04+v05
- + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
- movq %mm0, %mm5 # duplicate v16
-
- psubw %mm7, %mm0 # va1: v16-v14n
- - pmulhw WA5, %mm0 # va0~: va1*WA5
- - pmulhw WA4, %mm5 # v36~~: v16*WA4
- - pmulhw WA2, %mm7 # v34~~: v14n*WA2
- + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
- + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
- + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
- psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
- psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
-
- @@ -188,22 +193,22 @@ _dv_dct_88_block_mmx:
-
- movq 16*3(%esi), %mm5 # v3
- movq 16*4(%esi), %mm7 # v4
- - movq %mm7, scratch1 # scratch1: v4 ;
- + movq %mm7, MUNG(scratch1) # scratch1: v4 ;
- movq %mm5, %mm7 # duplicate v3
- - paddw scratch1, %mm5 # v03: v3+v4
- - psubw scratch1, %mm7 # v04: v3-v4
- - movq %mm5, scratch2 # scratch2: v03
- + paddw MUNG(scratch1), %mm5 # v03: v3+v4
- + psubw MUNG(scratch1), %mm7 # v04: v3-v4
- + movq %mm5, MUNG(scratch2) # scratch2: v03
- movq %mm0, %mm5 # mm5: v00
-
- - paddw scratch2, %mm0 # v10: v00+v03
- - psubw scratch2, %mm5 # v13: v00-v03
- - movq %mm3, scratch3 # scratc3: v02
- + paddw MUNG(scratch2), %mm0 # v10: v00+v03
- + psubw MUNG(scratch2), %mm5 # v13: v00-v03
- + movq %mm3, MUNG(scratch3) # scratc3: v02
- movq %mm1, %mm3 # duplicate v01
-
- - paddw scratch3, %mm1 # v11: v01+v02
- - psubw scratch3, %mm3 # v12: v01-v02
- + paddw MUNG(scratch3), %mm1 # v11: v01+v02
- + psubw MUNG(scratch3), %mm3 # v12: v01-v02
-
- - movq %mm6, scratch4 # scratc4: v05
- + movq %mm6, MUNG(scratch4) # scratc4: v05
- movq %mm0, %mm6 # duplicate v10
-
- paddw %mm1, %mm0 # v10+v11
- @@ -213,10 +218,10 @@ _dv_dct_88_block_mmx:
- movq %mm6, 16*4(%esi) # out4: v10-v11
-
- movq %mm4, %mm0 # mm0: v06
- - paddw scratch4, %mm4 # v15: v05+v06
- + paddw MUNG(scratch4), %mm4 # v15: v05+v06
- paddw %mm2, %mm0 # v16: v07+v06
-
- - pmulhw WA3, %mm4 # v35~: WA3*v15
- + pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
- psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
-
- movq %mm4, %mm6 # duplicate v35
- @@ -225,7 +230,7 @@ _dv_dct_88_block_mmx:
-
- paddw %mm5, %mm3 # v22: v12+v13
-
- - pmulhw WA1, %mm3 # v32~: WA3*v15
- + pmulhw MUNG(WA1), %mm3 # v32~: WA3*v15
- psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
- movq %mm5, %mm6 # duplicate v13
-
- @@ -235,13 +240,13 @@ _dv_dct_88_block_mmx:
- movq %mm5, 16*2(%esi) # out2: v13+v32
- movq %mm6, 16*6(%esi) # out6: v13-v32
-
- - paddw scratch4, %mm7 # v14n: v04+v05
- + paddw MUNG(scratch4), %mm7 # v14n: v04+v05
- movq %mm0, %mm5 # duplicate v16
-
- psubw %mm7, %mm0 # va1: v16-v14n
- - pmulhw WA2, %mm7 # v34~~: v14n*WA2
- - pmulhw WA5, %mm0 # va0~: va1*WA5
- - pmulhw WA4, %mm5 # v36~~: v16*WA4
- + pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
- + pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
- + pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
- psllw $16-NSHIFT, %mm7
- psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
- # scale note that WA4 is shifted 1 bit less than the others
- @@ -748,11 +755,12 @@ _dv_dct_block_mmx_postscale_88:
- _dv_dct_248_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
-
- - movl 8(%ebp), %esi # source
- + LOAD_PIC_REG(bp)
- +
- + movl 16(%esp), %esi # source
-
- # column 0
-
- @@ -779,7 +789,7 @@ _dv_dct_248_block_mmx:
- paddw %mm1, %mm0 # v20: v10+v11
- psubw %mm1, %mm3 # v21: v10-v11
-
- - pmulhw WA1, %mm5 # v32~: WA1*v22
- + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
- movq %mm4, %mm2
- psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
-
- @@ -818,7 +828,7 @@ _dv_dct_248_block_mmx:
- paddw %mm1, %mm0 # v20: v10+v11
- psubw %mm1, %mm3 # v21: v10-v11
-
- - pmulhw WA1, %mm5 # v32~: WA1*v22
- + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
- movq %mm4, %mm2
- psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
-
- @@ -855,7 +865,7 @@ _dv_dct_248_block_mmx:
- paddw %mm1, %mm0 # v20: v10+v11
- psubw %mm1, %mm3 # v21: v10-v11
-
- - pmulhw WA1, %mm5 # v32~: WA1*v22
- + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
- movq %mm4, %mm2
- psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
-
- @@ -892,7 +902,7 @@ _dv_dct_248_block_mmx:
- paddw %mm1, %mm0 # v20: v10+v11
- psubw %mm1, %mm3 # v21: v10-v11
-
- - pmulhw WA1, %mm5 # v32~: WA1*v22
- + pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
- movq %mm4, %mm2
- psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
-
- --- libdv-0.104-old/libdv/dv.c
- +++ libdv-0.104/libdv/dv.c
- @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
- } /* dv_reconfigure */
-
-
- +extern uint8_t dv_quant_offset[4];
- +extern uint8_t dv_quant_shifts[22][4];
- +
- static inline void
- dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
- int i;
- @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
- dv_idct_248 (co248, mb->b[i].coeffs);
- } else {
- #if ARCH_X86
- - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
- + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
- _dv_idct_88(mb->b[i].coeffs);
- #elif ARCH_X86_64
- _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
- @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
- dv_idct_248 (co248, mb->b[b].coeffs);
- } else {
- #if ARCH_X86
- - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
- + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
- _dv_weight_88_inverse(bl->coeffs);
- _dv_idct_88(bl->coeffs);
- #elif ARCH_X86_64
- --- libdv-0.104-old/libdv/encode.c
- +++ libdv-0.104/libdv/encode.c
- @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
- }
-
- extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
- - dv_vlc_entry_t ** out);
- + dv_vlc_entry_t ** out,
- + dv_vlc_entry_t * lookup);
-
- extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
- dv_vlc_entry_t ** out);
- @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
- #elif ARCH_X86
- int num_bits;
-
- - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
- + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
- emms();
- #else
- int num_bits;
- @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
- return num_bits;
- }
-
- -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
- +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
- extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
-
- extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
- @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
- #elif ARCH_X86_64
- return _dv_vlc_num_bits_block_x86_64(coeffs);
- #else
- - return _dv_vlc_num_bits_block_x86(coeffs);
- + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
- #endif
- }
-
- --- libdv-0.104-old/libdv/encode_x86.S
- +++ libdv-0.104/libdv/encode_x86.S
- @@ -23,9 +23,6 @@
- * The libdv homepage is http://libdv.sourceforge.net/.
- */
-
- -.data
- -ALLONE: .word 1,1,1,1
- -VLCADDMASK: .byte 255,0,0,0,255,0,0,0
-
-
- .section .note.GNU-stack, "", @progbits
- @@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx:
-
- movl $63, %ecx
-
- - movl vlc_encode_lookup, %esi
- + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
-
- pxor %mm0, %mm0
- pxor %mm2, %mm2
- - movq VLCADDMASK, %mm1
- + pushl $0x000000FF # these four lines
- + pushl $0x000000FF # load VLCADDMASK
- + movq (%esp), %mm1 # into %mm1 off the stack
- + addl $8, %esp # --> no TEXTRELs
- xorl %ebp, %ebp
- subl $8, %edx
- vlc_encode_block_mmx_loop:
- @@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86:
- addl $2, %edi
-
- movl $63, %ecx
- - movl vlc_num_bits_lookup, %esi
- + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
-
- vlc_num_bits_block_x86_loop:
- movw (%edi), %ax
- @@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows:
- paddw %mm5, %mm1
-
- paddw %mm1, %mm0
- -
- - pmaddwd ALLONE, %mm0
- +
- + pushl $0x00010001 # these four lines
- + pushl $0x00010001 # load ALLONE
- + pmaddwd (%esp), %mm0 # into %mm0 off the stack
- + addl $8, %esp # --> no TEXTRELs
- movq %mm0, %mm1
- psrlq $32, %mm1
- paddd %mm1, %mm0
- --- libdv-0.104-old/libdv/idct_block_mmx.S
- +++ libdv-0.104/libdv/idct_block_mmx.S
- @@ -8,17 +8,21 @@
-
- .section .note.GNU-stack, "", @progbits
-
- +#include "asm_common.S"
- +
- .text
- .align 4
- .global _dv_idct_block_mmx
- .hidden _dv_idct_block_mmx
- .type _dv_idct_block_mmx,@function
- _dv_idct_block_mmx:
- pushl %ebp
- - movl %esp,%ebp
- pushl %esi
- - leal preSC, %ecx
- - movl 8(%ebp),%esi /* source matrix */
- +
- + LOAD_PIC_REG(bp)
- +
- + leal MUNG(preSC), %ecx
- + movl 12(%esp),%esi /* source matrix */
-
- /*
- * column 0: even part
- @@ -35,7 +41,7 @@ _dv_idct_block_mmx:
- movq %mm1, %mm2 /* added 11/1/96 */
- pmulhw 8*8(%esi),%mm5 /* V8 */
- psubsw %mm0, %mm1 /* V16 */
- - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
- + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
- paddsw %mm0, %mm2 /* V17 */
- movq %mm2, %mm0 /* duplicate V17 */
- psraw $1, %mm2 /* t75=t82 */
- @@ -76,7 +82,7 @@ _dv_idct_block_mmx:
- paddsw %mm0, %mm3 /* V29 ; free mm0 */
- movq %mm7, %mm1 /* duplicate V26 */
- psraw $1, %mm3 /* t91=t94 */
- - pmulhw x539f539f539f539f,%mm7 /* V33 */
- + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
- psraw $1, %mm1 /* t96 */
- movq %mm5, %mm0 /* duplicate V2 */
- psraw $2, %mm4 /* t85=t87 */
- @@ -84,15 +90,15 @@ _dv_idct_block_mmx:
- psubsw %mm4, %mm0 /* V28 ; free mm4 */
- movq %mm0, %mm2 /* duplicate V28 */
- psraw $1, %mm5 /* t90=t93 */
- - pmulhw x4546454645464546,%mm0 /* V35 */
- + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
- psraw $1, %mm2 /* t97 */
- movq %mm5, %mm4 /* duplicate t90=t93 */
- psubsw %mm2, %mm1 /* V32 ; free mm2 */
- - pmulhw x61f861f861f861f8,%mm1 /* V36 */
- + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
- psllw $1, %mm7 /* t107 */
- paddsw %mm3, %mm5 /* V31 */
- psubsw %mm3, %mm4 /* V30 ; free mm3 */
- - pmulhw x5a825a825a825a82,%mm4 /* V34 */
- + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
- nop
- psubsw %mm1, %mm0 /* V38 */
- psubsw %mm7, %mm1 /* V37 ; free mm7 */
- @@ -159,7 +165,7 @@ _dv_idct_block_mmx:
- psubsw %mm7, %mm1 /* V50 */
- pmulhw 8*9(%esi), %mm5 /* V9 */
- paddsw %mm7, %mm2 /* V51 */
- - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
- + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
- movq %mm2, %mm6 /* duplicate V51 */
- psraw $1, %mm2 /* t138=t144 */
- movq %mm3, %mm4 /* duplicate V1 */
- @@ -200,11 +206,11 @@ _dv_idct_block_mmx:
- * even more by doing the correction step in a later stage when the number
- * is actually multiplied by 16
- */
- - paddw x0005000200010001, %mm4
- + paddw MUNG(x0005000200010001), %mm4
- psubsw %mm6, %mm3 /* V60 ; free mm6 */
- psraw $1, %mm0 /* t154=t156 */
- movq %mm3, %mm1 /* duplicate V60 */
- - pmulhw x539f539f539f539f, %mm1 /* V67 */
- + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
- movq %mm5, %mm6 /* duplicate V3 */
- psraw $2, %mm4 /* t148=t150 */
- paddsw %mm4, %mm5 /* V61 */
- @@ -213,13 +219,13 @@ _dv_idct_block_mmx:
- psllw $1, %mm1 /* t169 */
- paddsw %mm0, %mm5 /* V65 -> result */
- psubsw %mm0, %mm4 /* V64 ; free mm0 */
- - pmulhw x5a825a825a825a82, %mm4 /* V68 */
- + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
- psraw $1, %mm3 /* t158 */
- psubsw %mm6, %mm3 /* V66 */
- movq %mm5, %mm2 /* duplicate V65 */
- - pmulhw x61f861f861f861f8, %mm3 /* V70 */
- + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
- psllw $1, %mm6 /* t165 */
- - pmulhw x4546454645464546, %mm6 /* V69 */
- + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
- psraw $1, %mm2 /* t172 */
- /* moved from next block */
- movq 8*5(%esi), %mm0 /* V56 */
- @@ -344,7 +350,7 @@ _dv_idct_block_mmx:
- * movq 8*13(%esi), %mm4 tmt13
- */
- psubsw %mm4, %mm3 /* V134 */
- - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
- + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
- movq 8*9(%esi), %mm6 /* tmt9 */
- paddsw %mm4, %mm5 /* V135 ; mm4 free */
- movq %mm0, %mm4 /* duplicate tmt1 */
- @@ -373,17 +379,17 @@ _dv_idct_block_mmx:
- psubsw %mm7, %mm0 /* V144 */
- movq %mm0, %mm3 /* duplicate V144 */
- paddsw %mm7, %mm2 /* V147 ; free mm7 */
- - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
- + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
- movq %mm1, %mm7 /* duplicate tmt3 */
- paddsw %mm5, %mm7 /* V145 */
- psubsw %mm5, %mm1 /* V146 ; free mm5 */
- psubsw %mm1, %mm3 /* V150 */
- movq %mm7, %mm5 /* duplicate V145 */
- - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
- + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
- psubsw %mm2, %mm5 /* V148 */
- - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
- + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
- psllw $2, %mm0 /* t311 */
- - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
- + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
- paddsw %mm2, %mm7 /* V149 ; free mm2 */
- psllw $1, %mm1 /* t313 */
- nop /* without the nop - freeze here for one clock */
- @@ -409,7 +415,7 @@ _dv_idct_block_mmx:
- paddsw %mm3, %mm6 /* V164 ; free mm3 */
- movq %mm4, %mm3 /* duplicate V142 */
- psubsw %mm5, %mm4 /* V165 ; free mm5 */
- - movq %mm2, scratch7 /* out7 */
- + movq %mm2, MUNG(scratch7) /* out7 */
- psraw $4, %mm6
- psraw $4, %mm4
- paddsw %mm5, %mm3 /* V162 */
- @@ -420,11 +426,11 @@ _dv_idct_block_mmx:
- */
- movq %mm6, 8*9(%esi) /* out9 */
- paddsw %mm1, %mm0 /* V161 */
- - movq %mm3, scratch5 /* out5 */
- + movq %mm3, MUNG(scratch5) /* out5 */
- psubsw %mm1, %mm5 /* V166 ; free mm1 */
- movq %mm4, 8*11(%esi) /* out11 */
- psraw $4, %mm5
- - movq %mm0, scratch3 /* out3 */
- + movq %mm0, MUNG(scratch3) /* out3 */
- movq %mm2, %mm4 /* duplicate V140 */
- movq %mm5, 8*13(%esi) /* out13 */
- paddsw %mm7, %mm2 /* V160 */
- @@ -434,7 +440,7 @@ _dv_idct_block_mmx:
- /* moved from the next block */
- movq 8*3(%esi), %mm7
- psraw $4, %mm4
- - movq %mm2, scratch1 /* out1 */
- + movq %mm2, MUNG(scratch1) /* out1 */
- /* moved from the next block */
- movq %mm0, %mm1
- movq %mm4, 8*15(%esi) /* out15 */
- @@ -491,15 +497,15 @@ _dv_idct_block_mmx:
- paddsw %mm4, %mm3 /* V113 ; free mm4 */
- movq %mm0, %mm4 /* duplicate V110 */
- paddsw %mm1, %mm2 /* V111 */
- - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
- + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
- psubsw %mm1, %mm5 /* V112 ; free mm1 */
- psubsw %mm5, %mm4 /* V116 */
- movq %mm2, %mm1 /* duplicate V111 */
- - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
- + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
- psubsw %mm3, %mm2 /* V114 */
- - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
- + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
- paddsw %mm3, %mm1 /* V115 ; free mm3 */
- - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
- + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
- psllw $2, %mm0 /* t266 */
- movq %mm1, (%esi) /* save V115 */
- psllw $1, %mm5 /* t268 */
- @@ -517,7 +523,7 @@ _dv_idct_block_mmx:
- movq %mm6, %mm3 /* duplicate tmt4 */
- psubsw %mm0, %mm6 /* V100 */
- paddsw %mm0, %mm3 /* V101 ; free mm0 */
- - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
- + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
- movq %mm7, %mm5 /* duplicate tmt0 */
- movq 8*8(%esi), %mm1 /* tmt8 */
- paddsw %mm1, %mm7 /* V103 */
- @@ -551,10 +557,10 @@ _dv_idct_block_mmx:
- movq 8*2(%esi), %mm3 /* V123 */
- paddsw %mm4, %mm7 /* out0 */
- /* moved up from next block */
- - movq scratch3, %mm0
- + movq MUNG(scratch3), %mm0
- psraw $4, %mm7
- /* moved up from next block */
- - movq scratch5, %mm6
- + movq MUNG(scratch5), %mm6
- psubsw %mm4, %mm1 /* out14 ; free mm4 */
- paddsw %mm3, %mm5 /* out2 */
- psraw $4, %mm1
- @@ -565,7 +571,7 @@ _dv_idct_block_mmx:
- movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
- psraw $4, %mm2
- /* moved up to the prev block */
- - movq scratch7, %mm4
- + movq MUNG(scratch7), %mm4
- /* moved up to the prev block */
- psraw $4, %mm0
- movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
- @@ -579,7 +585,7 @@ _dv_idct_block_mmx:
- * psraw $4, %mm0
- * psraw $4, %mm6
- */
- - movq scratch1, %mm1
- + movq MUNG(scratch1), %mm1
- psraw $4, %mm4
- movq %mm0, 8*3(%esi) /* out3 */
- psraw $4, %mm1
- --- libdv-0.104-old/libdv/parse.c
- +++ libdv-0.104/libdv/parse.c
- @@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
- exit(0);
- #endif
- } /* dv_parse_ac_coeffs */
- +#if defined __GNUC__ && __ELF__
- +# define dv_strong_hidden_alias(name, aliasname) \
- + extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
- +dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
- +#else
- +int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
- +#endif
-
- /* ---------------------------------------------------------------------------
- */
- --- libdv-0.104-old/libdv/quant.c
- +++ libdv-0.104/libdv/quant.c
- @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
- uint32_t dv_quant_248_mul_tab [2] [22] [64];
- uint32_t dv_quant_88_mul_tab [2] [22] [64];
-
- -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
- +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
- extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
- static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
- static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
- @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
- _dv_quant_x86_64(block, qno, klass);
- emms();
- #else
- - _dv_quant_x86(block, qno, klass);
- + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
- emms();
- #endif
- }
- --- libdv-0.104-old/libdv/quant.h
- +++ libdv-0.104/libdv/quant.h
- @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
- extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
- extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
- dv_248_coeff_t *co);
- -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
- +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
- extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
- extern void dv_quant_init (void);
- #ifdef __cplusplus
- --- libdv-0.104-old/libdv/quant_x86.S
- +++ libdv-0.104/libdv/quant_x86.S
- @@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86:
-
- /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
- movl ARGn(1),%eax /* qno */
- + movl ARGn(3),%ebx /* dv_quant_offset */
- + addl ARGn(2),%ebx /* class */
- + movzbl (%ebx),%ecx
- movl ARGn(2),%ebx /* class */
- - movzbl dv_quant_offset(%ebx),%ecx
- addl %ecx,%eax
- - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
- + movl ARGn(4),%edx /* dv_quant_shifts */
- + leal (%edx,%eax,4),%edx /* edx is pq */
-
- /* extra = (class == 3); */
- /* 0 1 2 3 */
- @@ -212,11 +219,13 @@ _dv_quant_x86:
-
- /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
- movl ARGn(1),%eax /* qno */
- + movl ARGn(3),%ebx /* offset */
- + addl ARGn(2),%ebx /* class */
- + movzbl (%ebx),%ecx
- movl ARGn(2),%ebx /* class */
- -
- - movzbl dv_quant_offset(%ebx),%ecx
- + movl ARGn(4),%edx /* shifts */
- addl %ecx,%eax
- - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
- + leal (%edx,%eax,4),%edx /* edx is pq */
-
- /* extra = (class == 3); */
- /* 0 1 2 3 */
- --- libdv-0.104-old/libdv/rgbtoyuv.S
- +++ libdv-0.104/libdv/rgbtoyuv.S
- @@ -41,9 +41,6 @@
- #define DV_WIDTH_SHORT_HALF 720
- #define DV_WIDTH_BYTE_HALF 360
-
- -.global _dv_rgbtoycb_mmx
- -# .global yuvtoycb_mmx
- -
- .data
-
- .align 8
- @@ -110,25 +107,26 @@ VR0GR: .long 0,0
- VBG0B: .long 0,0
-
- #endif
- -
- +
- +#include "asm_common.S"
- +
- .section .note.GNU-stack, "", @progbits
-
- .text
-
- -#define _inPtr 8
- -#define _rows 12
- -#define _columns 16
- -#define _outyPtr 20
- -#define _outuPtr 24
- -#define _outvPtr 28
- +#define _inPtr 24+8
- +#define _rows 24+12
- +#define _columns 24+16
- +#define _outyPtr 24+20
- +#define _outuPtr 24+24
- +#define _outvPtr 24+28
-
- .global _dv_rgbtoycb_mmx
- .hidden _dv_rgbtoycb_mmx
- .type _dv_rgbtoycb_mmx,@function
- _dv_rgbtoycb_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %eax
- pushl %ebx
- pushl %ecx
- @@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx:
- pushl %esi
- pushl %edi
-
- - leal ZEROSX, %eax #This section gets around a bug
- + LOAD_PIC_REG(bp)
- +
- + leal MUNG(ZEROSX), %eax #This section gets around a bug
- movq (%eax), %mm0 #unlikely to persist
- - movq %mm0, ZEROS
- - leal OFFSETDX, %eax
- + movq %mm0, MUNG(ZEROS)
- + leal MUNG(OFFSETDX), %eax
- movq (%eax), %mm0
- - movq %mm0, OFFSETD
- - leal OFFSETWX, %eax
- + movq %mm0, MUNG(OFFSETD)
- + leal MUNG(OFFSETWX), %eax
- movq (%eax), %mm0
- - movq %mm0, OFFSETW
- - leal OFFSETBX, %eax
- + movq %mm0, MUNG(OFFSETW)
- + leal MUNG(OFFSETBX), %eax
- movq (%eax), %mm0
- - movq %mm0, OFFSETB
- - leal YR0GRX, %eax
- + movq %mm0, MUNG(OFFSETB)
- + leal MUNG(YR0GRX), %eax
- movq (%eax), %mm0
- - movq %mm0, YR0GR
- - leal YBG0BX, %eax
- + movq %mm0, MUNG(YR0GR)
- + leal MUNG(YBG0BX), %eax
- movq (%eax), %mm0
- - movq %mm0, YBG0B
- - leal UR0GRX, %eax
- + movq %mm0, MUNG(YBG0B)
- + leal MUNG(UR0GRX), %eax
- movq (%eax), %mm0
- - movq %mm0, UR0GR
- - leal UBG0BX, %eax
- + movq %mm0, MUNG(UR0GR)
- + leal MUNG(UBG0BX), %eax
- movq (%eax), %mm0
- - movq %mm0, UBG0B
- - leal VR0GRX, %eax
- + movq %mm0, MUNG(UBG0B)
- + leal MUNG(VR0GRX), %eax
- movq (%eax), %mm0
- - movq %mm0, VR0GR
- - leal VBG0BX, %eax
- + movq %mm0, MUNG(VR0GR)
- + leal MUNG(VBG0BX), %eax
- movq (%eax), %mm0
- - movq %mm0, VBG0B
- -
- - movl _rows(%ebp), %eax
- - movl _columns(%ebp), %ebx
- + movq %mm0, MUNG(VBG0B)
- + movl _rows(%esp), %eax
- + movl _columns(%esp), %ebx
- mull %ebx #number pixels
- shrl $3, %eax #number of loops
- movl %eax, %edi #loop counter in edi
- - movl _inPtr(%ebp), %eax
- - movl _outyPtr(%ebp), %ebx
- - movl _outuPtr(%ebp), %ecx
- - movl _outvPtr(%ebp), %edx
- + movl _inPtr(%esp), %eax
- + movl _outyPtr(%esp), %ebx
- + movl _outuPtr(%esp), %ecx
- + movl _outvPtr(%esp), %edx
- rgbtoycb_mmx_loop:
- movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
- pxor %mm6, %mm6 #0 -> mm6
- @@ -184,29 +186,29 @@ rgbtoycb_mmx_loop:
- punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
- movq %mm0, %mm2 #R1B0G0R0 -> mm2
-
- - pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
- + pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0
- movq %mm1, %mm3 #B1G1R1B0 -> mm3
-
- - pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
- + pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1
- movq %mm2, %mm4 #R1B0G0R0 -> mm4
-
- - pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
- + pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2
- movq %mm3, %mm5 #B1G1R1B0 -> mm5
-
- - pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
- + pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3
- punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
-
- - pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
- + pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4
- paddd %mm1, %mm0 #Y1Y0 -> mm0
-
- - pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
- + pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5
-
- movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
- paddd %mm3, %mm2 #U1U0 -> mm2
-
- movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
-
- - punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
- + punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1
- paddd %mm5, %mm4 #V1V0 -> mm4
-
- movq %mm1, %mm5 #B3G3R3B2 -> mm5
- @@ -214,29 +216,29 @@ rgbtoycb_mmx_loop:
-
- paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
-
- - punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
- + punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6
- movq %mm1, %mm3 #R3B2G2R2 -> mm3
-
- - pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
- + pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1
- movq %mm5, %mm7 #B3G3R3B2 -> mm7
-
- - pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
- + pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5
- psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
-
- - movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
- + movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0
- movq %mm3, %mm6 #R3B2G2R2 -> mm6
- - pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
- + pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6
- psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
-
- paddd %mm5, %mm1 #Y3Y2 -> mm1
- movq %mm7, %mm5 #B3G3R3B2 -> mm5
- - pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
- + pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2
- psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
-
- - pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
- + pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2
- packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
-
- - pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
- + pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5
- psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
-
- movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
- @@ -251,58 +253,58 @@ rgbtoycb_mmx_loop:
- movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
- psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
-
- - paddw OFFSETY, %mm0
- + paddw MUNG(OFFSETY), %mm0
- movq %mm0, (%ebx) #store Y3Y2Y1Y0
- packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
-
- - movq TEMP0, %mm0 #R5B4G4R4 -> mm0
- + movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0
- addl $8, %ebx
- -
- - punpcklbw ZEROS, %mm7 #B5G500 -> mm7
- +
- + punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7
- movq %mm0, %mm6 #R5B4G4R4 -> mm6
-
- - movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
- + movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU
- psrlq $32, %mm0 #00R5B4 -> mm0
-
- paddw %mm0, %mm7 #B5G5R5B4 -> mm7
- movq %mm6, %mm2 #B5B4G4R4 -> mm2
-
- - pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
- + pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2
- movq %mm7, %mm0 #B5G5R5B4 -> mm0
-
- - pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
- + pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7
- packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
-
- addl $24, %eax #increment RGB count
-
- - movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
- + movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4
- movq %mm6, %mm4 #B5B4G4R4 -> mm4
-
- - pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
- + pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4
- movq %mm0, %mm3 #B5G5R5B4 -> mm0
-
- - pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
- + pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4
- paddd %mm7, %mm2 #Y5Y4 -> mm2
-
- - pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
- + pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4
- pxor %mm7, %mm7 #0 -> mm7
-
- - pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
- + pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3
- punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
-
- paddd %mm6, %mm0 #U5U4 -> mm0
- movq %mm1, %mm6 #B7G7R7B6 -> mm6
-
- - pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
- + pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6
- punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
-
- movq %mm5, %mm7 #R7B6G6R6 -> mm7
- paddd %mm4, %mm3 #V5V4 -> mm3
-
- - pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
- + pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5
- movq %mm1, %mm4 #B7G7R7B6 -> mm4
-
- - pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
- + pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4
- psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
-
- psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
- @@ -310,25 +312,25 @@ rgbtoycb_mmx_loop:
- paddd %mm5, %mm6 #Y7Y6 -> mm6
- movq %mm7, %mm5 #R7B6G6R6 -> mm5
-
- - pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
- + pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7
- psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
-
- - pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
- + pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1
- psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
-
- packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
-
- - pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
- + pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5
- paddd %mm4, %mm7 #U7U6 -> mm7
-
- psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
- - paddw OFFSETY, %mm2
- + paddw MUNG(OFFSETY), %mm2
- movq %mm2, (%ebx) #store Y7Y6Y5Y4
-
- - movq ALLONE, %mm6
- + movq MUNG(ALLONE), %mm6
- packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
-
- - movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
- + movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4
- pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
-
- pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
- @@ -338,8 +340,8 @@ rgbtoycb_mmx_loop:
-
- psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
- psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
- -
- - movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
- +
- + movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5
-
- movq %mm4, (%ecx) # store U
-
- @@ -422,14 +426,15 @@ _dv_ppm_copy_y_block_mmx:
- _dv_pgm_copy_y_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
-
- - movq OFFSETY, %mm7
- + LOAD_PIC_REG(bp)
- +
- + movl 16(%esp), %edi # dest
- + movl 20(%esp), %esi # src
- +
- + movq MUNG(OFFSETY), %mm7
- pxor %mm6, %mm6
-
- movq (%esi), %mm0
- @@ -564,14 +571,15 @@ _dv_pgm_copy_y_block_mmx:
- _dv_video_copy_y_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
-
- - movq OFFSETBX, %mm7
- + LOAD_PIC_REG(bp)
- +
- + movl 16(%esp), %edi # dest
- + movl 20(%esp), %esi # src
- +
- + movq MUNG(OFFSETBX), %mm7
- pxor %mm6, %mm6
-
- movq (%esi), %mm0
- @@ -852,16 +864,16 @@ _dv_ppm_copy_pal_c_block_mmx:
- _dv_pgm_copy_pal_c_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
-
- + LOAD_PIC_REG(bp)
- +
- + movl 20(%esp), %edi # dest
- + movl 24(%esp), %esi # src
-
- - movq OFFSETBX, %mm7
- + movq MUNG(OFFSETBX), %mm7
- pxor %mm6, %mm6
-
-
- @@ -1000,15 +1014,16 @@ _dv_pgm_copy_pal_c_block_mmx:
- _dv_video_copy_pal_c_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
-
- - movq OFFSETBX, %mm7
- + LOAD_PIC_REG(bp)
- +
- + movl 20(%esp), %edi # dest
- + movl 24(%esp), %esi # src
- +
- + movq MUNG(OFFSETBX), %mm7
- paddw %mm7, %mm7
- pxor %mm6, %mm6
-
- @@ -1095,18 +1112,18 @@ video_copy_pal_c_block_mmx_loop:
- _dv_ppm_copy_ntsc_c_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
- +
- + LOAD_PIC_REG(bp)
- +
- + movl 20(%esp), %edi # dest
- + movl 24(%esp), %esi # src
-
- movl $4, %ebx
-
- - movq ALLONE, %mm6
- -
- + movq MUNG(ALLONE), %mm6
- ppm_copy_ntsc_c_block_mmx_loop:
-
- movq (%esi), %mm0
- @@ -1168,14 +1187,15 @@ ppm_copy_ntsc_c_block_mmx_loop:
- _dv_pgm_copy_ntsc_c_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
-
- - movq OFFSETBX, %mm7
- + LOAD_PIC_REG(bp)
- +
- + movl 16(%esp), %edi # dest
- + movl 20(%esp), %esi # src
- +
- + movq MUNG(OFFSETBX), %mm7
- paddw %mm7, %mm7
- pxor %mm6, %mm6
-
- @@ -1325,15 +1347,16 @@ _dv_pgm_copy_ntsc_c_block_mmx:
- _dv_video_copy_ntsc_c_block_mmx:
-
- pushl %ebp
- - movl %esp, %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
- -
- - movl 8(%ebp), %edi # dest
- - movl 12(%ebp), %esi # src
-
- - movq OFFSETBX, %mm7
- + LOAD_PIC_REG(bp)
- +
- + movl 20(%esp), %edi # dest
- + movl 24(%esp), %esi # src
- +
- + movq MUNG(OFFSETBX), %mm7
- paddw %mm7, %mm7
- pxor %mm6, %mm6
-
- --- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S
- +++ libdv-0.104/libdv/rgbtoyuv_x86_64.S
- @@ -41,9 +41,6 @@
- #define DV_WIDTH_SHORT_HALF 720
- #define DV_WIDTH_BYTE_HALF 360
-
- -.global _dv_rgbtoycb_mmx_x86_64
- -# .global yuvtoycb_mmx_x86_64
- -
- .data
-
- .align 8
- --- libdv-0.104-old/libdv/vlc_x86.S
- +++ libdv-0.104/libdv/vlc_x86.S
- @@ -1,31 +1,39 @@
- #include "asmoff.h"
- .section .note.GNU-stack, "", @progbits
- + #include "asm_common.S"
-
- .text
- .align 4
- .globl dv_decode_vlc
- +.globl asm_dv_decode_vlc
- +.hidden asm_dv_decode_vlc
- +asm_dv_decode_vlc = dv_decode_vlc
- +
- .type dv_decode_vlc,@function
- dv_decode_vlc:
- pushl %ebx
- + pushl %ebp
- +
- + LOAD_PIC_REG(bp)
-
- - /* Args are at 8(%esp). */
- - movl 8(%esp),%eax /* %eax is bits */
- - movl 12(%esp),%ebx /* %ebx is maxbits */
- + /* Args are at 12(%esp). */
- + movl 12(%esp),%eax /* %eax is bits */
- + movl 16(%esp),%ebx /* %ebx is maxbits */
- andl $0x3f,%ebx /* limit index range STL*/
-
- - movl dv_vlc_class_index_mask(,%ebx,4),%edx
- + movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
- andl %eax,%edx
- - movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
- + movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
- sarl %cl,%edx
- - movl dv_vlc_classes(,%ebx,4),%ecx
- + movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
- movsbl (%ecx,%edx,1),%edx /* %edx is class */
-
- - movl dv_vlc_index_mask(,%edx,4),%ebx
- - movl dv_vlc_index_rshift(,%edx,4),%ecx
- + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
- + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
- andl %eax,%ebx
- sarl %cl,%ebx
-
- - movl dv_vlc_lookups(,%edx,4),%edx
- + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
- movl (%edx,%ebx,4),%edx
-
- /* Now %edx holds result, like this:
- @@ -42,7 +51,7 @@ dv_decode_vlc:
- movl %edx,%ecx
- sarl $8,%ecx
- andl $0xff,%ecx
- - movl sign_mask(,%ecx,4),%ebx
- + movl MUNG_ARR(sign_mask,%ecx,4),%ebx
- andl %ebx,%eax
- negl %eax
- sarl $31,%eax
- @@ -63,14 +72,14 @@ dv_decode_vlc:
- *result = broken;
- Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
- */
- - movl 12(%esp),%ebx /* %ebx is maxbits */
- + movl 16(%esp),%ebx /* %ebx is maxbits */
- subl %ecx,%ebx
- sbbl %ebx,%ebx
- orl %ebx,%edx
-
- - movl 16(%esp),%eax
- + movl 20(%esp),%eax
- movl %edx,(%eax)
- -
- + popl %ebp
- popl %ebx
- ret
-
- @@ -80,21 +89,28 @@ dv_decode_vlc:
- .type __dv_decode_vlc,@function
- __dv_decode_vlc:
- pushl %ebx
- + pushl %ebp
- +
- + LOAD_PIC_REG(bp)
-
- - /* Args are at 8(%esp). */
- - movl 8(%esp),%eax /* %eax is bits */
- + /* Args are at 12(%esp). */
- + movl 12(%esp),%eax /* %eax is bits */
-
- movl %eax,%edx /* %edx is class */
- andl $0xfe00,%edx
- sarl $9,%edx
- +#ifdef __PIC__
- + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
- +#else
- movsbl dv_vlc_class_lookup5(%edx),%edx
- -
- - movl dv_vlc_index_mask(,%edx,4),%ebx
- - movl dv_vlc_index_rshift(,%edx,4),%ecx
- +#endif
- +
- + movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
- + movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
- andl %eax,%ebx
- sarl %cl,%ebx
-
- - movl dv_vlc_lookups(,%edx,4),%edx
- + movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
- movl (%edx,%ebx,4),%edx
-
- /* Now %edx holds result, like this:
- @@ -112,7 +128,7 @@ __dv_decode_vlc:
- movl %edx,%ecx
- sarl $8,%ecx
- andl $0xff,%ecx
- - movl sign_mask(,%ecx,4),%ecx
- + movl MUNG_ARR(sign_mask,%ecx,4),%ecx
- andl %ecx,%eax
- negl %eax
- sarl $31,%eax
- @@ -127,9 +143,9 @@ __dv_decode_vlc:
- xorl %eax,%edx
- subl %eax,%edx
-
- - movl 12(%esp),%eax
- + movl 16(%esp),%eax
- movl %edx,(%eax)
- -
- + popl %ebp
- popl %ebx
- ret
-
- @@ -140,14 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
- */
- .text
- .align 4
- +.globl asm_dv_parse_ac_coeffs_pass0
- +.hidden asm_dv_parse_ac_coeffs_pass0
- + asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
- +
- .globl dv_parse_ac_coeffs_pass0
- .type dv_parse_ac_coeffs_pass0,@function
- dv_parse_ac_coeffs_pass0:
- pushl %ebx
- pushl %edi
- pushl %esi
- pushl %ebp
-
- + LOAD_PIC_REG(si)
- +
- #define ARGn(N) (20+(4*(N)))(%esp)
-
- /*
- @@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0:
- ebp bl
- */
- movl ARGn(2),%ebp
- +#ifndef __PIC__
- movl ARGn(0),%esi
- movl bitstream_t_buf(%esi),%esi
- +#endif
- movl dv_block_t_offset(%ebp),%edi
- movl dv_block_t_reorder(%ebp),%ebx
-
- @@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0:
-
- movq dv_block_t_coeffs(%ebp),%mm1
- pxor %mm0,%mm0
- +#ifdef __PIC__
- + pand const_f_0_0_0@GOTOFF(%esi),%mm1
- +#else
- pand const_f_0_0_0,%mm1
- +#endif
- movq %mm1,dv_block_t_coeffs(%ebp)
- movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
- movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
- @@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0:
- readloop:
- movl %edi,%ecx
- shrl $3,%ecx
- +#ifdef __PIC__
- + movl ARGn(0),%eax
- + addl bitstream_t_buf(%eax),%ecx
- + movzbl (%ecx),%eax
- + movzbl 1(%ecx),%edx
- + movzbl 2(%ecx),%ecx
- +#else
- movzbl (%esi,%ecx,1),%eax
- movzbl 1(%esi,%ecx,1),%edx
- movzbl 2(%esi,%ecx,1),%ecx
- +#endif
- shll $16,%eax
- shll $8,%edx
- orl %ecx,%eax
- @@ -217,7 +254,11 @@ readloop:
-
- /* Attempt to use the shortcut first. If it hits, then
- this vlc term has been decoded. */
- +#ifdef __PIC__
- + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
- +#else
- movl dv_vlc_class1_shortcut(,%ecx,4),%edx
- +#endif
- test $0x80,%edx
- je done_decode
-
- @@ -228,12 +269,19 @@ readloop:
- movl %ebx,dv_block_t_reorder(%ebp)
-
- /* %eax is bits */
- -
- +#ifdef __PIC__
- + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
- +
- + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
- + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
- + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
- +#else
- movsbl dv_vlc_class_lookup5(%ecx),%ecx
-
- movl dv_vlc_index_mask(,%ecx,4),%ebx
- movl dv_vlc_lookups(,%ecx,4),%edx
- movl dv_vlc_index_rshift(,%ecx,4),%ecx
- +#endif
- andl %eax,%ebx
- sarl %cl,%ebx
-
- @@ -256,7 +304,11 @@ readloop:
- movl %edx,%ecx
- sarl $8,%ecx
- andl $0xff,%ecx
- +#ifdef __PIC__
- + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
- +#else
- movl sign_mask(,%ecx,4),%ecx
- +#endif
- andl %ecx,%eax
- negl %eax
- sarl $31,%eax
- @@ -326,10 +378,16 @@ alldone:
-
- slowpath:
- /* slow path: use dv_decode_vlc */;
- +#ifdef __PIC__
- + pushl %esi
- + leal vlc@GOTOFF(%esi),%esi
- + xchgl %esi,(%esp) /* last parameter is &vlc */
- +#else
- pushl $vlc /* last parameter is &vlc */
- +#endif
- pushl %edx /* bits_left */
- pushl %eax /* bits */
- - call dv_decode_vlc
- + call asm_dv_decode_vlc
- addl $12,%esp
- test $0x80,%edx /* If (vlc.run < 0) break */
- jne escape
- @@ -359,6 +417,8 @@ show16:
- pushl %esi
- pushl %ebp
-
- + LOAD_PIC_REG(si)
- +
- #define ARGn(N) (20+(4*(N)))(%esp)
-
- movl ARGn(1),%eax /* quality */
- @@ -373,7 +434,11 @@ dv_parse_video_segment:
- jz its_mono
- movl $6,%ebx
- its_mono:
- +#ifdef __PIC__
- + movl %ebx,n_blocks@GOTOFF(%esi)
- +#else
- movl %ebx,n_blocks
- +#endif
-
- /*
- * ebx seg/b
- @@ -384,15 +449,22 @@ its_mono:
- * ebp bl
- */
- movl ARGn(0),%ebx
- +#ifndef __PIC__
- movl dv_videosegment_t_bs(%ebx),%esi
- movl bitstream_t_buf(%esi),%esi
- +#endif
- leal dv_videosegment_t_mb(%ebx),%edi
-
- movl $0,%eax
- movl $0,%ecx
- macloop:
- +#ifdef __PIC__
- + movl %eax,m@GOTOFF(%esi)
- + movl %ecx,mb_start@GOTOFF(%esi)
- +#else
- movl %eax,m
- movl %ecx,mb_start
- +#endif
-
- movl ARGn(0),%ebx
-
- @@ -400,7 +472,13 @@ macloop:
- /* mb->qno = bitstream_get(bs,4); */
- movl %ecx,%edx
- shr $3,%edx
- +#ifdef __PIC__
- + movl dv_videosegment_t_bs(%ebx),%ecx
- + movl bitstream_t_buf(%ecx),%ecx
- + movzbl 3(%ecx,%edx,1),%edx
- +#else
- movzbl 3(%esi,%edx,1),%edx
- +#endif
- andl $0xf,%edx
- movl %edx,dv_macroblock_t_qno(%edi)
-
- @@ -411,7 +489,11 @@ macloop:
- movl %edx,dv_macroblock_t_eob_count(%edi)
-
- /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
- +#ifdef __PIC__
- + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
- +#else
- movl dv_super_map_vertical(,%eax,4),%edx
- +#endif
- movl dv_videosegment_t_i(%ebx),%ecx
- addl %ecx,%edx
-
- @@ -422,11 +504,20 @@ skarly:
- andl $1,%ecx
- shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
-
- +#ifdef __PIC__
- + leal mod_10@GOTOFF(%esi,%edx),%edx
- + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
- +#else
- - movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
- +#endif
- movl %edx,dv_macroblock_t_i(%edi)
-
- /* mb->j = dv_super_map_horizontal[m]; */
- +#ifdef __PIC__
- + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
- +#else
- movl dv_super_map_horizontal(,%eax,4),%edx
- +#endif
- movl %edx,dv_macroblock_t_j(%edi)
-
- /* mb->k = seg->k; */
- @@ -445,12 +536,28 @@ blkloop:
- +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
- */
- /* dc = bitstream_get(bs,9); */
- +#ifdef __PIC__
- + movl mb_start@GOTOFF(%esi),%ecx
- +#else
- movl mb_start,%ecx
- +#endif
- shr $3,%ecx
- +#ifdef __PIC__
- + movzbl blk_start@GOTOFF(%esi,%ebx),%edx
- +#else
- movzbl blk_start(%ebx),%edx
- +#endif
- addl %ecx,%edx
- +#ifdef __PIC__
- + movl ARGn(0),%ecx
- + movl dv_videosegment_t_bs(%ecx),%ecx
- + movl bitstream_t_buf(%ecx),%ecx
- + movzbl (%ecx,%edx,1),%eax /* hi byte */
- + movzbl 1(%ecx,%edx,1),%ecx /* lo byte */
- +#else
- movzbl (%esi,%edx,1),%eax /* hi byte */
- movzbl 1(%esi,%edx,1),%ecx /* lo byte */
- +#endif
- shll $8,%eax
- orl %ecx,%eax
-
- @@ -477,7 +584,11 @@ blkloop:
-
- /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
- shll $6,%eax
- +#ifdef __PIC__
- + leal dv_reorder@GOTOFF+1(%esi,%eax),%eax
- +#else
- addl $(dv_reorder+1),%eax
- +#endif
- movl %eax,dv_block_t_reorder(%ebp)
-
- /* bl->reorder_sentinel = bl->reorder + 63; */
- @@ -485,13 +596,22 @@ blkloop:
- movl %eax,dv_block_t_reorder_sentinel(%ebp)
-
- /* bl->offset= mb_start + dv_parse_bit_start[b]; */
- +#ifdef __PIC__
- + movl mb_start@GOTOFF(%esi),%ecx
- + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
- +#else
- movl mb_start,%ecx
- movl dv_parse_bit_start(,%ebx,4),%eax
- +#endif
- addl %ecx,%eax
- movl %eax,dv_block_t_offset(%ebp)
-
- /* bl->end= mb_start + dv_parse_bit_end[b]; */
- +#ifdef __PIC__
- + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
- +#else
- movl dv_parse_bit_end(,%ebx,4),%eax
- +#endif
- addl %ecx,%eax
- movl %eax,dv_block_t_end(%ebp)
-
- @@ -503,7 +623,11 @@ blkloop:
- /* no AC pass. Just zero out the remaining coeffs */
- movq dv_block_t_coeffs(%ebp),%mm1
- pxor %mm0,%mm0
- +#ifdef __PIC__
- + pand const_f_0_0_0@GOTOFF(%esi),%mm1
- +#else
- pand const_f_0_0_0,%mm1
- +#endif
- movq %mm1,dv_block_t_coeffs(%ebp)
- movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
- movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
- @@ -528,18 +652,27 @@ do_ac_pass:
- pushl %ebp
- pushl %edi
- pushl %eax
- - call dv_parse_ac_coeffs_pass0
- + call asm_dv_parse_ac_coeffs_pass0
- addl $12,%esp
- done_ac:
-
- +#ifdef __PIC__
- + movl n_blocks@GOTOFF(%esi),%eax
- +#else
- movl n_blocks,%eax
- +#endif
- addl $dv_block_t_size,%ebp
- incl %ebx
- cmpl %eax,%ebx
- jnz blkloop
-
- +#ifdef __PIC__
- + movl m@GOTOFF(%esi),%eax
- + movl mb_start@GOTOFF(%esi),%ecx
- +#else
- movl m,%eax
- movl mb_start,%ecx
- +#endif
- addl $(8 * 80),%ecx
- addl $dv_macroblock_t_size,%edi
- incl %eax
- @@ -557,7 +690,7 @@ done_ac:
-
- andl $DV_QUALITY_AC_MASK,%eax
- cmpl $DV_QUALITY_AC_2,%eax
- - jz dv_parse_ac_coeffs
- + jz asm_dv_parse_ac_coeffs
- movl $0,%eax
- ret
-
|