libdv-1.0.0-pic.patch

Patch downloaded from
http://bugs.gentoo.org/show_bug.cgi?id=121871
http://bugs.gentoo.org/attachment.cgi?id=98094
--- libdv-0.104-old/libdv/asm_common.S
+++ libdv-0.104/libdv/asm_common.S
@@ -0,0 +1,29 @@
+/* public domain, do what you want */
+
+#ifdef __PIC__
+# define MUNG(sym) sym##@GOTOFF(%ebp)
+# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args)
+#else
+# define MUNG(sym) sym
+# define MUNG_ARR(sym, args...) sym(,##args)
+#endif
+
+#ifdef __PIC__
+# undef __i686 /* gcc define gets in our way */
+# define LOAD_PIC_REG(reg) \
+ .ifndef __i686.get_pc_thunk.reg; \
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \
+ .global __i686.get_pc_thunk.reg; \
+ .hidden __i686.get_pc_thunk.reg; \
+ .type __i686.get_pc_thunk.reg,@function; \
+ __i686.get_pc_thunk.reg: \
+ movl (%esp), %e##reg; \
+ ret; \
+ .size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \
+ .previous; \
+ .endif; \
+ call __i686.get_pc_thunk.reg; \
+ addl $_GLOBAL_OFFSET_TABLE_, %e##reg
+#else
+# define LOAD_PIC_REG(reg)
+#endif
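The whole patch hangs off these two macros. MUNG(sym) turns an absolute data reference into a GOT-relative one (sym@GOTOFF plus a base register), and LOAD_PIC_REG(reg) materializes that base: it calls a get-PC thunk, which leaves the return address (the address of the following instruction) in the register, then adds $_GLOBAL_OFFSET_TABLE_, which the linker resolves PC-relatively. The sketch below is purely illustrative (the symbol mytable and label demo are invented); it shows what the expansion boils down to for one load on a 32-bit ELF target:

    /* demo.S -- hypothetical illustration, not part of the patch.
       Assemble with: gcc -m32 -fPIC -c demo.S */
    .data
mytable:    .long 42                    /* stand-in for scratch1, WA1, ... */

    .text
__i686.get_pc_thunk.bp:                 /* same shape as the macro above */
    movl (%esp), %ebp                   /* return address -> %ebp */
    ret

    .globl demo
demo:
    pushl %ebp
    call __i686.get_pc_thunk.bp         /* %ebp = address of the next insn */
    addl $_GLOBAL_OFFSET_TABLE_, %ebp   /* %ebp now points at the GOT */
    movl mytable@GOTOFF(%ebp), %eax     /* PIC; non-PIC form: movl mytable, %eax */
    popl %ebp
    ret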
--- libdv-0.104-old/libdv/dct_block_mmx.S
+++ libdv-0.104/libdv/dct_block_mmx.S
@@ -53,19 +53,22 @@ scratch2: .quad 0
.section .note.GNU-stack, "", @progbits
+#include "asm_common.S"
+
.text
.align 8
.global _dv_dct_88_block_mmx
.hidden _dv_dct_88_block_mmx
.type _dv_dct_88_block_mmx,@function
_dv_dct_88_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
- movl 8(%ebp), %esi # source
+ LOAD_PIC_REG(bp)
+
+ movl 12(%esp), %esi # source
# column 0
movq 16*0(%esi), %mm0 # v0
@@ -86,22 +91,22 @@ _dv_dct_88_block_mmx:
movq 16*3(%esi), %mm5 # v3
movq 16*4(%esi), %mm7 # v4
- movq %mm7, scratch1 # scratch1: v4 ;
+ movq %mm7, MUNG(scratch1) # scratch1: v4 ;
movq %mm5, %mm7 # duplicate v3
- paddw scratch1, %mm5 # v03: v3+v4
- psubw scratch1, %mm7 # v04: v3-v4
- movq %mm5, scratch2 # scratch2: v03
+ paddw MUNG(scratch1), %mm5 # v03: v3+v4
+ psubw MUNG(scratch1), %mm7 # v04: v3-v4
+ movq %mm5, MUNG(scratch2) # scratch2: v03
movq %mm0, %mm5 # mm5: v00
- paddw scratch2, %mm0 # v10: v00+v03
- psubw scratch2, %mm5 # v13: v00-v03
- movq %mm3, scratch3 # scratch3: v02
+ paddw MUNG(scratch2), %mm0 # v10: v00+v03
+ psubw MUNG(scratch2), %mm5 # v13: v00-v03
+ movq %mm3, MUNG(scratch3) # scratch3: v02
movq %mm1, %mm3 # duplicate v01
- paddw scratch3, %mm1 # v11: v01+v02
- psubw scratch3, %mm3 # v12: v01-v02
+ paddw MUNG(scratch3), %mm1 # v11: v01+v02
+ psubw MUNG(scratch3), %mm3 # v12: v01-v02
- movq %mm6, scratch4 # scratch4: v05
+ movq %mm6, MUNG(scratch4) # scratch4: v05
movq %mm0, %mm6 # duplicate v10
paddw %mm1, %mm0 # v10+v11
@@ -111,10 +116,10 @@ _dv_dct_88_block_mmx:
movq %mm6, 16*4(%esi) # out4: v10-v11
movq %mm4, %mm0 # mm0: v06
- paddw scratch4, %mm4 # v15: v05+v06
+ paddw MUNG(scratch4), %mm4 # v15: v05+v06
paddw %mm2, %mm0 # v16: v07+v06
- pmulhw WA3, %mm4 # v35~: WA3*v15
+ pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
psllw $1, %mm4 # v35: compensate the coeefient scale
movq %mm4, %mm6 # duplicate v35
@@ -123,7 +128,7 @@ _dv_dct_88_block_mmx:
paddw %mm5, %mm3 # v22: v12+v13
- pmulhw WA1, %mm3 # v32~: WA1*v22
+ pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22
psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
movq %mm5, %mm6 # duplicate v13
@@ -134,13 +139,13 @@ _dv_dct_88_block_mmx:
movq %mm6, 16*6(%esi) # out6: v13-v32
- paddw scratch4, %mm7 # v14n: v04+v05
+ paddw MUNG(scratch4), %mm7 # v14n: v04+v05
movq %mm0, %mm5 # duplicate v16
psubw %mm7, %mm0 # va1: v16-v14n
- pmulhw WA5, %mm0 # va0~: va1*WA5
- pmulhw WA4, %mm5 # v36~~: v16*WA4
- pmulhw WA2, %mm7 # v34~~: v14n*WA2
+ pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
+ pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
+ pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale
psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale
@@ -188,22 +193,22 @@ _dv_dct_88_block_mmx:
movq 16*3(%esi), %mm5 # v3
movq 16*4(%esi), %mm7 # v4
- movq %mm7, scratch1 # scratch1: v4 ;
+ movq %mm7, MUNG(scratch1) # scratch1: v4 ;
movq %mm5, %mm7 # duplicate v3
- paddw scratch1, %mm5 # v03: v3+v4
- psubw scratch1, %mm7 # v04: v3-v4
- movq %mm5, scratch2 # scratch2: v03
+ paddw MUNG(scratch1), %mm5 # v03: v3+v4
+ psubw MUNG(scratch1), %mm7 # v04: v3-v4
+ movq %mm5, MUNG(scratch2) # scratch2: v03
movq %mm0, %mm5 # mm5: v00
- paddw scratch2, %mm0 # v10: v00+v03
- psubw scratch2, %mm5 # v13: v00-v03
- movq %mm3, scratch3 # scratc3: v02
+ paddw MUNG(scratch2), %mm0 # v10: v00+v03
+ psubw MUNG(scratch2), %mm5 # v13: v00-v03
+ movq %mm3, MUNG(scratch3) # scratc3: v02
movq %mm1, %mm3 # duplicate v01
- paddw scratch3, %mm1 # v11: v01+v02
- psubw scratch3, %mm3 # v12: v01-v02
+ paddw MUNG(scratch3), %mm1 # v11: v01+v02
+ psubw MUNG(scratch3), %mm3 # v12: v01-v02
- movq %mm6, scratch4 # scratc4: v05
+ movq %mm6, MUNG(scratch4) # scratc4: v05
movq %mm0, %mm6 # duplicate v10
paddw %mm1, %mm0 # v10+v11
@@ -213,10 +218,10 @@ _dv_dct_88_block_mmx:
movq %mm6, 16*4(%esi) # out4: v10-v11
movq %mm4, %mm0 # mm0: v06
- paddw scratch4, %mm4 # v15: v05+v06
+ paddw MUNG(scratch4), %mm4 # v15: v05+v06
paddw %mm2, %mm0 # v16: v07+v06
- pmulhw WA3, %mm4 # v35~: WA3*v15
+ pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15
psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale
movq %mm4, %mm6 # duplicate v35
@@ -225,7 +230,7 @@ _dv_dct_88_block_mmx:
paddw %mm5, %mm3 # v22: v12+v13
- pmulhw WA1, %mm3 # v32~: WA3*v15
+ pmulhw MUNG(WA1), %mm3 # v32~: WA3*v15
psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale
movq %mm5, %mm6 # duplicate v13
@@ -235,13 +240,13 @@ _dv_dct_88_block_mmx:
movq %mm5, 16*2(%esi) # out2: v13+v32
movq %mm6, 16*6(%esi) # out6: v13-v32
- paddw scratch4, %mm7 # v14n: v04+v05
+ paddw MUNG(scratch4), %mm7 # v14n: v04+v05
movq %mm0, %mm5 # duplicate v16
psubw %mm7, %mm0 # va1: v16-v14n
- pmulhw WA2, %mm7 # v34~~: v14n*WA2
- pmulhw WA5, %mm0 # va0~: va1*WA5
- pmulhw WA4, %mm5 # v36~~: v16*WA4
+ pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2
+ pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5
+ pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4
psllw $16-NSHIFT, %mm7
psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient
# scale note that WA4 is shifted 1 bit less than the others
@@ -748,11 +755,12 @@ _dv_dct_block_mmx_postscale_88:
_dv_dct_248_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
- movl 8(%ebp), %esi # source
+ LOAD_PIC_REG(bp)
+
+ movl 16(%esp), %esi # source
# column 0
@@ -779,7 +789,7 @@ _dv_dct_248_block_mmx:
paddw %mm1, %mm0 # v20: v10+v11
psubw %mm1, %mm3 # v21: v10-v11
- pmulhw WA1, %mm5 # v32~: WA1*v22
+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
movq %mm4, %mm2
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
@@ -818,7 +828,7 @@ _dv_dct_248_block_mmx:
paddw %mm1, %mm0 # v20: v10+v11
psubw %mm1, %mm3 # v21: v10-v11
- pmulhw WA1, %mm5 # v32~: WA1*v22
+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
movq %mm4, %mm2
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
@@ -855,7 +865,7 @@ _dv_dct_248_block_mmx:
paddw %mm1, %mm0 # v20: v10+v11
psubw %mm1, %mm3 # v21: v10-v11
- pmulhw WA1, %mm5 # v32~: WA1*v22
+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
movq %mm4, %mm2
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
@@ -892,7 +902,7 @@ _dv_dct_248_block_mmx:
paddw %mm1, %mm0 # v20: v10+v11
psubw %mm1, %mm3 # v21: v10-v11
- pmulhw WA1, %mm5 # v32~: WA1*v22
+ pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22
movq %mm4, %mm2
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale
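A pattern repeated throughout the .S hunks: once LOAD_PIC_REG(bp) claims %ebp for the GOT pointer, the old "movl %esp, %ebp" frame setup is dropped and arguments are fetched relative to %esp instead. The displacement is the return address (4 bytes) plus 4 bytes per register pushed so far, which is why 8(%ebp) becomes 12(%esp) after two pushes in _dv_dct_88_block_mmx and 16(%esp) after three in _dv_dct_248_block_mmx. A minimal sketch of the convention, assuming the asm_common.S macros are in scope and a hypothetical one-argument function:

    .globl demo_arg
demo_arg:
    pushl %ebp                  /* +4 */
    pushl %esi                  /* +4 */
    LOAD_PIC_REG(bp)            /* clobbers %ebp, leaves %esp alone */
    movl 12(%esp), %esi         /* arg0 = 2 pushes (8) + return addr (4) */
    movl (%esi), %eax
    popl %esi
    popl %ebp
    ret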
--- libdv-0.104-old/libdv/dv.c
+++ libdv-0.104/libdv/dv.c
@@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
} /* dv_reconfigure */
+extern uint8_t dv_quant_offset[4];
+extern uint8_t dv_quant_shifts[22][4];
+
static inline void
dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
int i;
@@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
dv_idct_248 (co248, mb->b[i].coeffs);
} else {
#if ARCH_X86
- _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
+ _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
_dv_idct_88(mb->b[i].coeffs);
#elif ARCH_X86_64
_dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
@@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
dv_idct_248 (co248, mb->b[b].coeffs);
} else {
#if ARCH_X86
- _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
+ _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
_dv_weight_88_inverse(bl->coeffs);
_dv_idct_88(bl->coeffs);
#elif ARCH_X86_64
--- libdv-0.104-old/libdv/encode.c
+++ libdv-0.104/libdv/encode.c
@@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
}
extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
- dv_vlc_entry_t ** out);
+ dv_vlc_entry_t ** out,
+ dv_vlc_entry_t * lookup);
extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
dv_vlc_entry_t ** out);
@@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
#elif ARCH_X86
int num_bits;
- num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
+ num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
emms();
#else
int num_bits;
@@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
return num_bits;
}
-extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
+extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
@@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
#elif ARCH_X86_64
return _dv_vlc_num_bits_block_x86_64(coeffs);
#else
- return _dv_vlc_num_bits_block_x86(coeffs);
+ return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
#endif
}
--- libdv-0.104-old/libdv/encode_x86.S
+++ libdv-0.104/libdv/encode_x86.S
@@ -23,9 +23,6 @@
* The libdv homepage is http://libdv.sourceforge.net/.
*/
-.data
-ALLONE: .word 1,1,1,1
-VLCADDMASK: .byte 255,0,0,0,255,0,0,0
.section .note.GNU-stack, "", @progbits
@@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx:
movl $63, %ecx
- movl vlc_encode_lookup, %esi
+ movl 4+4*4+8(%esp), %esi # vlc_encode_lookup
pxor %mm0, %mm0
pxor %mm2, %mm2
- movq VLCADDMASK, %mm1
+ pushl $0x000000FF # these four lines
+ pushl $0x000000FF # load VLCADDMASK
+ movq (%esp), %mm1 # into %mm1 off the stack
+ addl $8, %esp # --> no TEXTRELs
xorl %ebp, %ebp
subl $8, %edx
vlc_encode_block_mmx_loop:
@@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86:
addl $2, %edi
movl $63, %ecx
- movl vlc_num_bits_lookup, %esi
+ movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup
vlc_num_bits_block_x86_loop:
movw (%edi), %ax
@@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows:
paddw %mm5, %mm1
paddw %mm1, %mm0
-
- pmaddwd ALLONE, %mm0
+
+ pushl $0x00010001 # these four lines
+ pushl $0x00010001 # load ALLONE
+ pmaddwd (%esp), %mm0 # into %mm0 off the stack
+ addl $8, %esp # --> no TEXTRELs
movq %mm0, %mm1
psrlq $32, %mm1
paddd %mm1, %mm0
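ALLONE and VLCADDMASK used to live in .data, and a movq/pmaddwd on their absolute addresses from inside a shared object is exactly what produces a text relocation. Instead of routing these two through MUNG() as well, the patch deletes them and builds the 64-bit constants on the stack from immediates, which needs no relocation at all. The shape of the trick as a standalone hedged sketch (label name invented; any 8-byte constant can be materialized this way):

    .text
    .globl demo_mask
demo_mask:
    pushl $0x000000FF           /* high dword, ends up at 4(%esp) */
    pushl $0x000000FF           /* low dword, at (%esp) */
    movq (%esp), %mm1           /* was: movq VLCADDMASK, %mm1 */
    addl $8, %esp               /* drop the temporary pair */
    emms
    ret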
--- libdv-0.104-old/libdv/idct_block_mmx.S
+++ libdv-0.104/libdv/idct_block_mmx.S
@@ -8,17 +8,21 @@
.section .note.GNU-stack, "", @progbits
+#include "asm_common.S"
+
.text
.align 4
.global _dv_idct_block_mmx
.hidden _dv_idct_block_mmx
.type _dv_idct_block_mmx,@function
_dv_idct_block_mmx:
pushl %ebp
- movl %esp,%ebp
pushl %esi
- leal preSC, %ecx
- movl 8(%ebp),%esi /* source matrix */
+
+ LOAD_PIC_REG(bp)
+
+ leal MUNG(preSC), %ecx
+ movl 12(%esp),%esi /* source matrix */
/*
* column 0: even part
@@ -35,7 +41,7 @@ _dv_idct_block_mmx:
movq %mm1, %mm2 /* added 11/1/96 */
pmulhw 8*8(%esi),%mm5 /* V8 */
psubsw %mm0, %mm1 /* V16 */
- pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
+ pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
paddsw %mm0, %mm2 /* V17 */
movq %mm2, %mm0 /* duplicate V17 */
psraw $1, %mm2 /* t75=t82 */
@@ -76,7 +82,7 @@ _dv_idct_block_mmx:
paddsw %mm0, %mm3 /* V29 ; free mm0 */
movq %mm7, %mm1 /* duplicate V26 */
psraw $1, %mm3 /* t91=t94 */
- pmulhw x539f539f539f539f,%mm7 /* V33 */
+ pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
psraw $1, %mm1 /* t96 */
movq %mm5, %mm0 /* duplicate V2 */
psraw $2, %mm4 /* t85=t87 */
@@ -84,15 +90,15 @@ _dv_idct_block_mmx:
psubsw %mm4, %mm0 /* V28 ; free mm4 */
movq %mm0, %mm2 /* duplicate V28 */
psraw $1, %mm5 /* t90=t93 */
- pmulhw x4546454645464546,%mm0 /* V35 */
+ pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
psraw $1, %mm2 /* t97 */
movq %mm5, %mm4 /* duplicate t90=t93 */
psubsw %mm2, %mm1 /* V32 ; free mm2 */
- pmulhw x61f861f861f861f8,%mm1 /* V36 */
+ pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
psllw $1, %mm7 /* t107 */
paddsw %mm3, %mm5 /* V31 */
psubsw %mm3, %mm4 /* V30 ; free mm3 */
- pmulhw x5a825a825a825a82,%mm4 /* V34 */
+ pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
nop
psubsw %mm1, %mm0 /* V38 */
psubsw %mm7, %mm1 /* V37 ; free mm7 */
@@ -159,7 +165,7 @@ _dv_idct_block_mmx:
psubsw %mm7, %mm1 /* V50 */
pmulhw 8*9(%esi), %mm5 /* V9 */
paddsw %mm7, %mm2 /* V51 */
- pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
+ pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
movq %mm2, %mm6 /* duplicate V51 */
psraw $1, %mm2 /* t138=t144 */
movq %mm3, %mm4 /* duplicate V1 */
@@ -200,11 +206,11 @@ _dv_idct_block_mmx:
* even more by doing the correction step in a later stage when the number
* is actually multiplied by 16
*/
- paddw x0005000200010001, %mm4
+ paddw MUNG(x0005000200010001), %mm4
psubsw %mm6, %mm3 /* V60 ; free mm6 */
psraw $1, %mm0 /* t154=t156 */
movq %mm3, %mm1 /* duplicate V60 */
- pmulhw x539f539f539f539f, %mm1 /* V67 */
+ pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
movq %mm5, %mm6 /* duplicate V3 */
psraw $2, %mm4 /* t148=t150 */
paddsw %mm4, %mm5 /* V61 */
@@ -213,13 +219,13 @@ _dv_idct_block_mmx:
psllw $1, %mm1 /* t169 */
paddsw %mm0, %mm5 /* V65 -> result */
psubsw %mm0, %mm4 /* V64 ; free mm0 */
- pmulhw x5a825a825a825a82, %mm4 /* V68 */
+ pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
psraw $1, %mm3 /* t158 */
psubsw %mm6, %mm3 /* V66 */
movq %mm5, %mm2 /* duplicate V65 */
- pmulhw x61f861f861f861f8, %mm3 /* V70 */
+ pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
psllw $1, %mm6 /* t165 */
- pmulhw x4546454645464546, %mm6 /* V69 */
+ pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
psraw $1, %mm2 /* t172 */
/* moved from next block */
movq 8*5(%esi), %mm0 /* V56 */
@@ -344,7 +350,7 @@ _dv_idct_block_mmx:
* movq 8*13(%esi), %mm4 tmt13
*/
psubsw %mm4, %mm3 /* V134 */
- pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
+ pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
movq 8*9(%esi), %mm6 /* tmt9 */
paddsw %mm4, %mm5 /* V135 ; mm4 free */
movq %mm0, %mm4 /* duplicate tmt1 */
@@ -373,17 +379,17 @@ _dv_idct_block_mmx:
psubsw %mm7, %mm0 /* V144 */
movq %mm0, %mm3 /* duplicate V144 */
paddsw %mm7, %mm2 /* V147 ; free mm7 */
- pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
+ pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
movq %mm1, %mm7 /* duplicate tmt3 */
paddsw %mm5, %mm7 /* V145 */
psubsw %mm5, %mm1 /* V146 ; free mm5 */
psubsw %mm1, %mm3 /* V150 */
movq %mm7, %mm5 /* duplicate V145 */
- pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
+ pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
psubsw %mm2, %mm5 /* V148 */
- pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
+ pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
psllw $2, %mm0 /* t311 */
- pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
+ pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
paddsw %mm2, %mm7 /* V149 ; free mm2 */
psllw $1, %mm1 /* t313 */
nop /* without the nop - freeze here for one clock */
@@ -409,7 +415,7 @@ _dv_idct_block_mmx:
paddsw %mm3, %mm6 /* V164 ; free mm3 */
movq %mm4, %mm3 /* duplicate V142 */
psubsw %mm5, %mm4 /* V165 ; free mm5 */
- movq %mm2, scratch7 /* out7 */
+ movq %mm2, MUNG(scratch7) /* out7 */
psraw $4, %mm6
psraw $4, %mm4
paddsw %mm5, %mm3 /* V162 */
@@ -420,11 +426,11 @@ _dv_idct_block_mmx:
*/
movq %mm6, 8*9(%esi) /* out9 */
paddsw %mm1, %mm0 /* V161 */
- movq %mm3, scratch5 /* out5 */
+ movq %mm3, MUNG(scratch5) /* out5 */
psubsw %mm1, %mm5 /* V166 ; free mm1 */
movq %mm4, 8*11(%esi) /* out11 */
psraw $4, %mm5
- movq %mm0, scratch3 /* out3 */
+ movq %mm0, MUNG(scratch3) /* out3 */
movq %mm2, %mm4 /* duplicate V140 */
movq %mm5, 8*13(%esi) /* out13 */
paddsw %mm7, %mm2 /* V160 */
@@ -434,7 +440,7 @@ _dv_idct_block_mmx:
/* moved from the next block */
movq 8*3(%esi), %mm7
psraw $4, %mm4
- movq %mm2, scratch1 /* out1 */
+ movq %mm2, MUNG(scratch1) /* out1 */
/* moved from the next block */
movq %mm0, %mm1
movq %mm4, 8*15(%esi) /* out15 */
@@ -491,15 +497,15 @@ _dv_idct_block_mmx:
paddsw %mm4, %mm3 /* V113 ; free mm4 */
movq %mm0, %mm4 /* duplicate V110 */
paddsw %mm1, %mm2 /* V111 */
- pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
+ pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
psubsw %mm1, %mm5 /* V112 ; free mm1 */
psubsw %mm5, %mm4 /* V116 */
movq %mm2, %mm1 /* duplicate V111 */
- pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
+ pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
psubsw %mm3, %mm2 /* V114 */
- pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
+ pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
paddsw %mm3, %mm1 /* V115 ; free mm3 */
- pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
+ pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
psllw $2, %mm0 /* t266 */
movq %mm1, (%esi) /* save V115 */
psllw $1, %mm5 /* t268 */
@@ -517,7 +523,7 @@ _dv_idct_block_mmx:
movq %mm6, %mm3 /* duplicate tmt4 */
psubsw %mm0, %mm6 /* V100 */
paddsw %mm0, %mm3 /* V101 ; free mm0 */
- pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
+ pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
movq %mm7, %mm5 /* duplicate tmt0 */
movq 8*8(%esi), %mm1 /* tmt8 */
paddsw %mm1, %mm7 /* V103 */
@@ -551,10 +557,10 @@ _dv_idct_block_mmx:
movq 8*2(%esi), %mm3 /* V123 */
paddsw %mm4, %mm7 /* out0 */
/* moved up from next block */
- movq scratch3, %mm0
+ movq MUNG(scratch3), %mm0
psraw $4, %mm7
/* moved up from next block */
- movq scratch5, %mm6
+ movq MUNG(scratch5), %mm6
psubsw %mm4, %mm1 /* out14 ; free mm4 */
paddsw %mm3, %mm5 /* out2 */
psraw $4, %mm1
@@ -565,7 +571,7 @@ _dv_idct_block_mmx:
movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
psraw $4, %mm2
/* moved up to the prev block */
- movq scratch7, %mm4
+ movq MUNG(scratch7), %mm4
/* moved up to the prev block */
psraw $4, %mm0
movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
@@ -579,7 +585,7 @@ _dv_idct_block_mmx:
* psraw $4, %mm0
* psraw $4, %mm6
*/
- movq scratch1, %mm1
+ movq MUNG(scratch1), %mm1
psraw $4, %mm4
movq %mm0, 8*3(%esi) /* out3 */
psraw $4, %mm1
--- libdv-0.104-old/libdv/parse.c
+++ libdv-0.104/libdv/parse.c
@@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
exit(0);
#endif
} /* dv_parse_ac_coeffs */
+#if defined __GNUC__ && __ELF__
+# define dv_strong_hidden_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
+dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
+#else
+int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
+#endif
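dv_parse_ac_coeffs is exported from the library, so a direct "call dv_parse_ac_coeffs" from assembly would either have to go through the PLT (which requires the PIC register in %ebx, something the asm does not maintain) or generate a text relocation. The macro above creates a second, hidden-visibility name for the same function; hidden symbols bind within the DSO at link time, so the assembly can call asm_dv_parse_ac_coeffs with a plain PC-relative call. vlc_x86.S applies the same idiom from the assembly side for dv_decode_vlc; a stripped-down sketch of it (label names invented):

    .text
    .globl exported_fn          /* exported: a direct call may need the PLT */
    .type exported_fn,@function
exported_fn:
    ret

    .globl local_alias
    .hidden local_alias         /* binds locally, so "call local_alias" is a */
    local_alias = exported_fn   /* plain pc-relative call: no PLT, no TEXTREL */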
/* ---------------------------------------------------------------------------
*/
--- libdv-0.104-old/libdv/quant.c
+++ libdv-0.104/libdv/quant.c
@@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1
uint32_t dv_quant_248_mul_tab [2] [22] [64];
uint32_t dv_quant_88_mul_tab [2] [22] [64];
-extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
+extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
@@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
_dv_quant_x86_64(block, qno, klass);
emms();
#else
- _dv_quant_x86(block, qno, klass);
+ _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
emms();
#endif
}
--- libdv-0.104-old/libdv/quant.h
+++ libdv-0.104/libdv/quant.h
@@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
dv_248_coeff_t *co);
-extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
+extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
extern void dv_quant_init (void);
#ifdef __cplusplus
--- libdv-0.104-old/libdv/quant_x86.S
+++ libdv-0.104/libdv/quant_x86.S
@@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86:
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
movl ARGn(1),%eax /* qno */
+ movl ARGn(3),%ebx /* dv_quant_offset */
+ addl ARGn(2),%ebx /* class */
+ movzbl (%ebx),%ecx
movl ARGn(2),%ebx /* class */
- movzbl dv_quant_offset(%ebx),%ecx
addl %ecx,%eax
- leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
+ movl ARGn(4),%edx /* dv_quant_shifts */
+ leal (%edx,%eax,4),%edx /* edx is pq */
/* extra = (class == 3); */
/* 0 1 2 3 */
@@ -212,11 +219,13 @@ _dv_quant_x86:
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
movl ARGn(1),%eax /* qno */
+ movl ARGn(3),%ebx /* offset */
+ addl ARGn(2),%ebx /* class */
+ movzbl (%ebx),%ecx
movl ARGn(2),%ebx /* class */
-
- movzbl dv_quant_offset(%ebx),%ecx
+ movl ARGn(4),%edx /* shifts */
addl %ecx,%eax
- leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */
+ leal (%edx,%eax,4),%edx /* edx is pq */
/* extra = (class == 3); */
/* 0 1 2 3 */
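Here the quantization tables are no longer referenced by name at all: the C callers in dv.c and quant.c pass &dv_quant_offset[0] and &dv_quant_shifts[0][0] as extra arguments, and the assembly indexes through the pointers it received, keeping the hot path free of even GOTOFF arithmetic. A hedged sketch of the replacement pattern, reusing this file's ARGn(n) stack-argument macro and the argument order of the patched prototype:

    /* was:  movzbl dv_quant_offset(%ebx),%ecx   (absolute address) */
    movl ARGn(3),%ebx           /* caller-supplied &dv_quant_offset[0] */
    addl ARGn(2),%ebx           /* + class index */
    movzbl (%ebx),%ecx          /* dv_quant_offset[class] */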
--- libdv-0.104-old/libdv/rgbtoyuv.S
+++ libdv-0.104/libdv/rgbtoyuv.S
@@ -41,9 +41,6 @@
#define DV_WIDTH_SHORT_HALF 720
#define DV_WIDTH_BYTE_HALF 360
-.global _dv_rgbtoycb_mmx
-# .global yuvtoycb_mmx
-
.data
.align 8
@@ -110,25 +107,26 @@ VR0GR: .long 0,0
VBG0B: .long 0,0
#endif
-
+
+#include "asm_common.S"
+
.section .note.GNU-stack, "", @progbits
.text
-#define _inPtr 8
-#define _rows 12
-#define _columns 16
-#define _outyPtr 20
-#define _outuPtr 24
-#define _outvPtr 28
+#define _inPtr 24+8
+#define _rows 24+12
+#define _columns 24+16
+#define _outyPtr 24+20
+#define _outuPtr 24+24
+#define _outvPtr 24+28
.global _dv_rgbtoycb_mmx
.hidden _dv_rgbtoycb_mmx
.type _dv_rgbtoycb_mmx,@function
_dv_rgbtoycb_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
@@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx:
pushl %esi
pushl %edi
- leal ZEROSX, %eax #This section gets around a bug
+ LOAD_PIC_REG(bp)
+
+ leal MUNG(ZEROSX), %eax #This section gets around a bug
movq (%eax), %mm0 #unlikely to persist
- movq %mm0, ZEROS
- leal OFFSETDX, %eax
+ movq %mm0, MUNG(ZEROS)
+ leal MUNG(OFFSETDX), %eax
movq (%eax), %mm0
- movq %mm0, OFFSETD
- leal OFFSETWX, %eax
+ movq %mm0, MUNG(OFFSETD)
+ leal MUNG(OFFSETWX), %eax
movq (%eax), %mm0
- movq %mm0, OFFSETW
- leal OFFSETBX, %eax
+ movq %mm0, MUNG(OFFSETW)
+ leal MUNG(OFFSETBX), %eax
movq (%eax), %mm0
- movq %mm0, OFFSETB
- leal YR0GRX, %eax
+ movq %mm0, MUNG(OFFSETB)
+ leal MUNG(YR0GRX), %eax
movq (%eax), %mm0
- movq %mm0, YR0GR
- leal YBG0BX, %eax
+ movq %mm0, MUNG(YR0GR)
+ leal MUNG(YBG0BX), %eax
movq (%eax), %mm0
- movq %mm0, YBG0B
- leal UR0GRX, %eax
+ movq %mm0, MUNG(YBG0B)
+ leal MUNG(UR0GRX), %eax
movq (%eax), %mm0
- movq %mm0, UR0GR
- leal UBG0BX, %eax
+ movq %mm0, MUNG(UR0GR)
+ leal MUNG(UBG0BX), %eax
movq (%eax), %mm0
- movq %mm0, UBG0B
- leal VR0GRX, %eax
+ movq %mm0, MUNG(UBG0B)
+ leal MUNG(VR0GRX), %eax
movq (%eax), %mm0
- movq %mm0, VR0GR
- leal VBG0BX, %eax
+ movq %mm0, MUNG(VR0GR)
+ leal MUNG(VBG0BX), %eax
movq (%eax), %mm0
- movq %mm0, VBG0B
-
- movl _rows(%ebp), %eax
- movl _columns(%ebp), %ebx
+ movq %mm0, MUNG(VBG0B)
+ movl _rows(%esp), %eax
+ movl _columns(%esp), %ebx
mull %ebx #number pixels
shrl $3, %eax #number of loops
movl %eax, %edi #loop counter in edi
- movl _inPtr(%ebp), %eax
- movl _outyPtr(%ebp), %ebx
- movl _outuPtr(%ebp), %ecx
- movl _outvPtr(%ebp), %edx
+ movl _inPtr(%esp), %eax
+ movl _outyPtr(%esp), %ebx
+ movl _outuPtr(%esp), %ecx
+ movl _outvPtr(%esp), %edx
rgbtoycb_mmx_loop:
movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0
pxor %mm6, %mm6 #0 -> mm6
@@ -184,29 +186,29 @@ rgbtoycb_mmx_loop:
punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1
movq %mm0, %mm2 #R1B0G0R0 -> mm2
- pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0
+ pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0
movq %mm1, %mm3 #B1G1R1B0 -> mm3
- pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1
+ pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1
movq %mm2, %mm4 #R1B0G0R0 -> mm4
- pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2
+ pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2
movq %mm3, %mm5 #B1G1R1B0 -> mm5
- pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3
+ pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3
punpckhbw %mm6, %mm7 # 00G2R2 -> mm7
- pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4
+ pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4
paddd %mm1, %mm0 #Y1Y0 -> mm0
- pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5
+ pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5
movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1
paddd %mm3, %mm2 #U1U0 -> mm2
movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6
- punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1
+ punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1
paddd %mm5, %mm4 #V1V0 -> mm4
movq %mm1, %mm5 #B3G3R3B2 -> mm5
@@ -214,29 +216,29 @@ rgbtoycb_mmx_loop:
paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1
- punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6
+ punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6
movq %mm1, %mm3 #R3B2G2R2 -> mm3
- pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1
+ pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1
movq %mm5, %mm7 #B3G3R3B2 -> mm7
- pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5
+ pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5
psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0
- movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0
+ movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0
movq %mm3, %mm6 #R3B2G2R2 -> mm6
- pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6
+ pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6
psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2
paddd %mm5, %mm1 #Y3Y2 -> mm1
movq %mm7, %mm5 #B3G3R3B2 -> mm5
- pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2
+ pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2
psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
- pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2
+ pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2
packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0
- pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5
+ pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5
psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4
movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7
@@ -251,58 +253,58 @@ rgbtoycb_mmx_loop:
movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5
psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3
- paddw OFFSETY, %mm0
+ paddw MUNG(OFFSETY), %mm0
movq %mm0, (%ebx) #store Y3Y2Y1Y0
packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2
- movq TEMP0, %mm0 #R5B4G4R4 -> mm0
+ movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0
addl $8, %ebx
-
- punpcklbw ZEROS, %mm7 #B5G500 -> mm7
+
+ punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7
movq %mm0, %mm6 #R5B4G4R4 -> mm6
- movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU
+ movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU
psrlq $32, %mm0 #00R5B4 -> mm0
paddw %mm0, %mm7 #B5G5R5B4 -> mm7
movq %mm6, %mm2 #B5B4G4R4 -> mm2
- pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2
+ pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2
movq %mm7, %mm0 #B5G5R5B4 -> mm0
- pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7
+ pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7
packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4
addl $24, %eax #increment RGB count
- movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4
+ movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4
movq %mm6, %mm4 #B5B4G4R4 -> mm4
- pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4
+ pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4
movq %mm0, %mm3 #B5G5R5B4 -> mm0
- pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4
+ pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4
paddd %mm7, %mm2 #Y5Y4 -> mm2
- pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4
+ pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4
pxor %mm7, %mm7 #0 -> mm7
- pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3
+ pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3
punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1
paddd %mm6, %mm0 #U5U4 -> mm0
movq %mm1, %mm6 #B7G7R7B6 -> mm6
- pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6
+ pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6
punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5
movq %mm5, %mm7 #R7B6G6R6 -> mm7
paddd %mm4, %mm3 #V5V4 -> mm3
- pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5
+ pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5
movq %mm1, %mm4 #B7G7R7B6 -> mm4
- pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4
+ pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4
psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0
psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2
@@ -310,25 +312,25 @@ rgbtoycb_mmx_loop:
paddd %mm5, %mm6 #Y7Y6 -> mm6
movq %mm7, %mm5 #R7B6G6R6 -> mm5
- pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7
+ pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7
psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3
- pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1
+ pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1
psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2
- pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5
+ pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5
paddd %mm4, %mm7 #U7U6 -> mm7
psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
- paddw OFFSETY, %mm2
+ paddw MUNG(OFFSETY), %mm2
movq %mm2, (%ebx) #store Y7Y6Y5Y4
- movq ALLONE, %mm6
+ movq MUNG(ALLONE), %mm6
packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0
- movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4
+ movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4
pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
@@ -338,8 +340,8 @@ rgbtoycb_mmx_loop:
psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4
-
- movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5
+
+ movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5
movq %mm4, (%ecx) # store U
@@ -422,14 +426,15 @@ _dv_ppm_copy_y_block_mmx:
_dv_pgm_copy_y_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
- movq OFFSETY, %mm7
+ LOAD_PIC_REG(bp)
+
+ movl 16(%esp), %edi # dest
+ movl 20(%esp), %esi # src
+
+ movq MUNG(OFFSETY), %mm7
pxor %mm6, %mm6
movq (%esi), %mm0
@@ -564,14 +571,15 @@ _dv_pgm_copy_y_block_mmx:
_dv_video_copy_y_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
- movq OFFSETBX, %mm7
+ LOAD_PIC_REG(bp)
+
+ movl 16(%esp), %edi # dest
+ movl 20(%esp), %esi # src
+
+ movq MUNG(OFFSETBX), %mm7
pxor %mm6, %mm6
movq (%esi), %mm0
@@ -852,16 +864,16 @@ _dv_ppm_copy_pal_c_block_mmx:
_dv_pgm_copy_pal_c_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
pushl %ebx
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
+ LOAD_PIC_REG(bp)
+
+ movl 20(%esp), %edi # dest
+ movl 24(%esp), %esi # src
- movq OFFSETBX, %mm7
+ movq MUNG(OFFSETBX), %mm7
pxor %mm6, %mm6
@@ -1000,15 +1014,16 @@ _dv_pgm_copy_pal_c_block_mmx:
_dv_video_copy_pal_c_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
pushl %ebx
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
- movq OFFSETBX, %mm7
+ LOAD_PIC_REG(bp)
+
+ movl 20(%esp), %edi # dest
+ movl 24(%esp), %esi # src
+
+ movq MUNG(OFFSETBX), %mm7
paddw %mm7, %mm7
pxor %mm6, %mm6
@@ -1095,18 +1112,18 @@ video_copy_pal_c_block_mmx_loop:
_dv_ppm_copy_ntsc_c_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
pushl %ebx
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
+
+ LOAD_PIC_REG(bp)
+
+ movl 20(%esp), %edi # dest
+ movl 24(%esp), %esi # src
movl $4, %ebx
- movq ALLONE, %mm6
-
+ movq MUNG(ALLONE), %mm6
ppm_copy_ntsc_c_block_mmx_loop:
movq (%esi), %mm0
@@ -1168,14 +1187,15 @@ ppm_copy_ntsc_c_block_mmx_loop:
_dv_pgm_copy_ntsc_c_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
- movq OFFSETBX, %mm7
+ LOAD_PIC_REG(bp)
+
+ movl 16(%esp), %edi # dest
+ movl 20(%esp), %esi # src
+
+ movq MUNG(OFFSETBX), %mm7
paddw %mm7, %mm7
pxor %mm6, %mm6
@@ -1325,15 +1347,16 @@ _dv_pgm_copy_ntsc_c_block_mmx:
_dv_video_copy_ntsc_c_block_mmx:
pushl %ebp
- movl %esp, %ebp
pushl %esi
pushl %edi
pushl %ebx
-
- movl 8(%ebp), %edi # dest
- movl 12(%ebp), %esi # src
- movq OFFSETBX, %mm7
+ LOAD_PIC_REG(bp)
+
+ movl 20(%esp), %edi # dest
+ movl 24(%esp), %esi # src
+
+ movq MUNG(OFFSETBX), %mm7
paddw %mm7, %mm7
pxor %mm6, %mm6
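The rewritten _inPtr/_rows/... defines encode the same calling convention numerically. The old values were %ebp-relative, with 8 = return address plus saved %ebp; the new ones are %esp-relative and keep the old constant as the "+8" tail, adding a 24-byte bias for the registers the prologue pushes after %ebp (six 4-byte pushes, going by the bias; part of that prologue sits in context elided between the hunks). A hedged sketch of the equivalence:

    /* old frame:  pushl %ebp ; movl %esp, %ebp
           _inPtr = 8(%ebp)        = ret (4) + saved %ebp (4)
       new frame:  pushl %ebp, then six more callee-saved pushes
           _inPtr = (24+8)(%esp)   = 6*4 extra pushes + the old 8   */
    movl _inPtr(%esp), %eax     /* assembles as: movl 32(%esp), %eax */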
--- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S
+++ libdv-0.104/libdv/rgbtoyuv_x86_64.S
@@ -41,9 +41,6 @@
#define DV_WIDTH_SHORT_HALF 720
#define DV_WIDTH_BYTE_HALF 360
-.global _dv_rgbtoycb_mmx_x86_64
-# .global yuvtoycb_mmx_x86_64
-
.data
.align 8
--- libdv-0.104-old/libdv/vlc_x86.S
+++ libdv-0.104/libdv/vlc_x86.S
@@ -1,31 +1,39 @@
#include "asmoff.h"
.section .note.GNU-stack, "", @progbits
+ #include "asm_common.S"
.text
.align 4
.globl dv_decode_vlc
+.globl asm_dv_decode_vlc
+.hidden asm_dv_decode_vlc
+asm_dv_decode_vlc = dv_decode_vlc
+
.type dv_decode_vlc,@function
dv_decode_vlc:
pushl %ebx
+ pushl %ebp
+
+ LOAD_PIC_REG(bp)
- /* Args are at 8(%esp). */
- movl 8(%esp),%eax /* %eax is bits */
- movl 12(%esp),%ebx /* %ebx is maxbits */
+ /* Args are at 12(%esp). */
+ movl 12(%esp),%eax /* %eax is bits */
+ movl 16(%esp),%ebx /* %ebx is maxbits */
andl $0x3f,%ebx /* limit index range STL*/
- movl dv_vlc_class_index_mask(,%ebx,4),%edx
+ movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
andl %eax,%edx
- movl dv_vlc_class_index_rshift(,%ebx,4),%ecx
+ movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
sarl %cl,%edx
- movl dv_vlc_classes(,%ebx,4),%ecx
+ movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
movsbl (%ecx,%edx,1),%edx /* %edx is class */
- movl dv_vlc_index_mask(,%edx,4),%ebx
- movl dv_vlc_index_rshift(,%edx,4),%ecx
+ movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
+ movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
andl %eax,%ebx
sarl %cl,%ebx
- movl dv_vlc_lookups(,%edx,4),%edx
+ movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
movl (%edx,%ebx,4),%edx
/* Now %edx holds result, like this:
@@ -42,7 +51,7 @@ dv_decode_vlc:
movl %edx,%ecx
sarl $8,%ecx
andl $0xff,%ecx
- movl sign_mask(,%ecx,4),%ebx
+ movl MUNG_ARR(sign_mask,%ecx,4),%ebx
andl %ebx,%eax
negl %eax
sarl $31,%eax
@@ -63,14 +72,14 @@ dv_decode_vlc:
*result = broken;
Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
*/
- movl 12(%esp),%ebx /* %ebx is maxbits */
+ movl 16(%esp),%ebx /* %ebx is maxbits */
subl %ecx,%ebx
sbbl %ebx,%ebx
orl %ebx,%edx
- movl 16(%esp),%eax
+ movl 20(%esp),%eax
movl %edx,(%eax)
-
+ popl %ebp
popl %ebx
ret
@@ -80,21 +89,28 @@ dv_decode_vlc:
.type __dv_decode_vlc,@function
__dv_decode_vlc:
pushl %ebx
+ pushl %ebp
+
+ LOAD_PIC_REG(bp)
- /* Args are at 8(%esp). */
- movl 8(%esp),%eax /* %eax is bits */
+ /* Args are at 12(%esp). */
+ movl 12(%esp),%eax /* %eax is bits */
movl %eax,%edx /* %edx is class */
andl $0xfe00,%edx
sarl $9,%edx
+#ifdef __PIC__
+ movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
+#else
movsbl dv_vlc_class_lookup5(%edx),%edx
-
- movl dv_vlc_index_mask(,%edx,4),%ebx
- movl dv_vlc_index_rshift(,%edx,4),%ecx
+#endif
+
+ movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
+ movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
andl %eax,%ebx
sarl %cl,%ebx
- movl dv_vlc_lookups(,%edx,4),%edx
+ movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
movl (%edx,%ebx,4),%edx
/* Now %edx holds result, like this:
@@ -112,7 +128,7 @@ __dv_decode_vlc:
movl %edx,%ecx
sarl $8,%ecx
andl $0xff,%ecx
- movl sign_mask(,%ecx,4),%ecx
+ movl MUNG_ARR(sign_mask,%ecx,4),%ecx
andl %ecx,%eax
negl %eax
sarl $31,%eax
@@ -127,9 +143,9 @@ __dv_decode_vlc:
xorl %eax,%edx
subl %eax,%edx
- movl 12(%esp),%eax
+ movl 16(%esp),%eax
movl %edx,(%eax)
-
+ popl %ebp
popl %ebx
ret
@@ -140,14 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
*/
.text
.align 4
+.globl asm_dv_parse_ac_coeffs_pass0
+.hidden asm_dv_parse_ac_coeffs_pass0
+ asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
+
.globl dv_parse_ac_coeffs_pass0
.type dv_parse_ac_coeffs_pass0,@function
dv_parse_ac_coeffs_pass0:
pushl %ebx
pushl %edi
pushl %esi
pushl %ebp
+ LOAD_PIC_REG(si)
+
#define ARGn(N) (20+(4*(N)))(%esp)
/*
@@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0:
ebp bl
*/
movl ARGn(2),%ebp
+#ifndef __PIC__
movl ARGn(0),%esi
movl bitstream_t_buf(%esi),%esi
+#endif
movl dv_block_t_offset(%ebp),%edi
movl dv_block_t_reorder(%ebp),%ebx
@@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0:
movq dv_block_t_coeffs(%ebp),%mm1
pxor %mm0,%mm0
+#ifdef __PIC__
+ pand const_f_0_0_0@GOTOFF(%esi),%mm1
+#else
pand const_f_0_0_0,%mm1
+#endif
movq %mm1,dv_block_t_coeffs(%ebp)
movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
@@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0:
readloop:
movl %edi,%ecx
shrl $3,%ecx
+#ifdef __PIC__
+ movl ARGn(0),%eax
+ addl bitstream_t_buf(%eax),%ecx
+ movzbl (%ecx),%eax
+ movzbl 1(%ecx),%edx
+ movzbl 2(%ecx),%ecx
+#else
movzbl (%esi,%ecx,1),%eax
movzbl 1(%esi,%ecx,1),%edx
movzbl 2(%esi,%ecx,1),%ecx
+#endif
shll $16,%eax
shll $8,%edx
orl %ecx,%eax
@@ -217,7 +254,11 @@ readloop:
/* Attempt to use the shortcut first. If it hits, then
this vlc term has been decoded. */
+#ifdef __PIC__
+ movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
+#else
movl dv_vlc_class1_shortcut(,%ecx,4),%edx
+#endif
test $0x80,%edx
je done_decode
@@ -228,12 +269,19 @@ readloop:
movl %ebx,dv_block_t_reorder(%ebp)
/* %eax is bits */
-
+#ifdef __PIC__
+ movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
+
+ movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
+ movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
+ movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
+#else
movsbl dv_vlc_class_lookup5(%ecx),%ecx
movl dv_vlc_index_mask(,%ecx,4),%ebx
movl dv_vlc_lookups(,%ecx,4),%edx
movl dv_vlc_index_rshift(,%ecx,4),%ecx
+#endif
andl %eax,%ebx
sarl %cl,%ebx
@@ -256,7 +304,11 @@ readloop:
movl %edx,%ecx
sarl $8,%ecx
andl $0xff,%ecx
+#ifdef __PIC__
+ movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx
+#else
movl sign_mask(,%ecx,4),%ecx
+#endif
andl %ecx,%eax
negl %eax
sarl $31,%eax
@@ -326,10 +378,16 @@ alldone:
slowpath:
/* slow path: use dv_decode_vlc */;
+#ifdef __PIC__
+ pushl %esi
+ leal vlc@GOTOFF(%esi),%esi
+ xchgl %esi,(%esp) /* last parameter is &vlc */
+#else
pushl $vlc /* last parameter is &vlc */
+#endif
pushl %edx /* bits_left */
pushl %eax /* bits */
- call dv_decode_vlc
+ call asm_dv_decode_vlc
addl $12,%esp
test $0x80,%edx /* If (vlc.run < 0) break */
jne escape
@@ -359,6 +417,8 @@ show16:
pushl %esi
pushl %ebp
+ LOAD_PIC_REG(si)
+
#define ARGn(N) (20+(4*(N)))(%esp)
movl ARGn(1),%eax /* quality */
@@ -373,7 +434,11 @@ dv_parse_video_segment:
jz its_mono
movl $6,%ebx
its_mono:
+#ifdef __PIC__
+ movl %ebx,n_blocks@GOTOFF(%esi)
+#else
movl %ebx,n_blocks
+#endif
/*
* ebx seg/b
@@ -384,15 +449,22 @@ its_mono:
* ebp bl
*/
movl ARGn(0),%ebx
+#ifndef __PIC__
movl dv_videosegment_t_bs(%ebx),%esi
movl bitstream_t_buf(%esi),%esi
+#endif
leal dv_videosegment_t_mb(%ebx),%edi
movl $0,%eax
movl $0,%ecx
macloop:
+#ifdef __PIC__
+ movl %eax,m@GOTOFF(%esi)
+ movl %ecx,mb_start@GOTOFF(%esi)
+#else
movl %eax,m
movl %ecx,mb_start
+#endif
movl ARGn(0),%ebx
@@ -400,7 +472,13 @@ macloop:
/* mb->qno = bitstream_get(bs,4); */
movl %ecx,%edx
shr $3,%edx
+#ifdef __PIC__
+ movl dv_videosegment_t_bs(%ebx),%ecx
+ movl bitstream_t_buf(%ecx),%ecx
+ movzbl 3(%ecx,%edx,1),%edx
+#else
movzbl 3(%esi,%edx,1),%edx
+#endif
andl $0xf,%edx
movl %edx,dv_macroblock_t_qno(%edi)
@@ -411,7 +489,11 @@ macloop:
movl %edx,dv_macroblock_t_eob_count(%edi)
/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
+#ifdef __PIC__
+ movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
+#else
movl dv_super_map_vertical(,%eax,4),%edx
+#endif
movl dv_videosegment_t_i(%ebx),%ecx
addl %ecx,%edx
@@ -422,11 +504,20 @@ skarly:
andl $1,%ecx
shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */
+#ifdef __PIC__
+ leal mod_10@GOTOFF(%esi),%edx
+ movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */
+#else
movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */
+#endif
movl %edx,dv_macroblock_t_i(%edi)
/* mb->j = dv_super_map_horizontal[m]; */
+#ifdef __PIC__
+ movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
+#else
movl dv_super_map_horizontal(,%eax,4),%edx
+#endif
movl %edx,dv_macroblock_t_j(%edi)
/* mb->k = seg->k; */
@@ -445,12 +536,28 @@ blkloop:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
*/
/* dc = bitstream_get(bs,9); */
+#ifdef __PIC__
+ movl mb_start@GOTOFF(%esi),%ecx
+#else
movl mb_start,%ecx
+#endif
shr $3,%ecx
+#ifdef __PIC__
+ movzbl blk_start@GOTOFF(%esi,%ebx),%edx
+#else
movzbl blk_start(%ebx),%edx
+#endif
addl %ecx,%edx
+#ifdef __PIC__
+ movl ARGn(0),%ecx
+ movl dv_videosegment_t_bs(%ecx),%ecx
+ movl bitstream_t_buf(%ecx),%ecx
+ movzbl (%ecx,%edx,1),%eax /* hi byte */
+ movzbl 1(%ecx,%edx,1),%ecx /* lo byte */
+#else
movzbl (%esi,%edx,1),%eax /* hi byte */
movzbl 1(%esi,%edx,1),%ecx /* lo byte */
+#endif
shll $8,%eax
orl %ecx,%eax
@@ -477,7 +584,11 @@ blkloop:
/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
shll $6,%eax
+#ifdef __PIC__
+ leal dv_reorder@GOTOFF+1(%esi,%eax),%eax
+#else
addl $(dv_reorder+1),%eax
+#endif
movl %eax,dv_block_t_reorder(%ebp)
/* bl->reorder_sentinel = bl->reorder + 63; */
@@ -485,13 +596,22 @@ blkloop:
movl %eax,dv_block_t_reorder_sentinel(%ebp)
/* bl->offset= mb_start + dv_parse_bit_start[b]; */
+#ifdef __PIC__
+ movl mb_start@GOTOFF(%esi),%ecx
+ movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
+#else
movl mb_start,%ecx
movl dv_parse_bit_start(,%ebx,4),%eax
+#endif
addl %ecx,%eax
movl %eax,dv_block_t_offset(%ebp)
/* bl->end= mb_start + dv_parse_bit_end[b]; */
+#ifdef __PIC__
+ movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
+#else
movl dv_parse_bit_end(,%ebx,4),%eax
+#endif
addl %ecx,%eax
movl %eax,dv_block_t_end(%ebp)
@@ -503,7 +623,11 @@ blkloop:
/* no AC pass. Just zero out the remaining coeffs */
movq dv_block_t_coeffs(%ebp),%mm1
pxor %mm0,%mm0
+#ifdef __PIC__
+ pand const_f_0_0_0@GOTOFF(%esi),%mm1
+#else
pand const_f_0_0_0,%mm1
+#endif
movq %mm1,dv_block_t_coeffs(%ebp)
movq %mm0,(dv_block_t_coeffs + 8)(%ebp)
movq %mm0,(dv_block_t_coeffs + 16)(%ebp)
@@ -528,18 +652,27 @@ do_ac_pass:
pushl %ebp
pushl %edi
pushl %eax
- call dv_parse_ac_coeffs_pass0
+ call asm_dv_parse_ac_coeffs_pass0
addl $12,%esp
done_ac:
+#ifdef __PIC__
+ movl n_blocks@GOTOFF(%esi),%eax
+#else
movl n_blocks,%eax
+#endif
addl $dv_block_t_size,%ebp
incl %ebx
cmpl %eax,%ebx
jnz blkloop
+#ifdef __PIC__
+ movl m@GOTOFF(%esi),%eax
+ movl mb_start@GOTOFF(%esi),%ecx
+#else
movl m,%eax
movl mb_start,%ecx
+#endif
addl $(8 * 80),%ecx
addl $dv_macroblock_t_size,%edi
incl %eax
@@ -557,7 +690,7 @@ done_ac:
andl $DV_QUALITY_AC_MASK,%eax
cmpl $DV_QUALITY_AC_2,%eax
- jz dv_parse_ac_coeffs
+ jz asm_dv_parse_ac_coeffs
movl $0,%eax
ret
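Two details of this last file are worth spelling out. First, indexed table loads of the form table(,%reg,4) have no position-independent equivalent without a base register, so under __PIC__ they become table@GOTOFF(%esi,%reg,4) with the GOT base in %esi; that is exactly what MUNG_ARR from asm_common.S expands to. Second, because %esi now carries the GOT base, the bitstream buffer pointer that the non-PIC code caches in %esi must be re-derived from the function argument at each use, which is what the added ARGn(0)/bitstream_t_buf reloads inside the loops do. The addressing change in isolation, as a hedged standalone fragment (assuming %esi was set up earlier by LOAD_PIC_REG(si)):

#ifdef __PIC__
    movl sign_mask@GOTOFF(%esi,%ecx,4),%ebx  /* GOT base + scaled index */
#else
    movl sign_mask(,%ecx,4),%ebx             /* absolute address: TEXTREL in a DSO */
#endif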