openjpeg-20070821svn-t1-updateflags-x86_64.patch 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. diff -urN -x '*.orig' -x '*.rej' -x '*~' -x '.*' OpenJPEG.orig/libopenjpeg/t1.c OpenJPEG.patched/libopenjpeg/t1.c
  2. --- OpenJPEG.orig/libopenjpeg/t1.c 2007-08-23 05:53:17.000000000 -0500
  3. +++ OpenJPEG.patched/libopenjpeg/t1.c 2007-08-23 05:56:33.000000000 -0500
  4. @@ -45,7 +45,11 @@
  5. static char t1_getspb(int f);
  6. static short t1_getnmsedec_sig(int x, int bitpos);
  7. static short t1_getnmsedec_ref(int x, int bitpos);
  8. +#ifdef __amd64__
  9. +static INLINE void t1_updateflags(flag_t *flagsp, int s, int stride);
  10. +#else
  11. static void t1_updateflags(flag_t *flagsp, int s, int stride);
  12. +#endif
  13. /**
  14. Encode significant pass
  15. */
  16. @@ -258,6 +262,38 @@
  17. return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
  18. }
  19. +#ifdef __amd64__
  20. +
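  21. +/* On 64-bit platforms three adjacent 16-bit flags can be updated with one 64-bit OR (SWAR). */
  22. +/* Relies on little-endian lane order (always true under __amd64__); revisit before enabling on other 64-bit targets. */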
  23. +
  24. +#define VEC(x,y,z) ((int64)(x)|((int64)(y)<<16)|((int64)(z)<<32)) /* pack three flag masks into 16-bit lanes 0..2 of an int64; outer parentheses keep the expansion safe inside any expression */
  25. +
  26. +static void t1_updateflags(flag_t *flagsp, int s, int stride) { /* ORs significance bits (plus sign bits when s == 1) into *flagsp and its eight neighbours, three flags per 64-bit access */
  27. + static const int64 mod[] = { /* read as mod[s], mod[s+2], mod[s+4], so s must be 0 or 1 to stay inside the six entries */
  28. + VEC(T1_SIG_SE, T1_SIG_E, T1_SIG_NE), /* s == 0, applied at offset -stride */
  29. + VEC(T1_SIG_SE, T1_SIG_E|T1_SGN_E, T1_SIG_NE), /* s == 1, applied at offset -stride */
  30. + VEC(T1_SIG_S, T1_SIG, T1_SIG_N), /* s == 0, applied at offset 0; middle lane marks *flagsp itself with T1_SIG */
  31. + VEC(T1_SIG_S|T1_SGN_S, T1_SIG, T1_SIG_N|T1_SGN_N), /* s == 1, applied at offset 0 */
  32. + VEC(T1_SIG_SW, T1_SIG_W, T1_SIG_NW), /* s == 0, applied at offset +stride */
  33. + VEC(T1_SIG_SW, T1_SIG_W|T1_SGN_W, T1_SIG_NW) /* s == 1, applied at offset +stride */
  34. + };
  35. + /* NOTE(review): the casts below access flag_t storage through int64 at addresses that need not be 8-byte aligned; assumes flag_t is 16 bits wide (VEC uses 16-bit lanes) and that the build tolerates this type punning -- confirm. */
  36. + int64 tmp1 = *(int64*)((void*)&flagsp[-1 - stride]); /* 64-bit load starting at flagsp[-1 - stride] */
  37. + int64 tmp2 = *(int64*)((void*)&flagsp[-1 ]); /* 64-bit load starting at flagsp[-1] */
  38. + int64 tmp3 = *(int64*)((void*)&flagsp[-1 + stride]); /* 64-bit load starting at flagsp[-1 + stride] */
  39. +
  40. + tmp1 |= mod[s]; /* only the low 48 bits of each mask are non-zero, so the top 16 bits loaded are written back unchanged */
  41. + tmp2 |= mod[s+2];
  42. + tmp3 |= mod[s+4];
  43. +
  44. + *(int64*)((void*)&flagsp[-1 - stride]) = tmp1; /* each store rewrites the same 64 bits that were loaded above */
  45. + *(int64*)((void*)&flagsp[-1 ]) = tmp2;
  46. + *(int64*)((void*)&flagsp[-1 + stride]) = tmp3;
  47. +}
  48. +
  49. +#else
  50. +
  51. static void t1_updateflags(flag_t *flagsp, int s, int stride) {
  52. static const flag_t mod[] = {
  53. T1_SIG_E, T1_SIG_E|T1_SGN_E,
  54. @@ -279,6 +315,8 @@
  55. flagsp[ 1 + stride] |= T1_SIG_NW;
  56. }
  57. +#endif
  58. +
  59. static void t1_enc_sigpass_step(
  60. opj_t1_t *t1,
  61. flag_t *flagsp,
  62. @@ -670,6 +708,8 @@
  63. for (i = 0; i < t1->w; ++i) {
  64. if (k + 3 < t1->h) {
  65. #ifdef __amd64__
  66. + /* 64 bit SWAR */
  67. + /* Relies on little-endian lane order (always true under __amd64__). */
  68. int64 tmp = *((int64*)&t1->flags[(k+1) + (i+1)*(t1->h+2)]);
  69. if (cblksty & J2K_CCP_CBLKSTY_VSC) {
  70. tmp &= ~((int64)(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S)<<48);
  71. @@ -780,6 +820,11 @@
  72. memset(t1->data,0,datasize * sizeof(int));
  73. flagssize=(h+2) * (w+2);
  74. +#ifdef __amd64__
  75. + /* The 64-bit SWAR accesses in t1_updateflags span four shorts although only
  76. + three are updated (48 bits), so reserve one extra short of headroom. */
  77. + ++flagssize;
  78. +#endif
  79. if(flagssize > t1->flagssize){
  80. opj_aligned_free(t1->flags);