12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- diff -urN -x '*.orig' -x '*.rej' -x '*~' -x '.*' OpenJPEG.orig/libopenjpeg/t1.c OpenJPEG.patched/libopenjpeg/t1.c
- --- OpenJPEG.orig/libopenjpeg/t1.c 2007-08-23 05:53:17.000000000 -0500
- +++ OpenJPEG.patched/libopenjpeg/t1.c 2007-08-23 05:56:33.000000000 -0500
- @@ -45,7 +45,11 @@
- static char t1_getspb(int f);
- static short t1_getnmsedec_sig(int x, int bitpos);
- static short t1_getnmsedec_ref(int x, int bitpos);
- +#ifdef __amd64__
- +static INLINE void t1_updateflags(flag_t *flagsp, int s, int stride);
- +#else
- static void t1_updateflags(flag_t *flagsp, int s, int stride);
- +#endif
- /**
- Encode significant pass
- */
- @@ -258,6 +262,38 @@
- return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
- }
-
- +#ifdef __amd64__
- +
- +/* 64-bit SWAR: flags are 16 bits wide, so one 64-bit OR updates three adjacent flags. */
- +/* VEC puts x in the lowest-addressed flag; this needs little endian, which amd64 always is. */
- +/* Bits 48-63 of each vector stay zero (assuming every T1_* mask fits in 16 bits). */
- +#define VEC(x,y,z) ((int64)(x)|((int64)(y)<<16)|((int64)(z)<<32))
- +/* OR the masks for s (0 or 1; odd entries add T1_SGN_*) into the 3x3 flags centred on flagsp. */
- +static void t1_updateflags(flag_t *flagsp, int s, int stride) {
- + static const int64 mod[] = {
- + VEC(T1_SIG_SE, T1_SIG_E, T1_SIG_NE),
- + VEC(T1_SIG_SE, T1_SIG_E|T1_SGN_E, T1_SIG_NE),
- + VEC(T1_SIG_S, T1_SIG, T1_SIG_N),
- + VEC(T1_SIG_S|T1_SGN_S, T1_SIG, T1_SIG_N|T1_SGN_N),
- + VEC(T1_SIG_SW, T1_SIG_W, T1_SIG_NW),
- + VEC(T1_SIG_SW, T1_SIG_W|T1_SGN_W, T1_SIG_NW)
- + };
- + /* memcpy rather than casting flag_t* to int64*: avoids strict-aliasing and alignment UB. */
- + int64 tmp1, tmp2, tmp3;
- + memcpy(&tmp1, &flagsp[-1 - stride], sizeof tmp1);
- + memcpy(&tmp2, &flagsp[-1 ], sizeof tmp2);
- + memcpy(&tmp3, &flagsp[-1 + stride], sizeof tmp3);
- + tmp1 |= mod[s];
- + tmp2 |= mod[s+2];
- + tmp3 |= mod[s+4];
- +
- + memcpy(&flagsp[-1 - stride], &tmp1, sizeof tmp1);
- + memcpy(&flagsp[-1 ], &tmp2, sizeof tmp2);
- + memcpy(&flagsp[-1 + stride], &tmp3, sizeof tmp3);
- +}
- +
- +#else
- +
- static void t1_updateflags(flag_t *flagsp, int s, int stride) {
- static const flag_t mod[] = {
- T1_SIG_E, T1_SIG_E|T1_SGN_E,
- @@ -279,6 +315,8 @@
- flagsp[ 1 + stride] |= T1_SIG_NW;
- }
-
- +#endif
- +
- static void t1_enc_sigpass_step(
- opj_t1_t *t1,
- flag_t *flagsp,
- @@ -670,6 +708,8 @@
- for (i = 0; i < t1->w; ++i) {
- if (k + 3 < t1->h) {
- #ifdef __amd64__
- + /* 64 bit SWAR: read four consecutive 16-bit flags with a single load */
- + /* Lane order assumes little endian, which always holds on amd64. */
- int64 tmp = *((int64*)&t1->flags[(k+1) + (i+1)*(t1->h+2)]);
- if (cblksty & J2K_CCP_CBLKSTY_VSC) {
- tmp &= ~((int64)(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S)<<48);
- @@ -780,6 +820,11 @@
- memset(t1->data,0,datasize * sizeof(int));
-
- flagssize=(h+2) * (w+2);
- +#ifdef __amd64__
- + /* The 64 bit SWAR accesses in t1_updateflags span four shorts although only
- + three are updated, so reserve one extra short of headroom at the end. */
- + ++flagssize;
- +#endif
-
- if(flagssize > t1->flagssize){
- opj_aligned_free(t1->flags);
|