summary | refs | log | tree | commit | diff | stats
path: root/gst/rtjpeg/RTjpeg.c
diff options
context:
space:
mode:
Diffstat (limited to 'gst/rtjpeg/RTjpeg.c')
-rw-r--r-- gst/rtjpeg/RTjpeg.c 4434
1 files changed, 2223 insertions, 2211 deletions
diff --git a/gst/rtjpeg/RTjpeg.c b/gst/rtjpeg/RTjpeg.c
index ab87fcc7..d2e7b67d 100644
--- a/gst/rtjpeg/RTjpeg.c
+++ b/gst/rtjpeg/RTjpeg.c
@@ -52,38 +52,47 @@ typedef unsigned long long __u64;
#include "mmx.h"
#endif
-static const unsigned char RTjpeg_ZZ[64]={
-0,
-8, 1,
-2, 9, 16,
-24, 17, 10, 3,
-4, 11, 18, 25, 32,
-40, 33, 26, 19, 12, 5,
-6, 13, 20, 27, 34, 41, 48,
-56, 49, 42, 35, 28, 21, 14, 7,
-15, 22, 29, 36, 43, 50, 57,
-58, 51, 44, 37, 30, 23,
-31, 38, 45, 52, 59,
-60, 53, 46, 39,
-47, 54, 61,
-62, 55,
-63 };
-
-static const __u64 RTjpeg_aan_tab[64]={
-4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
-5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL,
-5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL,
-5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL,
-4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
-3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL,
-2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL,
-1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL,
+static const unsigned char RTjpeg_ZZ[64] = {
+ 0,
+ 8, 1,
+ 2, 9, 16,
+ 24, 17, 10, 3,
+ 4, 11, 18, 25, 32,
+ 40, 33, 26, 19, 12, 5,
+ 6, 13, 20, 27, 34, 41, 48,
+ 56, 49, 42, 35, 28, 21, 14, 7,
+ 15, 22, 29, 36, 43, 50, 57,
+ 58, 51, 44, 37, 30, 23,
+ 31, 38, 45, 52, 59,
+ 60, 53, 46, 39,
+ 47, 54, 61,
+ 62, 55,
+ 63
+};
+
+static const __u64 RTjpeg_aan_tab[64] = {
+ 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL,
+ 3374581504ULL, 2324432128ULL, 1184891264ULL,
+ 5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL,
+ 4680582144ULL, 3224107520ULL, 1643641088ULL,
+ 5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL,
+ 4408998912ULL, 3036936960ULL, 1548224000ULL,
+ 5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL,
+ 3968072960ULL, 2733115392ULL, 1393296000ULL,
+ 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL,
+ 3374581504ULL, 2324432128ULL, 1184891264ULL,
+ 3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL,
+ 2651326208ULL, 1826357504ULL, 931136000ULL,
+ 2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL,
+ 1826357504ULL, 1258030336ULL, 641204288ULL,
+ 1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL,
+ 931136000ULL, 641204288ULL, 326894240ULL,
};
#ifndef HAVE_LIBMMX
-static __s32 RTjpeg_ws[64+31];
+static __s32 RTjpeg_ws[64 + 31];
#endif
-__u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
+__u8 RTjpeg_alldata[2 * 64 + 4 * 64 + 4 * 64 + 4 * 64 + 4 * 64 + 32];
__s16 *RTjpeg_block;
__s32 *RTjpeg_lqt;
@@ -97,7 +106,7 @@ int RTjpeg_width, RTjpeg_height;
int RTjpeg_Ywidth, RTjpeg_Cwidth;
int RTjpeg_Ysize, RTjpeg_Csize;
-__s16 *RTjpeg_old=NULL;
+__s16 *RTjpeg_old = NULL;
#ifdef HAVE_LIBMMX
mmx_t RTjpeg_lmask;
@@ -106,173 +115,169 @@ mmx_t RTjpeg_cmask;
__u16 RTjpeg_lmask;
__u16 RTjpeg_cmask;
#endif
-int RTjpeg_mtest=0;
+int RTjpeg_mtest = 0;
static const unsigned char RTjpeg_lum_quant_tbl[64] = {
- 16, 11, 10, 16, 24, 40, 51, 61,
- 12, 12, 14, 19, 26, 58, 60, 55,
- 14, 13, 16, 24, 40, 57, 69, 56,
- 14, 17, 22, 29, 51, 87, 80, 62,
- 18, 22, 37, 56, 68, 109, 103, 77,
- 24, 35, 55, 64, 81, 104, 113, 92,
- 49, 64, 78, 87, 103, 121, 120, 101,
- 72, 92, 95, 98, 112, 100, 103, 99
- };
+ 16, 11, 10, 16, 24, 40, 51, 61,
+ 12, 12, 14, 19, 26, 58, 60, 55,
+ 14, 13, 16, 24, 40, 57, 69, 56,
+ 14, 17, 22, 29, 51, 87, 80, 62,
+ 18, 22, 37, 56, 68, 109, 103, 77,
+ 24, 35, 55, 64, 81, 104, 113, 92,
+ 49, 64, 78, 87, 103, 121, 120, 101,
+ 72, 92, 95, 98, 112, 100, 103, 99
+};
static const unsigned char RTjpeg_chrom_quant_tbl[64] = {
- 17, 18, 24, 47, 99, 99, 99, 99,
- 18, 21, 26, 66, 99, 99, 99, 99,
- 24, 26, 56, 99, 99, 99, 99, 99,
- 47, 66, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99
- };
-
-int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
+ 17, 18, 24, 47, 99, 99, 99, 99,
+ 18, 21, 26, 66, 99, 99, 99, 99,
+ 24, 26, 56, 99, 99, 99, 99, 99,
+ 47, 66, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99
+};
+
+int
+RTjpeg_b2s (__s16 * data, __s8 * strm, __u8 bt8)
{
- register int ci, co=1, tmp;
- register __s16 ZZvalue;
-
- strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
-
- for(ci=1; ci<=bt8; ci++)
- {
- ZZvalue = data[RTjpeg_ZZ[ci]];
-
- if(ZZvalue>0)
- {
- strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
- }
- else
- {
- strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
- }
- }
-
- for(; ci<64; ci++)
- {
- ZZvalue = data[RTjpeg_ZZ[ci]];
-
- if(ZZvalue>0)
- {
- strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue;
- }
- else if(ZZvalue<0)
- {
- strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue;
- }
- else /* compress zeros */
- {
- tmp=ci;
- do
- {
- ci++;
- }
- while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0));
+ register int ci, co = 1, tmp;
+ register __s16 ZZvalue;
+
+ strm[0] =
+ (__u8) (data[RTjpeg_ZZ[0]] > 254) ? 254 : ((data[RTjpeg_ZZ[0]] <
+ 0) ? 0 : data[RTjpeg_ZZ[0]]);
+
+ for (ci = 1; ci <= bt8; ci++) {
+ ZZvalue = data[RTjpeg_ZZ[ci]];
+
+ if (ZZvalue > 0) {
+ strm[co++] = (__s8) (ZZvalue > 127) ? 127 : ZZvalue;
+ } else {
+ strm[co++] = (__s8) (ZZvalue < -128) ? -128 : ZZvalue;
+ }
+ }
+
+ for (; ci < 64; ci++) {
+ ZZvalue = data[RTjpeg_ZZ[ci]];
+
+ if (ZZvalue > 0) {
+ strm[co++] = (__s8) (ZZvalue > 63) ? 63 : ZZvalue;
+ } else if (ZZvalue < 0) {
+ strm[co++] = (__s8) (ZZvalue < -64) ? -64 : ZZvalue;
+ } else { /* compress zeros */
- strm[co++]=(__s8)(63+(ci-tmp));
- ci--;
+ tmp = ci;
+ do {
+ ci++;
+ }
+ while ((ci < 64) && (data[RTjpeg_ZZ[ci]] == 0));
+
+ strm[co++] = (__s8) (63 + (ci - tmp));
+ ci--;
+ }
}
- }
- return (int)co;
+ return (int) co;
}
-int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
+int
+RTjpeg_s2b (__s16 * data, __s8 * strm, __u8 bt8, __u32 * qtbl)
{
- int ci=1, co=1, tmp;
- register int i;
-
- i=RTjpeg_ZZ[0];
- data[i]=((__u8)strm[0])*qtbl[i];
-
- for(co=1; co<=bt8; co++)
- {
- i=RTjpeg_ZZ[co];
- data[i]=strm[ci++]*qtbl[i];
- }
-
- for(; co<64; co++)
- {
- if(strm[ci]>63)
- {
- tmp=co+strm[ci]-63;
- for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0;
- co--;
- } else
- {
- i=RTjpeg_ZZ[co];
- data[i]=strm[ci]*qtbl[i];
+ int ci = 1, co = 1, tmp;
+ register int i;
+
+ i = RTjpeg_ZZ[0];
+ data[i] = ((__u8) strm[0]) * qtbl[i];
+
+ for (co = 1; co <= bt8; co++) {
+ i = RTjpeg_ZZ[co];
+ data[i] = strm[ci++] * qtbl[i];
+ }
+
+ for (; co < 64; co++) {
+ if (strm[ci] > 63) {
+ tmp = co + strm[ci] - 63;
+ for (; co < tmp; co++)
+ data[RTjpeg_ZZ[co]] = 0;
+ co--;
+ } else {
+ i = RTjpeg_ZZ[co];
+ data[i] = strm[ci] * qtbl[i];
+ }
+ ci++;
}
- ci++;
- }
- return (int)ci;
+ return (int) ci;
}
#if defined(HAVE_LIBMMX)
-void RTjpeg_quant_init(void)
+void
+RTjpeg_quant_init (void)
{
- int i;
- __s16 *qtbl;
-
- qtbl=(__s16 *)RTjpeg_lqt;
- for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i];
-
- qtbl=(__s16 *)RTjpeg_cqt;
- for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i];
+ int i;
+ __s16 *qtbl;
+
+ qtbl = (__s16 *) RTjpeg_lqt;
+ for (i = 0; i < 64; i++)
+ qtbl[i] = (__s16) RTjpeg_lqt[i];
+
+ qtbl = (__s16 *) RTjpeg_cqt;
+ for (i = 0; i < 64; i++)
+ qtbl[i] = (__s16) RTjpeg_cqt[i];
}
-static mmx_t RTjpeg_ones=(mmx_t)(long long)0x0001000100010001LL;
-static mmx_t RTjpeg_half=(mmx_t)(long long)0x7fff7fff7fff7fffLL;
+static mmx_t RTjpeg_ones = (mmx_t) (long long) 0x0001000100010001LL;
+static mmx_t RTjpeg_half = (mmx_t) (long long) 0x7fff7fff7fff7fffLL;
-void RTjpeg_quant(__s16 *block, __s32 *qtbl)
+void
+RTjpeg_quant (__s16 * block, __s32 * qtbl)
{
- int i;
- mmx_t *bl, *ql;
-
- ql=(mmx_t *)qtbl;
- bl=(mmx_t *)block;
-
- movq_m2r(RTjpeg_ones, mm6);
- movq_m2r(RTjpeg_half, mm7);
-
- for(i=16; i; i--)
- {
- movq_m2r(*(ql++), mm0); /* quant vals (4) */
- movq_m2r(*bl, mm2); /* block vals (4) */
- movq_r2r(mm0, mm1);
- movq_r2r(mm2, mm3);
-
- punpcklwd_r2r(mm6, mm0); /* 1 qb 1 qa */
- punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */
-
- punpcklwd_r2r(mm7, mm2); /* 32767 bb 32767 ba */
- punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */
-
- pmaddwd_r2r(mm2, mm0); /* 32767+bb*qb 32767+ba*qa */
- pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */
-
- psrad_i2r(16, mm0);
- psrad_i2r(16, mm1);
-
- packssdw_r2r(mm1, mm0);
-
- movq_r2m(mm0, *(bl++));
-
- }
+ int i;
+ mmx_t *bl, *ql;
+
+ ql = (mmx_t *) qtbl;
+ bl = (mmx_t *) block;
+
+ movq_m2r (RTjpeg_ones, mm6);
+ movq_m2r (RTjpeg_half, mm7);
+
+ for (i = 16; i; i--) {
+ movq_m2r (*(ql++), mm0); /* quant vals (4) */
+ movq_m2r (*bl, mm2); /* block vals (4) */
+ movq_r2r (mm0, mm1);
+ movq_r2r (mm2, mm3);
+
+ punpcklwd_r2r (mm6, mm0); /* 1 qb 1 qa */
+ punpckhwd_r2r (mm6, mm1); /* 1 qd 1 qc */
+
+ punpcklwd_r2r (mm7, mm2); /* 32767 bb 32767 ba */
+ punpckhwd_r2r (mm7, mm3); /* 32767 bd 32767 bc */
+
+ pmaddwd_r2r (mm2, mm0); /* 32767+bb*qb 32767+ba*qa */
+ pmaddwd_r2r (mm3, mm1); /* 32767+bd*qd 32767+bc*qc */
+
+ psrad_i2r (16, mm0);
+ psrad_i2r (16, mm1);
+
+ packssdw_r2r (mm1, mm0);
+
+ movq_r2m (mm0, *(bl++));
+
+ }
}
#else
-void RTjpeg_quant_init(void)
+void
+RTjpeg_quant_init (void)
{
}
-void RTjpeg_quant(__s16 *block, __s32 *qtbl)
+void
+RTjpeg_quant (__s16 * block, __s32 * qtbl)
{
- int i;
-
- for(i=0; i<64; i++)
- block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16);
+ int i;
+
+ for (i = 0; i < 64; i++)
+ block[i] = (__s16) ((block[i] * qtbl[i] + 32767) >> 16);
}
#endif
@@ -280,36 +285,37 @@ void RTjpeg_quant(__s16 *block, __s32 *qtbl)
* Perform the forward DCT on one block of samples.
*/
#ifdef HAVE_LIBMMX
-static mmx_t RTjpeg_C4 =(mmx_t)(long long)0x2D412D412D412D41LL;
-static mmx_t RTjpeg_C6 =(mmx_t)(long long)0x187E187E187E187ELL;
-static mmx_t RTjpeg_C2mC6=(mmx_t)(long long)0x22A322A322A322A3LL;
-static mmx_t RTjpeg_C2pC6=(mmx_t)(long long)0x539F539F539F539FLL;
-static mmx_t RTjpeg_zero =(mmx_t)(long long)0x0000000000000000LL;
+static mmx_t RTjpeg_C4 = (mmx_t) (long long) 0x2D412D412D412D41LL;
+static mmx_t RTjpeg_C6 = (mmx_t) (long long) 0x187E187E187E187ELL;
+static mmx_t RTjpeg_C2mC6 = (mmx_t) (long long) 0x22A322A322A322A3LL;
+static mmx_t RTjpeg_C2pC6 = (mmx_t) (long long) 0x539F539F539F539FLL;
+static mmx_t RTjpeg_zero = (mmx_t) (long long) 0x0000000000000000LL;
#else
-#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */
-#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */
-#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */
-#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */
+#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */
+#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */
+#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */
+#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */
#define DESCALE10(x) (__s16)( ((x)+128) >> 8)
#define DESCALE20(x) (__s16)(((x)+32768) >> 16)
#define D_MULTIPLY(var,const) ((__s32) ((var) * (const)))
#endif
-void RTjpeg_dct_init(void)
+void
+RTjpeg_dct_init (void)
{
- int i;
-
- for(i=0; i<64; i++)
- {
- RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]);
- RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]);
- }
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ RTjpeg_lqt[i] = (((__u64) RTjpeg_lqt[i] << 32) / RTjpeg_aan_tab[i]);
+ RTjpeg_cqt[i] = (((__u64) RTjpeg_cqt[i] << 32) / RTjpeg_aan_tab[i]);
+ }
}
-void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
+void
+RTjpeg_dctY (__u8 * idata, __s16 * odata, int rskip)
{
#ifndef HAVE_LIBMMX
__s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
@@ -331,42 +337,42 @@ void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
tmp5 = idataptr[2] - idataptr[5];
tmp3 = idataptr[3] + idataptr[4];
tmp4 = idataptr[3] - idataptr[4];
-
+
tmp10 = (tmp0 + tmp3); /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = (tmp1 + tmp2);
tmp12 = tmp1 - tmp2;
-
- wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */
- wsptr[4] = (tmp10 - tmp11)<<8;
-
- z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
- wsptr[2] = (tmp13<<8) + z1; /* phase 5 */
- wsptr[6] = (tmp13<<8) - z1;
-
+
+ wsptr[0] = (tmp10 + tmp11) << 8; /* phase 3 */
+ wsptr[4] = (tmp10 - tmp11) << 8;
+
+ z1 = D_MULTIPLY (tmp12 + tmp13, FIX_0_707106781); /* c4 */
+ wsptr[2] = (tmp13 << 8) + z1; /* phase 5 */
+ wsptr[6] = (tmp13 << 8) - z1;
+
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
- z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
- z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
- z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
- z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+ z5 = D_MULTIPLY (tmp10 - tmp12, FIX_0_382683433); /* c6 */
+ z2 = D_MULTIPLY (tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+ z4 = D_MULTIPLY (tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+ z3 = D_MULTIPLY (tmp11, FIX_0_707106781); /* c4 */
- z11 = (tmp7<<8) + z3; /* phase 5 */
- z13 = (tmp7<<8) - z3;
+ z11 = (tmp7 << 8) + z3; /* phase 5 */
+ z13 = (tmp7 << 8) - z3;
wsptr[5] = z13 + z2; /* phase 6 */
wsptr[3] = z13 - z2;
wsptr[1] = z11 + z4;
wsptr[7] = z11 - z4;
- idataptr += rskip<<3; /* advance pointer to next row */
+ idataptr += rskip << 3; /* advance pointer to next row */
wsptr += 8;
}
wsptr = RTjpeg_ws;
- odataptr=odata;
+ odataptr = odata;
for (ctr = 7; ctr >= 0; ctr--) {
tmp0 = wsptr[0] + wsptr[56];
tmp7 = wsptr[0] - wsptr[56];
@@ -376,848 +382,848 @@ void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
tmp5 = wsptr[16] - wsptr[40];
tmp3 = wsptr[24] + wsptr[32];
tmp4 = wsptr[24] - wsptr[32];
-
+
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
-
- odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */
- odataptr[32] = DESCALE10(tmp10 - tmp11);
-
- z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
- odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */
- odataptr[48] = DESCALE20((tmp13<<8) - z1);
+
+ odataptr[0] = DESCALE10 (tmp10 + tmp11); /* phase 3 */
+ odataptr[32] = DESCALE10 (tmp10 - tmp11);
+
+ z1 = D_MULTIPLY (tmp12 + tmp13, FIX_0_707106781); /* c4 */
+ odataptr[16] = DESCALE20 ((tmp13 << 8) + z1); /* phase 5 */
+ odataptr[48] = DESCALE20 ((tmp13 << 8) - z1);
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
- z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
- z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
- z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
- z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+ z5 = D_MULTIPLY (tmp10 - tmp12, FIX_0_382683433); /* c6 */
+ z2 = D_MULTIPLY (tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+ z4 = D_MULTIPLY (tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+ z3 = D_MULTIPLY (tmp11, FIX_0_707106781); /* c4 */
- z11 = (tmp7<<8) + z3; /* phase 5 */
- z13 = (tmp7<<8) - z3;
+ z11 = (tmp7 << 8) + z3; /* phase 5 */
+ z13 = (tmp7 << 8) - z3;
- odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */
- odataptr[24] = DESCALE20(z13 - z2);
- odataptr[8] = DESCALE20(z11 + z4);
- odataptr[56] = DESCALE20(z11 - z4);
+ odataptr[40] = DESCALE20 (z13 + z2); /* phase 6 */
+ odataptr[24] = DESCALE20 (z13 - z2);
+ odataptr[8] = DESCALE20 (z11 + z4);
+ odataptr[56] = DESCALE20 (z11 - z4);
odataptr++; /* advance pointer to next column */
wsptr++;
}
#else
mmx_t tmp6, tmp7;
- register mmx_t *dataptr = (mmx_t *)odata;
- mmx_t *idata2 = (mmx_t *)idata;
+ register mmx_t *dataptr = (mmx_t *) odata;
+ mmx_t *idata2 = (mmx_t *) idata;
+
+ /* first copy the input 8 bit to the destination 16 bits */
+
+ movq_m2r (RTjpeg_zero, mm2);
+
- /* first copy the input 8 bit to the destination 16 bits */
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- movq_m2r(RTjpeg_zero, mm2);
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr));
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 1));
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ idata2 += rskip;
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr));
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+1));
-
- idata2 += rskip;
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 2));
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 3));
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+2));
+ idata2 += rskip;
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+3));
-
- idata2 += rskip;
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 4));
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+4));
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 5));
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+5));
-
- idata2 += rskip;
+ idata2 += rskip;
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+6));
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 6));
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+7));
-
- idata2 += rskip;
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 7));
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ idata2 += rskip;
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+8));
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+9));
-
- idata2 += rskip;
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 8));
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 9));
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+10));
+ idata2 += rskip;
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+11));
-
- idata2 += rskip;
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 10));
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+12));
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 11));
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+13));
-
- idata2 += rskip;
+ idata2 += rskip;
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+14));
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 12));
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+15));
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 13));
+
+ idata2 += rskip;
+
+ movq_m2r (*idata2, mm0);
+ movq_r2r (mm0, mm1);
+
+ punpcklbw_r2r (mm2, mm0);
+ movq_r2m (mm0, *(dataptr + 14));
+
+ punpckhbw_r2r (mm2, mm1);
+ movq_r2m (mm1, *(dataptr + 15));
/* Start Transpose to do calculations on rows */
- movq_m2r(*(dataptr+9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into m5 */
+ movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4) and copy into mm5 */
- movq_m2r(*(dataptr+13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */
- movq_r2r(mm7, mm5);
+ movq_m2r (*(dataptr + 13), mm6); /* m23:m22|m21:m20 - third line (line 6) and copy into mm2 */
+ movq_r2r (mm7, mm5);
- punpcklwd_m2r(*(dataptr+11), mm7); /* m11:m01|m10:m00 - interleave first and second lines */
- movq_r2r(mm6, mm2);
+ punpcklwd_m2r (*(dataptr + 11), mm7); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r (mm6, mm2);
- punpcklwd_m2r(*(dataptr+15), mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */
- movq_r2r(mm7, mm1);
+ punpcklwd_m2r (*(dataptr + 15), mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r (mm7, mm1);
- movq_m2r(*(dataptr+11), mm3); /* m13:m13|m11:m10 - second line */
- punpckldq_r2r(mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */
+ movq_m2r (*(dataptr + 11), mm3); /* m13:m12|m11:m10 - second line */
+ punpckldq_r2r (mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */
- movq_m2r(*(dataptr+15), mm0); /* m13:m13|m11:m10 - fourth line */
- punpckhdq_r2r(mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
+ movq_m2r (*(dataptr + 15), mm0); /* m33:m32|m31:m30 - fourth line */
+ punpckhdq_r2r (mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2m(mm7,*(dataptr+9)); /* write result 1 */
- punpckhwd_r2r(mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */
-
- movq_r2m(mm1,*(dataptr+11)); /* write result 2 */
- punpckhwd_r2r(mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ movq_r2m (mm7, *(dataptr + 9)); /* write result 1 */
+ punpckhwd_r2r (mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */
- movq_r2r(mm5, mm1);
- punpckldq_r2r(mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */
+ movq_r2m (mm1, *(dataptr + 11)); /* write result 2 */
+ punpckhwd_r2r (mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */
- movq_m2r(*(dataptr+1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */
- punpckhdq_r2r(mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */
+ movq_r2r (mm5, mm1);
+ punpckldq_r2r (mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */
- movq_r2m(mm5,*(dataptr+13)); /* write result 3 */
+ movq_m2r (*(dataptr + 1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */
+ punpckhdq_r2r (mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */
- /* last 4x4 done */
+ movq_r2m (mm5, *(dataptr + 13)); /* write result 3 */
- movq_r2m(mm1, *(dataptr+15)); /* write result 4, last 4x4 */
+ /* last 4x4 done */
- movq_m2r(*(dataptr+5), mm2); /* m23:m22|m21:m20 - third line */
- movq_r2r(mm0, mm6);
+ movq_r2m (mm1, *(dataptr + 15)); /* write result 4, last 4x4 */
- punpcklwd_m2r(*(dataptr+3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
- movq_r2r(mm2, mm7);
+ movq_m2r (*(dataptr + 5), mm2); /* m23:m22|m21:m20 - third line */
+ movq_r2r (mm0, mm6);
- punpcklwd_m2r(*(dataptr+7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */
- movq_r2r(mm0, mm4);
+ punpcklwd_m2r (*(dataptr + 3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r (mm2, mm7);
-
- movq_m2r(*(dataptr+8), mm1); /* n03:n02|n01:n00 - first line */
- punpckldq_r2r(mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */
+ punpcklwd_m2r (*(dataptr + 7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r (mm0, mm4);
- movq_m2r(*(dataptr+12), mm3); /* n23:n22|n21:n20 - third line */
- punpckhdq_r2r(mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */
- punpckhwd_m2r(*(dataptr+3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */
- movq_r2r(mm1, mm2); /* copy first line */
+ movq_m2r (*(dataptr + 8), mm1); /* n03:n02|n01:n00 - first line */
+ punpckldq_r2r (mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */
- punpckhwd_m2r(*(dataptr+7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */
- movq_r2r(mm6, mm5); /* copy first intermediate result */
+ movq_m2r (*(dataptr + 12), mm3); /* n23:n22|n21:n20 - third line */
+ punpckhdq_r2r (mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */
- movq_r2m(mm0, *(dataptr+8)); /* write result 1 */
- punpckhdq_r2r(mm7, mm5); /* m33:m23|m13:m03 - produce third result */
+ punpckhwd_m2r (*(dataptr + 3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */
+ movq_r2r (mm1, mm2); /* copy first line */
- punpcklwd_m2r(*(dataptr+10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */
- movq_r2r(mm3, mm0); /* copy third line */
+ punpckhwd_m2r (*(dataptr + 7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ movq_r2r (mm6, mm5); /* copy first intermediate result */
- punpckhwd_m2r(*(dataptr+10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */
+ movq_r2m (mm0, *(dataptr + 8)); /* write result 1 */
+ punpckhdq_r2r (mm7, mm5); /* m33:m23|m13:m03 - produce third result */
- movq_r2m(mm4, *(dataptr+10)); /* write result 2 out */
- punpckldq_r2r(mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */
+ punpcklwd_m2r (*(dataptr + 10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */
+ movq_r2r (mm3, mm0); /* copy third line */
- punpcklwd_m2r(*(dataptr+14), mm3); /* n31:n21|n30:n20 - interleave third and fourth lines */
- movq_r2r(mm1, mm4);
+ punpckhwd_m2r (*(dataptr + 10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */
- movq_r2m(mm6, *(dataptr+12)); /* write result 3 out */
- punpckldq_r2r(mm3, mm1); /* n30:n20|n10:n00 - produce first result */
+ movq_r2m (mm4, *(dataptr + 10)); /* write result 2 out */
+ punpckldq_r2r (mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */
- punpckhwd_m2r(*(dataptr+14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */
- movq_r2r(mm2, mm6);
+ punpcklwd_m2r (*(dataptr + 14), mm3); /* n31:n21|n30:n20 - interleave third and fourth lines */
+ movq_r2r (mm1, mm4);
- movq_r2m(mm5, *(dataptr+14)); /* write result 4 out */
- punpckhdq_r2r(mm3, mm4); /* n31:n21|n11:n01- produce second result */
+ movq_r2m (mm6, *(dataptr + 12)); /* write result 3 out */
+ punpckldq_r2r (mm3, mm1); /* n30:n20|n10:n00 - produce first result */
- movq_r2m(mm1, *(dataptr+1)); /* write result 5 out - (first result for other 4 x 4 block) */
- punpckldq_r2r(mm0, mm2); /* n32:n22|n12:n02- produce third result */
+ punpckhwd_m2r (*(dataptr + 14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */
+ movq_r2r (mm2, mm6);
- movq_r2m(mm4, *(dataptr+3)); /* write result 6 out */
- punpckhdq_r2r(mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */
+ movq_r2m (mm5, *(dataptr + 14)); /* write result 4 out */
+ punpckhdq_r2r (mm3, mm4); /* n31:n21|n11:n01- produce second result */
- movq_r2m(mm2, *(dataptr+5)); /* write result 7 out*/
+ movq_r2m (mm1, *(dataptr + 1)); /* write result 5 out - (first result for other 4 x 4 block) */
+ punpckldq_r2r (mm0, mm2); /* n32:n22|n12:n02- produce third result */
- movq_m2r(*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */
+ movq_r2m (mm4, *(dataptr + 3)); /* write result 6 out */
+ punpckhdq_r2r (mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */
- movq_r2m(mm6, *(dataptr+7)); /* write result 8 out */
+ movq_r2m (mm2, *(dataptr + 5)); /* write result 7 out */
+
+ movq_m2r (*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */
+
+ movq_r2m (mm6, *(dataptr + 7)); /* write result 8 out */
/* Do first 4x4 quadrant, which is used in the beginning of the DCT: */
- movq_m2r(*(dataptr+4), mm7); /* m23:m22|m21:m20 - third line */
- movq_r2r(mm0, mm2);
+ movq_m2r (*(dataptr + 4), mm7); /* m23:m22|m21:m20 - third line */
+ movq_r2r (mm0, mm2);
- punpcklwd_m2r(*(dataptr+2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
- movq_r2r(mm7, mm4);
+ punpcklwd_m2r (*(dataptr + 2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r (mm7, mm4);
- punpcklwd_m2r(*(dataptr+6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */
- movq_r2r(mm0, mm1);
+ punpcklwd_m2r (*(dataptr + 6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r (mm0, mm1);
- movq_m2r(*(dataptr+2), mm6); /* m13:m12|m11:m10 - second line */
- punpckldq_r2r(mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */
+ movq_m2r (*(dataptr + 2), mm6); /* m13:m12|m11:m10 - second line */
+ punpckldq_r2r (mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */
- movq_m2r(*(dataptr+6), mm5); /* m33:m32|m31:m30 - fourth line */
- punpckhdq_r2r(mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
+ movq_m2r (*(dataptr + 6), mm5); /* m33:m32|m31:m30 - fourth line */
+ punpckhdq_r2r (mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2r(mm0, mm7); /* write result 1 */
- punpckhwd_r2r(mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */
+ movq_r2r (mm0, mm7); /* write result 1 */
+ punpckhwd_r2r (mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */
- psubw_m2r(*(dataptr+14), mm7); /* tmp07=x0-x7: Stage 1 */
- movq_r2r(mm1, mm6); /* write result 2 */
+ psubw_m2r (*(dataptr + 14), mm7); /* tmp07=x0-x7: Stage 1 */
+ movq_r2r (mm1, mm6); /* write result 2 */
- paddw_m2r(*(dataptr+14), mm0); /* tmp00=x0+x7: Stage 1 */
- punpckhwd_r2r(mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ paddw_m2r (*(dataptr + 14), mm0); /* tmp00=x0+x7: Stage 1 */
+ punpckhwd_r2r (mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */
- paddw_m2r(*(dataptr+12), mm1); /* tmp01=x1+x6: Stage 1 */
- movq_r2r(mm2, mm3); /* copy first intermediate result */
+ paddw_m2r (*(dataptr + 12), mm1); /* tmp01=x1+x6: Stage 1 */
+ movq_r2r (mm2, mm3); /* copy first intermediate result */
- psubw_m2r(*(dataptr+12), mm6); /* tmp06=x1-x6: Stage 1 */
- punpckldq_r2r(mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */
+ psubw_m2r (*(dataptr + 12), mm6); /* tmp06=x1-x6: Stage 1 */
+ punpckldq_r2r (mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */
- movq_r2m(mm7, tmp7);
- movq_r2r(mm2, mm5); /* write result 3 */
+ movq_r2m (mm7, tmp7);
+ movq_r2r (mm2, mm5); /* write result 3 */
- movq_r2m(mm6, tmp6);
- punpckhdq_r2r(mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */
+ movq_r2m (mm6, tmp6);
+ punpckhdq_r2r (mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */
- paddw_m2r(*(dataptr+10), mm2); /* tmp02=x2+5: Stage 1 */
- movq_r2r(mm3, mm4); /* write result 4 */
+ paddw_m2r (*(dataptr + 10), mm2); /* tmp02=x2+x5: Stage 1 */
+ movq_r2r (mm3, mm4); /* write result 4 */
/************************************************************************************************
End of Transpose
************************************************************************************************/
- paddw_m2r(*(dataptr+8), mm3); /* tmp03=x3+x4: stage 1 */
- movq_r2r(mm0, mm7);
+ paddw_m2r (*(dataptr + 8), mm3); /* tmp03=x3+x4: stage 1 */
+ movq_r2r (mm0, mm7);
- psubw_m2r(*(dataptr+8), mm4); /* tmp04=x3-x4: stage 1 */
- movq_r2r(mm1, mm6);
+ psubw_m2r (*(dataptr + 8), mm4); /* tmp04=x3-x4: stage 1 */
+ movq_r2r (mm1, mm6);
- paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */
- psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */
+ paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */
+ psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */
- psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */
- paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */
+ psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */
+ paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */
- psubw_m2r(*(dataptr+10), mm5); /* tmp05=x2-x5: stage 1 */
- paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
+ psubw_m2r (*(dataptr + 10), mm5); /* tmp05=x2-x5: stage 1 */
+ paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */
- /* stage 3 */
+ /* stage 3 */
- movq_m2r(tmp6, mm2);
- movq_r2r(mm0, mm3);
+ movq_m2r (tmp6, mm2);
+ movq_r2r (mm0, mm3);
- psllw_i2r(2, mm6); /* m8 * 2^2 */
- paddw_r2r(mm1, mm0);
+ psllw_i2r (2, mm6); /* m8 * 2^2 */
+ paddw_r2r (mm1, mm0);
- pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
- psubw_r2r(mm1, mm3);
+ pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */
+ psubw_r2r (mm1, mm3);
- movq_r2m(mm0, *dataptr);
- movq_r2r(mm7, mm0);
-
- /* Odd part */
- movq_r2m(mm3, *(dataptr+8));
- paddw_r2r(mm5, mm4); /* tmp10 */
+ movq_r2m (mm0, *dataptr);
+ movq_r2r (mm7, mm0);
- movq_m2r(tmp7, mm3);
- paddw_r2r(mm6, mm0); /* tmp32 */
+ /* Odd part */
+ movq_r2m (mm3, *(dataptr + 8));
+ paddw_r2r (mm5, mm4); /* tmp10 */
- paddw_r2r(mm2, mm5); /* tmp11 */
- psubw_r2r(mm6, mm7); /* tmp33 */
+ movq_m2r (tmp7, mm3);
+ paddw_r2r (mm6, mm0); /* tmp32 */
- movq_r2m(mm0, *(dataptr+4));
- paddw_r2r(mm3, mm2); /* tmp12 */
+ paddw_r2r (mm2, mm5); /* tmp11 */
+ psubw_r2r (mm6, mm7); /* tmp33 */
- /* stage 4 */
+ movq_r2m (mm0, *(dataptr + 4));
+ paddw_r2r (mm3, mm2); /* tmp12 */
- movq_r2m(mm7, *(dataptr+12));
- movq_r2r(mm4, mm1); /* copy of tmp10 */
+ /* stage 4 */
- psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
- psllw_i2r(2, mm4); /* m8 * 2^2 */
+ movq_r2m (mm7, *(dataptr + 12));
+ movq_r2r (mm4, mm1); /* copy of tmp10 */
- movq_m2r(RTjpeg_C2mC6, mm0);
- psllw_i2r(2, mm1);
+ psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r (2, mm4); /* m8 * 2^2 */
- pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
- psllw_i2r(2, mm2);
+ movq_m2r (RTjpeg_C2mC6, mm0);
+ psllw_i2r (2, mm1);
- pmulhw_r2r(mm0, mm4); /* z5 */
+ pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */
+ psllw_i2r (2, mm2);
- /* stage 5 */
+ pmulhw_r2r (mm0, mm4); /* z5 */
- pmulhw_m2r(RTjpeg_C2pC6, mm2);
- psllw_i2r(2, mm5);
+ /* stage 5 */
- pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
- movq_r2r(mm3, mm0); /* copy tmp7 */
+ pmulhw_m2r (RTjpeg_C2pC6, mm2);
+ psllw_i2r (2, mm5);
- movq_m2r(*(dataptr+1), mm7);
- paddw_r2r(mm1, mm4); /* z2 */
+ pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */
+ movq_r2r (mm3, mm0); /* copy tmp7 */
- paddw_r2r(mm1, mm2); /* z4 */
+ movq_m2r (*(dataptr + 1), mm7);
+ paddw_r2r (mm1, mm4); /* z2 */
- paddw_r2r(mm5, mm0); /* z11 */
- psubw_r2r(mm5, mm3); /* z13 */
+ paddw_r2r (mm1, mm2); /* z4 */
- /* stage 6 */
+ paddw_r2r (mm5, mm0); /* z11 */
+ psubw_r2r (mm5, mm3); /* z13 */
- movq_r2r(mm3, mm5); /* copy z13 */
- psubw_r2r(mm4, mm3); /* y3=z13 - z2 */
+ /* stage 6 */
- paddw_r2r(mm4, mm5); /* y5=z13 + z2 */
- movq_r2r(mm0, mm6); /* copy z11 */
+ movq_r2r (mm3, mm5); /* copy z13 */
+ psubw_r2r (mm4, mm3); /* y3=z13 - z2 */
- movq_r2m(mm3, *(dataptr+6)); /*save y3 */
- psubw_r2r(mm2, mm0); /* y7=z11 - z4 */
+ paddw_r2r (mm4, mm5); /* y5=z13 + z2 */
+ movq_r2r (mm0, mm6); /* copy z11 */
- movq_r2m(mm5, *(dataptr+10)); /*save y5 */
- paddw_r2r(mm2, mm6); /* y1=z11 + z4 */
+ movq_r2m (mm3, *(dataptr + 6)); /*save y3 */
+ psubw_r2r (mm2, mm0); /* y7=z11 - z4 */
- movq_r2m(mm0, *(dataptr+14)); /*save y7 */
+ movq_r2m (mm5, *(dataptr + 10)); /*save y5 */
+ paddw_r2r (mm2, mm6); /* y1=z11 + z4 */
+
+ movq_r2m (mm0, *(dataptr + 14)); /*save y7 */
/************************************************
* End of 1st 4 rows
************************************************/
- movq_m2r(*(dataptr+3), mm1); /* load x1: stage 1 */
- movq_r2r(mm7, mm0); /* copy x0 */
+ movq_m2r (*(dataptr + 3), mm1); /* load x1: stage 1 */
+ movq_r2r (mm7, mm0); /* copy x0 */
+
+ movq_r2m (mm6, *(dataptr + 2)); /*save y1 */
- movq_r2m(mm6, *(dataptr+2)); /*save y1 */
+ movq_m2r (*(dataptr + 5), mm2); /* load x2: stage 1 */
+ movq_r2r (mm1, mm6); /* copy x1 */
- movq_m2r(*(dataptr+5), mm2); /* load x2: stage 1 */
- movq_r2r(mm1, mm6); /* copy x1 */
+ paddw_m2r (*(dataptr + 15), mm0); /* tmp00 = x0 + x7 */
- paddw_m2r(*(dataptr+15), mm0); /* tmp00 = x0 + x7 */
+ movq_m2r (*(dataptr + 7), mm3); /* load x3 : stage 1 */
+ movq_r2r (mm2, mm5); /* copy x2 */
- movq_m2r(*(dataptr+7), mm3); /* load x3 : stage 1 */
- movq_r2r(mm2, mm5); /* copy x2 */
+ psubw_m2r (*(dataptr + 15), mm7); /* tmp07 = x0 - x7 */
+ movq_r2r (mm3, mm4); /* copy x3 */
- psubw_m2r(*(dataptr+15), mm7); /* tmp07 = x0 - x7 */
- movq_r2r(mm3, mm4); /* copy x3 */
+ paddw_m2r (*(dataptr + 13), mm1); /* tmp01 = x1 + x6 */
- paddw_m2r(*(dataptr+13), mm1); /* tmp01 = x1 + x6 */
+ movq_r2m (mm7, tmp7); /* save tmp07 */
+ movq_r2r (mm0, mm7); /* copy tmp00 */
- movq_r2m(mm7, tmp7); /* save tmp07 */
- movq_r2r(mm0, mm7); /* copy tmp00 */
+ psubw_m2r (*(dataptr + 13), mm6); /* tmp06 = x1 - x6 */
- psubw_m2r(*(dataptr+13), mm6); /* tmp06 = x1 - x6 */
+ /* stage 2, Even Part */
- /* stage 2, Even Part */
+ paddw_m2r (*(dataptr + 9), mm3); /* tmp03 = x3 + x4 */
- paddw_m2r(*(dataptr+9), mm3); /* tmp03 = x3 + x4 */
+ movq_r2m (mm6, tmp6); /* save tmp06 */
+ movq_r2r (mm1, mm6); /* copy tmp01 */
- movq_r2m(mm6, tmp6); /* save tmp07 */
- movq_r2r(mm1, mm6); /* copy tmp01 */
+ paddw_m2r (*(dataptr + 11), mm2); /* tmp02 = x2 + x5 */
+ paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03 */
- paddw_m2r(*(dataptr+11), mm2); /* tmp02 = x2 + x5 */
- paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03 */
+ psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03 */
- psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03 */
+ psubw_m2r (*(dataptr + 9), mm4); /* tmp04 = x3 - x4 */
+ psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02 */
- psubw_m2r(*(dataptr+9), mm4); /* tmp04 = x3 - x4 */
- psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02 */
+ paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02 */
- paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02 */
+ psubw_m2r (*(dataptr + 11), mm5); /* tmp05 = x2 - x5 */
+ paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */
- psubw_m2r(*(dataptr+11), mm5); /* tmp05 = x2 - x5 */
- paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
+ /* stage 3, Even and stage 4 & 5 even */
- /* stage 3, Even and stage 4 & 5 even */
+ movq_m2r (tmp6, mm2); /* load tmp6 */
+ movq_r2r (mm0, mm3); /* copy tmp10 */
- movq_m2r(tmp6, mm2); /* load tmp6 */
- movq_r2r(mm0, mm3); /* copy tmp10 */
+ psllw_i2r (2, mm6); /* shift z1 */
+ paddw_r2r (mm1, mm0); /* y0=tmp10 + tmp11 */
- psllw_i2r(2, mm6); /* shift z1 */
- paddw_r2r(mm1, mm0); /* y0=tmp10 + tmp11 */
+ pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */
+ psubw_r2r (mm1, mm3); /* y4=tmp10 - tmp11 */
- pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
- psubw_r2r(mm1, mm3); /* y4=tmp10 - tmp11 */
+ movq_r2m (mm0, *(dataptr + 1)); /*save y0 */
+ movq_r2r (mm7, mm0); /* copy tmp13 */
- movq_r2m(mm0, *(dataptr+1)); /*save y0 */
- movq_r2r(mm7, mm0); /* copy tmp13 */
-
- /* odd part */
+ /* odd part */
- movq_r2m(mm3, *(dataptr+9)); /*save y4 */
- paddw_r2r(mm5, mm4); /* tmp10 = tmp4 + tmp5 */
+ movq_r2m (mm3, *(dataptr + 9)); /*save y4 */
+ paddw_r2r (mm5, mm4); /* tmp10 = tmp4 + tmp5 */
- movq_m2r(tmp7, mm3); /* load tmp7 */
- paddw_r2r(mm6, mm0); /* tmp32 = tmp13 + z1 */
+ movq_m2r (tmp7, mm3); /* load tmp7 */
+ paddw_r2r (mm6, mm0); /* tmp32 = tmp13 + z1 */
- paddw_r2r(mm2, mm5); /* tmp11 = tmp5 + tmp6 */
- psubw_r2r(mm6, mm7); /* tmp33 = tmp13 - z1 */
+ paddw_r2r (mm2, mm5); /* tmp11 = tmp5 + tmp6 */
+ psubw_r2r (mm6, mm7); /* tmp33 = tmp13 - z1 */
- movq_r2m(mm0, *(dataptr+5)); /*save y2 */
- paddw_r2r(mm3, mm2); /* tmp12 = tmp6 + tmp7 */
+ movq_r2m (mm0, *(dataptr + 5)); /*save y2 */
+ paddw_r2r (mm3, mm2); /* tmp12 = tmp6 + tmp7 */
- /* stage 4 */
+ /* stage 4 */
- movq_r2m(mm7, *(dataptr+13)); /*save y6 */
- movq_r2r(mm4, mm1); /* copy tmp10 */
+ movq_r2m (mm7, *(dataptr + 13)); /*save y6 */
+ movq_r2r (mm4, mm1); /* copy tmp10 */
- psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
- psllw_i2r(2, mm4); /* shift tmp10 */
+ psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r (2, mm4); /* shift tmp10 */
- movq_m2r(RTjpeg_C2mC6, mm0); /* load C2mC6 */
- psllw_i2r(2, mm1); /* shift (tmp10-tmp12) */
+ movq_m2r (RTjpeg_C2mC6, mm0); /* load C2mC6 */
+ psllw_i2r (2, mm1); /* shift (tmp10-tmp12) */
- pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
- psllw_i2r(2, mm5); /* prepare for multiply */
+ pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */
+ psllw_i2r (2, mm5); /* prepare for multiply */
- pmulhw_r2r(mm0, mm4); /* multiply by converted real */
+ pmulhw_r2r (mm0, mm4); /* multiply by converted real */
- /* stage 5 */
+ /* stage 5 */
- pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
- psllw_i2r(2, mm2); /* prepare for multiply */
+ pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */
+ psllw_i2r (2, mm2); /* prepare for multiply */
- pmulhw_m2r(RTjpeg_C2pC6, mm2); /* multiply */
- movq_r2r(mm3, mm0); /* copy tmp7 */
+ pmulhw_m2r (RTjpeg_C2pC6, mm2); /* multiply */
+ movq_r2r (mm3, mm0); /* copy tmp7 */
- movq_m2r(*(dataptr+9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
- paddw_r2r(mm1, mm4); /* z2 */
+ movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
+ paddw_r2r (mm1, mm4); /* z2 */
- paddw_r2r(mm5, mm0); /* z11 */
- psubw_r2r(mm5, mm3); /* z13 */
+ paddw_r2r (mm5, mm0); /* z11 */
+ psubw_r2r (mm5, mm3); /* z13 */
- /* stage 6 */
+ /* stage 6 */
- movq_r2r(mm3, mm5); /* copy z13 */
- paddw_r2r(mm1, mm2); /* z4 */
+ movq_r2r (mm3, mm5); /* copy z13 */
+ paddw_r2r (mm1, mm2); /* z4 */
- movq_r2r(mm0, mm6); /* copy z11 */
- psubw_r2r(mm4, mm5); /* y3 */
+ movq_r2r (mm0, mm6); /* copy z11 */
+ psubw_r2r (mm4, mm5); /* y3 */
- paddw_r2r(mm2, mm6); /* y1 */
- paddw_r2r(mm4, mm3); /* y5 */
+ paddw_r2r (mm2, mm6); /* y1 */
+ paddw_r2r (mm4, mm3); /* y5 */
- movq_r2m(mm5, *(dataptr+7)); /*save y3 */
+ movq_r2m (mm5, *(dataptr + 7)); /*save y3 */
+
+ movq_r2m (mm6, *(dataptr + 3)); /*save y1 */
+ psubw_r2r (mm2, mm0); /* y7 */
- movq_r2m(mm6, *(dataptr+3)); /*save y1 */
- psubw_r2r(mm2, mm0); /* y7 */
-
/************************************************************************************************
Start of Transpose
************************************************************************************************/
- movq_m2r(*(dataptr+13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */
- movq_r2r(mm7, mm5); /* copy first line */
+ movq_m2r (*(dataptr + 13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */
+ movq_r2r (mm7, mm5); /* copy first line */
- punpcklwd_r2r(mm3, mm7); /* m11:m01|m10:m00 - interleave first and second lines */
- movq_r2r(mm6, mm2); /* copy third line */
+ punpcklwd_r2r (mm3, mm7); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r (mm6, mm2); /* copy third line */
- punpcklwd_r2r(mm0, mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */
- movq_r2r(mm7, mm1); /* copy first intermediate result */
+ punpcklwd_r2r (mm0, mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r (mm7, mm1); /* copy first intermediate result */
- punpckldq_r2r(mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */
+ punpckldq_r2r (mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */
- punpckhdq_r2r(mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
+ punpckhdq_r2r (mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2m(mm7, *(dataptr+9)); /* write result 1 */
- punpckhwd_r2r(mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */
+ movq_r2m (mm7, *(dataptr + 9)); /* write result 1 */
+ punpckhwd_r2r (mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */
- movq_r2m(mm1, *(dataptr+11)); /* write result 2 */
- punpckhwd_r2r(mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ movq_r2m (mm1, *(dataptr + 11)); /* write result 2 */
+ punpckhwd_r2r (mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */
- movq_r2r(mm5, mm1); /* copy first intermediate result */
- punpckldq_r2r(mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */
+ movq_r2r (mm5, mm1); /* copy first intermediate result */
+ punpckldq_r2r (mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */
- movq_m2r(*(dataptr+1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */
- punpckhdq_r2r(mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */
+ movq_m2r (*(dataptr + 1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */
+ punpckhdq_r2r (mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */
- movq_r2m(mm5, *(dataptr+13)); /* write result 3 */
+ movq_r2m (mm5, *(dataptr + 13)); /* write result 3 */
/****** last 4x4 done */
- movq_r2m(mm1, *(dataptr+15)); /* write result 4, last 4x4 */
+ movq_r2m (mm1, *(dataptr + 15)); /* write result 4, last 4x4 */
+
+ movq_m2r (*(dataptr + 5), mm2); /* m23:m22|m21:m20 - third line */
+ movq_r2r (mm0, mm6); /* copy first line */
- movq_m2r(*(dataptr+5), mm2); /* m23:m22|m21:m20 - third line */
- movq_r2r(mm0, mm6); /* copy first line */
+ punpcklwd_m2r (*(dataptr + 3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r (mm2, mm7); /* copy third line */
- punpcklwd_m2r(*(dataptr+3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
- movq_r2r(mm2, mm7); /* copy third line */
+ punpcklwd_m2r (*(dataptr + 7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r (mm0, mm4); /* copy first intermediate result */
- punpcklwd_m2r(*(dataptr+7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */
- movq_r2r(mm0, mm4); /* copy first intermediate result */
-
- movq_m2r(*(dataptr+8), mm1); /* n03:n02|n01:n00 - first line */
- punpckldq_r2r(mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */
+ movq_m2r (*(dataptr + 8), mm1); /* n03:n02|n01:n00 - first line */
+ punpckldq_r2r (mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */
- movq_m2r(*(dataptr+12), mm3); /* n23:n22|n21:n20 - third line */
- punpckhdq_r2r(mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */
+ movq_m2r (*(dataptr + 12), mm3); /* n23:n22|n21:n20 - third line */
+ punpckhdq_r2r (mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */
- punpckhwd_m2r(*(dataptr+3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */
- movq_r2r(mm1, mm2); /* copy first line */
+ punpckhwd_m2r (*(dataptr + 3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */
+ movq_r2r (mm1, mm2); /* copy first line */
- punpckhwd_m2r(*(dataptr+7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */
- movq_r2r(mm6, mm5); /* copy first intermediate result */
+ punpckhwd_m2r (*(dataptr + 7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ movq_r2r (mm6, mm5); /* copy first intermediate result */
- movq_r2m(mm0, *(dataptr+8)); /* write result 1 */
- punpckhdq_r2r(mm7, mm5); /* m33:m23|m13:m03 - produce third result */
+ movq_r2m (mm0, *(dataptr + 8)); /* write result 1 */
+ punpckhdq_r2r (mm7, mm5); /* m33:m23|m13:m03 - produce third result */
- punpcklwd_m2r(*(dataptr+10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */
- movq_r2r(mm3, mm0); /* copy third line */
+ punpcklwd_m2r (*(dataptr + 10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */
+ movq_r2r (mm3, mm0); /* copy third line */
- punpckhwd_m2r(*(dataptr+10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */
+ punpckhwd_m2r (*(dataptr + 10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */
- movq_r2m(mm4, *(dataptr+10)); /* write result 2 out */
- punpckldq_r2r(mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */
+ movq_r2m (mm4, *(dataptr + 10)); /* write result 2 out */
+ punpckldq_r2r (mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */
- punpcklwd_m2r(*(dataptr+14), mm3); /* n33:n23|n32:n22 - interleave third and fourth lines */
- movq_r2r(mm1, mm4); /* copy second intermediate result */
+ punpcklwd_m2r (*(dataptr + 14), mm3); /* n33:n23|n32:n22 - interleave third and fourth lines */
+ movq_r2r (mm1, mm4); /* copy second intermediate result */
- movq_r2m(mm6, *(dataptr+12)); /* write result 3 out */
- punpckldq_r2r(mm3, mm1); /* */
+ movq_r2m (mm6, *(dataptr + 12)); /* write result 3 out */
+ punpckldq_r2r (mm3, mm1); /* */
- punpckhwd_m2r(*(dataptr+14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */
- movq_r2r(mm2, mm6); /* copy second intermediate result */
+ punpckhwd_m2r (*(dataptr + 14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */
+ movq_r2r (mm2, mm6); /* copy second intermediate result */
- movq_r2m(mm5, *(dataptr+14)); /* write result 4 out */
- punpckhdq_r2r(mm3, mm4); /* n31:n21|n11:n01- produce second result */
+ movq_r2m (mm5, *(dataptr + 14)); /* write result 4 out */
+ punpckhdq_r2r (mm3, mm4); /* n31:n21|n11:n01- produce second result */
- movq_r2m(mm1, *(dataptr+1)); /* write result 5 out - (first result for other 4 x 4 block) */
- punpckldq_r2r(mm0, mm2); /* n32:n22|n12:n02- produce third result */
+ movq_r2m (mm1, *(dataptr + 1)); /* write result 5 out - (first result for other 4 x 4 block) */
+ punpckldq_r2r (mm0, mm2); /* n32:n22|n12:n02- produce third result */
- movq_r2m(mm4, *(dataptr+3)); /* write result 6 out */
- punpckhdq_r2r(mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */
+ movq_r2m (mm4, *(dataptr + 3)); /* write result 6 out */
+ punpckhdq_r2r (mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */
- movq_r2m(mm2, *(dataptr+5)); /* write result 7 out */
+ movq_r2m (mm2, *(dataptr + 5)); /* write result 7 out */
- movq_m2r(*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */
+ movq_m2r (*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */
- movq_r2m(mm6, *(dataptr+7)); /* write result 8 out */
+ movq_r2m (mm6, *(dataptr + 7)); /* write result 8 out */
/* Do first 4x4 quadrant, which is used in the beginning of the DCT: */
- movq_m2r(*(dataptr+4), mm7); /* m23:m22|m21:m20 - third line */
- movq_r2r(mm0, mm2); /* copy first line */
+ movq_m2r (*(dataptr + 4), mm7); /* m23:m22|m21:m20 - third line */
+ movq_r2r (mm0, mm2); /* copy first line */
+
+ punpcklwd_m2r (*(dataptr + 2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r (mm7, mm4); /* copy third line */
- punpcklwd_m2r(*(dataptr+2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
- movq_r2r(mm7, mm4); /* copy third line */
-
- punpcklwd_m2r(*(dataptr+6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */
- movq_r2r(mm0, mm1); /* copy first intermediate result */
+ punpcklwd_m2r (*(dataptr + 6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r (mm0, mm1); /* copy first intermediate result */
- movq_m2r(*(dataptr+2), mm6); /* m13:m12|m11:m10 - second line */
- punpckldq_r2r(mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */
+ movq_m2r (*(dataptr + 2), mm6); /* m13:m12|m11:m10 - second line */
+ punpckldq_r2r (mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */
- movq_m2r(*(dataptr+6), mm5); /* m33:m32|m31:m30 - fourth line */
- punpckhdq_r2r(mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
+ movq_m2r (*(dataptr + 6), mm5); /* m33:m32|m31:m30 - fourth line */
+ punpckhdq_r2r (mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2r(mm0, mm7); /* write result 1 */
- punpckhwd_r2r(mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */
+ movq_r2r (mm0, mm7); /* write result 1 */
+ punpckhwd_r2r (mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */
- psubw_m2r(*(dataptr+14), mm7); /* tmp07=x0-x7: Stage 1 */
- movq_r2r(mm1, mm6); /* write result 2 */
+ psubw_m2r (*(dataptr + 14), mm7); /* tmp07=x0-x7: Stage 1 */
+ movq_r2r (mm1, mm6); /* write result 2 */
- paddw_m2r(*(dataptr+14), mm0); /* tmp00=x0+x7: Stage 1 */
- punpckhwd_r2r(mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ paddw_m2r (*(dataptr + 14), mm0); /* tmp00=x0+x7: Stage 1 */
+ punpckhwd_r2r (mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */
- paddw_m2r(*(dataptr+12), mm1); /* tmp01=x1+x6: Stage 1 */
- movq_r2r(mm2, mm3); /* copy first intermediate result */
+ paddw_m2r (*(dataptr + 12), mm1); /* tmp01=x1+x6: Stage 1 */
+ movq_r2r (mm2, mm3); /* copy first intermediate result */
- psubw_m2r(*(dataptr+12), mm6); /* tmp06=x1-x6: Stage 1 */
- punpckldq_r2r(mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */
+ psubw_m2r (*(dataptr + 12), mm6); /* tmp06=x1-x6: Stage 1 */
+ punpckldq_r2r (mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */
- movq_r2m(mm7, tmp7); /* save tmp07 */
- movq_r2r(mm2, mm5); /* write result 3 */
+ movq_r2m (mm7, tmp7); /* save tmp07 */
+ movq_r2r (mm2, mm5); /* write result 3 */
- movq_r2m(mm6, tmp6); /* save tmp06 */
+ movq_r2m (mm6, tmp6); /* save tmp06 */
- punpckhdq_r2r(mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */
+ punpckhdq_r2r (mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */
- paddw_m2r(*(dataptr+10), mm2); /* tmp02=x2+x5: stage 1 */
- movq_r2r(mm3, mm4); /* write result 4 */
+ paddw_m2r (*(dataptr + 10), mm2); /* tmp02=x2+x5: stage 1 */
+ movq_r2r (mm3, mm4); /* write result 4 */
/************************************************************************************************
End of Transpose 2
************************************************************************************************/
- paddw_m2r(*(dataptr+8), mm3); /* tmp03=x3+x4: stage 1 */
- movq_r2r(mm0, mm7);
+ paddw_m2r (*(dataptr + 8), mm3); /* tmp03=x3+x4: stage 1 */
+ movq_r2r (mm0, mm7);
- psubw_m2r(*(dataptr+8), mm4); /* tmp04=x3-x4: stage 1 */
- movq_r2r(mm1, mm6);
+ psubw_m2r (*(dataptr + 8), mm4); /* tmp04=x3-x4: stage 1 */
+ movq_r2r (mm1, mm6);
- paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */
- psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */
+ paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */
+ psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */
- psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */
- paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */
+ psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */
+ paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */
- psubw_m2r(*(dataptr+10), mm5); /* tmp05=x2-x5: stage 1 */
- paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
+ psubw_m2r (*(dataptr + 10), mm5); /* tmp05=x2-x5: stage 1 */
+ paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */
- /* stage 3 */
+ /* stage 3 */
- movq_m2r(tmp6, mm2);
- movq_r2r(mm0, mm3);
+ movq_m2r (tmp6, mm2);
+ movq_r2r (mm0, mm3);
- psllw_i2r(2, mm6); /* m8 * 2^2 */
- paddw_r2r(mm1, mm0);
+ psllw_i2r (2, mm6); /* m8 * 2^2 */
+ paddw_r2r (mm1, mm0);
- pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
- psubw_r2r(mm1, mm3);
+ pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */
+ psubw_r2r (mm1, mm3);
- movq_r2m(mm0, *dataptr);
- movq_r2r(mm7, mm0);
-
- /* Odd part */
- movq_r2m(mm3, *(dataptr+8));
- paddw_r2r(mm5, mm4); /* tmp10 */
+ movq_r2m (mm0, *dataptr);
+ movq_r2r (mm7, mm0);
- movq_m2r(tmp7, mm3);
- paddw_r2r(mm6, mm0); /* tmp32 */
+ /* Odd part */
+ movq_r2m (mm3, *(dataptr + 8));
+ paddw_r2r (mm5, mm4); /* tmp10 */
- paddw_r2r(mm2, mm5); /* tmp11 */
- psubw_r2r(mm6, mm7); /* tmp33 */
+ movq_m2r (tmp7, mm3);
+ paddw_r2r (mm6, mm0); /* tmp32 */
- movq_r2m(mm0, *(dataptr+4));
- paddw_r2r(mm3, mm2); /* tmp12 */
+ paddw_r2r (mm2, mm5); /* tmp11 */
+ psubw_r2r (mm6, mm7); /* tmp33 */
- /* stage 4 */
- movq_r2m(mm7, *(dataptr+12));
- movq_r2r(mm4, mm1); /* copy of tmp10 */
+ movq_r2m (mm0, *(dataptr + 4));
+ paddw_r2r (mm3, mm2); /* tmp12 */
- psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
- psllw_i2r(2, mm4); /* m8 * 2^2 */
+ /* stage 4 */
+ movq_r2m (mm7, *(dataptr + 12));
+ movq_r2r (mm4, mm1); /* copy of tmp10 */
- movq_m2r(RTjpeg_C2mC6, mm0);
- psllw_i2r(2, mm1);
+ psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r (2, mm4); /* m8 * 2^2 */
- pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
- psllw_i2r(2, mm2);
+ movq_m2r (RTjpeg_C2mC6, mm0);
+ psllw_i2r (2, mm1);
- pmulhw_r2r(mm0, mm4); /* z5 */
+ pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */
+ psllw_i2r (2, mm2);
- /* stage 5 */
+ pmulhw_r2r (mm0, mm4); /* z5 */
- pmulhw_m2r(RTjpeg_C2pC6, mm2);
- psllw_i2r(2, mm5);
+ /* stage 5 */
- pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
- movq_r2r(mm3, mm0); /* copy tmp7 */
+ pmulhw_m2r (RTjpeg_C2pC6, mm2);
+ psllw_i2r (2, mm5);
- movq_m2r(*(dataptr+1), mm7);
- paddw_r2r(mm1, mm4); /* z2 */
+ pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */
+ movq_r2r (mm3, mm0); /* copy tmp7 */
- paddw_r2r(mm1, mm2); /* z4 */
+ movq_m2r (*(dataptr + 1), mm7);
+ paddw_r2r (mm1, mm4); /* z2 */
- paddw_r2r(mm5, mm0); /* z11 */
- psubw_r2r(mm5, mm3); /* z13 */
+ paddw_r2r (mm1, mm2); /* z4 */
- /* stage 6 */
+ paddw_r2r (mm5, mm0); /* z11 */
+ psubw_r2r (mm5, mm3); /* z13 */
- movq_r2r(mm3, mm5); /* copy z13 */
- psubw_r2r(mm4, mm3); /* y3=z13 - z2 */
+ /* stage 6 */
- paddw_r2r(mm4, mm5); /* y5=z13 + z2 */
- movq_r2r(mm0, mm6); /* copy z11 */
+ movq_r2r (mm3, mm5); /* copy z13 */
+ psubw_r2r (mm4, mm3); /* y3=z13 - z2 */
- movq_r2m(mm3, *(dataptr+6)); /*save y3 */
- psubw_r2r(mm2, mm0); /* y7=z11 - z4 */
+ paddw_r2r (mm4, mm5); /* y5=z13 + z2 */
+ movq_r2r (mm0, mm6); /* copy z11 */
- movq_r2m(mm5, *(dataptr+10)); /*save y5 */
- paddw_r2r(mm2, mm6); /* y1=z11 + z4 */
+ movq_r2m (mm3, *(dataptr + 6)); /*save y3 */
+ psubw_r2r (mm2, mm0); /* y7=z11 - z4 */
- movq_r2m(mm0, *(dataptr+14)); /*save y7 */
+ movq_r2m (mm5, *(dataptr + 10)); /*save y5 */
+ paddw_r2r (mm2, mm6); /* y1=z11 + z4 */
+
+ movq_r2m (mm0, *(dataptr + 14)); /*save y7 */
/************************************************
* End of 1st 4 rows
************************************************/
- movq_m2r(*(dataptr+3), mm1); /* load x1 : stage 1 */
- movq_r2r(mm7, mm0); /* copy x0 */
+ movq_m2r (*(dataptr + 3), mm1); /* load x1 : stage 1 */
+ movq_r2r (mm7, mm0); /* copy x0 */
+
+ movq_r2m (mm6, *(dataptr + 2)); /*save y1 */
- movq_r2m(mm6, *(dataptr+2)); /*save y1 */
+ movq_m2r (*(dataptr + 5), mm2); /* load x2 : stage 1 */
+ movq_r2r (mm1, mm6); /* copy x1 */
- movq_m2r(*(dataptr+5), mm2); /* load x2 : stage 1 */
- movq_r2r(mm1, mm6); /* copy x1 */
+ paddw_m2r (*(dataptr + 15), mm0); /* tmp00 = x0 + x7 */
- paddw_m2r(*(dataptr+15), mm0); /* tmp00 = x0 + x7 */
+ movq_m2r (*(dataptr + 7), mm3); /* load x3 : stage 1 */
+ movq_r2r (mm2, mm5); /* copy x2 */
- movq_m2r(*(dataptr+7), mm3); /* load x3 : stage 1 */
- movq_r2r(mm2, mm5); /* copy x2 */
+ psubw_m2r (*(dataptr + 15), mm7); /* tmp07 = x0 - x7 */
+ movq_r2r (mm3, mm4); /* copy x3 */
- psubw_m2r(*(dataptr+15), mm7); /* tmp07 = x0 - x7 */
- movq_r2r(mm3, mm4); /* copy x3 */
+ paddw_m2r (*(dataptr + 13), mm1); /* tmp01 = x1 + x6 */
- paddw_m2r(*(dataptr+13), mm1); /* tmp01 = x1 + x6 */
+ movq_r2m (mm7, tmp7); /* save tmp07 */
+ movq_r2r (mm0, mm7); /* copy tmp00 */
- movq_r2m(mm7, tmp7); /* save tmp07 */
- movq_r2r(mm0, mm7); /* copy tmp00 */
+ psubw_m2r (*(dataptr + 13), mm6); /* tmp06 = x1 - x6 */
- psubw_m2r(*(dataptr+13), mm6); /* tmp06 = x1 - x6 */
+ /* stage 2, Even Part */
- /* stage 2, Even Part */
+ paddw_m2r (*(dataptr + 9), mm3); /* tmp03 = x3 + x4 */
- paddw_m2r(*(dataptr+9), mm3); /* tmp03 = x3 + x4 */
+ movq_r2m (mm6, tmp6); /* save tmp06 */
+ movq_r2r (mm1, mm6); /* copy tmp01 */
- movq_r2m(mm6, tmp6); /* save tmp07 */
- movq_r2r(mm1, mm6); /* copy tmp01 */
+ paddw_m2r (*(dataptr + 11), mm2); /* tmp02 = x2 + x5 */
+ paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03 */
- paddw_m2r(*(dataptr+11), mm2); /* tmp02 = x2 + x5 */
- paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03 */
+ psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03 */
- psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03 */
+ psubw_m2r (*(dataptr + 9), mm4); /* tmp04 = x3 - x4 */
+ psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02 */
- psubw_m2r(*(dataptr+9), mm4); /* tmp04 = x3 - x4 */
- psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02 */
+ paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02 */
- paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02 */
+ psubw_m2r (*(dataptr + 11), mm5); /* tmp05 = x2 - x5 */
+ paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */
- psubw_m2r(*(dataptr+11), mm5); /* tmp05 = x2 - x5 */
- paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
+ /* stage 3, Even and stage 4 & 5 even */
- /* stage 3, Even and stage 4 & 5 even */
+ movq_m2r (tmp6, mm2); /* load tmp6 */
+ movq_r2r (mm0, mm3); /* copy tmp10 */
- movq_m2r(tmp6, mm2); /* load tmp6 */
- movq_r2r(mm0, mm3); /* copy tmp10 */
+ psllw_i2r (2, mm6); /* shift z1 */
+ paddw_r2r (mm1, mm0); /* y0=tmp10 + tmp11 */
- psllw_i2r(2, mm6); /* shift z1 */
- paddw_r2r(mm1, mm0); /* y0=tmp10 + tmp11 */
+ pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */
+ psubw_r2r (mm1, mm3); /* y4=tmp10 - tmp11 */
- pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
- psubw_r2r(mm1, mm3); /* y4=tmp10 - tmp11 */
+ movq_r2m (mm0, *(dataptr + 1)); /*save y0 */
+ movq_r2r (mm7, mm0); /* copy tmp13 */
- movq_r2m(mm0, *(dataptr+1)); /*save y0 */
- movq_r2r(mm7, mm0); /* copy tmp13 */
-
- /* odd part */
+ /* odd part */
- movq_r2m(mm3, *(dataptr+9)); /*save y4 */
- paddw_r2r(mm5, mm4); /* tmp10 = tmp4 + tmp5 */
+ movq_r2m (mm3, *(dataptr + 9)); /*save y4 */
+ paddw_r2r (mm5, mm4); /* tmp10 = tmp4 + tmp5 */
- movq_m2r(tmp7, mm3); /* load tmp7 */
- paddw_r2r(mm6, mm0); /* tmp32 = tmp13 + z1 */
+ movq_m2r (tmp7, mm3); /* load tmp7 */
+ paddw_r2r (mm6, mm0); /* tmp32 = tmp13 + z1 */
- paddw_r2r(mm2, mm5); /* tmp11 = tmp5 + tmp6 */
- psubw_r2r(mm6, mm7); /* tmp33 = tmp13 - z1 */
+ paddw_r2r (mm2, mm5); /* tmp11 = tmp5 + tmp6 */
+ psubw_r2r (mm6, mm7); /* tmp33 = tmp13 - z1 */
- movq_r2m(mm0, *(dataptr+5)); /*save y2 */
- paddw_r2r(mm3, mm2); /* tmp12 = tmp6 + tmp7 */
+ movq_r2m (mm0, *(dataptr + 5)); /*save y2 */
+ paddw_r2r (mm3, mm2); /* tmp12 = tmp6 + tmp7 */
- /* stage 4 */
+ /* stage 4 */
- movq_r2m(mm7, *(dataptr+13)); /*save y6 */
- movq_r2r(mm4, mm1); /* copy tmp10 */
+ movq_r2m (mm7, *(dataptr + 13)); /*save y6 */
+ movq_r2r (mm4, mm1); /* copy tmp10 */
- psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
- psllw_i2r(2, mm4); /* shift tmp10 */
+ psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r (2, mm4); /* shift tmp10 */
- movq_m2r(RTjpeg_C2mC6, mm0); /* load C2mC6 */
- psllw_i2r(2, mm1); /* shift (tmp10-tmp12) */
+ movq_m2r (RTjpeg_C2mC6, mm0); /* load C2mC6 */
+ psllw_i2r (2, mm1); /* shift (tmp10-tmp12) */
- pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
- psllw_i2r(2, mm5); /* prepare for multiply */
+ pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */
+ psllw_i2r (2, mm5); /* prepare for multiply */
- pmulhw_r2r(mm0, mm4); /* multiply by converted real */
+ pmulhw_r2r (mm0, mm4); /* multiply by converted real */
- /* stage 5 */
+ /* stage 5 */
- pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
- psllw_i2r(2, mm2); /* prepare for multiply */
+ pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */
+ psllw_i2r (2, mm2); /* prepare for multiply */
- pmulhw_m2r(RTjpeg_C2pC6, mm2); /* multiply */
- movq_r2r(mm3, mm0); /* copy tmp7 */
+ pmulhw_m2r (RTjpeg_C2pC6, mm2); /* multiply */
+ movq_r2r (mm3, mm0); /* copy tmp7 */
- movq_m2r(*(dataptr+9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
- paddw_r2r(mm1, mm4); /* z2 */
+ movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
+ paddw_r2r (mm1, mm4); /* z2 */
- paddw_r2r(mm5, mm0); /* z11 */
- psubw_r2r(mm5, mm3); /* z13 */
+ paddw_r2r (mm5, mm0); /* z11 */
+ psubw_r2r (mm5, mm3); /* z13 */
- /* stage 6 */
+ /* stage 6 */
- movq_r2r(mm3, mm5); /* copy z13 */
- paddw_r2r(mm1, mm2); /* z4 */
+ movq_r2r (mm3, mm5); /* copy z13 */
+ paddw_r2r (mm1, mm2); /* z4 */
- movq_r2r(mm0, mm6); /* copy z11 */
- psubw_r2r(mm4, mm5); /* y3 */
+ movq_r2r (mm0, mm6); /* copy z11 */
+ psubw_r2r (mm4, mm5); /* y3 */
- paddw_r2r(mm2, mm6); /* y1 */
- paddw_r2r(mm4, mm3); /* y5 */
+ paddw_r2r (mm2, mm6); /* y1 */
+ paddw_r2r (mm4, mm3); /* y5 */
- movq_r2m(mm5, *(dataptr+7)); /*save y3 */
- psubw_r2r(mm2, mm0); /* yè=z11 - z4 */
+ movq_r2m (mm5, *(dataptr + 7)); /*save y3 */
+ psubw_r2r (mm2, mm0); /* y7=z11 - z4 */
- movq_r2m(mm3, *(dataptr+11)); /*save y5 */
+ movq_r2m (mm3, *(dataptr + 11)); /*save y5 */
- movq_r2m(mm6, *(dataptr+3)); /*save y1 */
+ movq_r2m (mm6, *(dataptr + 3)); /*save y1 */
+
+ movq_r2m (mm0, *(dataptr + 15)); /*save y7 */
- movq_r2m(mm0, *(dataptr+15)); /*save y7 */
-
#endif
}
-#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */
-#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */
-#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */
-#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */
+#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200): 277 / 256 ~= 1.0820, i.e. the constant scaled by 2^8 */
+#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562): 362 / 256 ~= 1.4141, same 2^8 scale */
+#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065): 473 / 256 ~= 1.8477, same 2^8 scale */
+#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930): 669 / 256 ~= 2.6133, same 2^8 scale */
#define DESCALE(x) (__s16)( ((x)+4) >> 3) /* adds 4 (half of 8) to x, shifts right by 3, then casts the result to __s16 */
@@ -1226,284 +1232,283 @@ void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x)) /* limits x to the range 16..235; NOTE(review): the expansion has no outer parentheses and expands x up to three times, so parenthesize uses and avoid arguments with side effects */
#define MULTIPLY(var,const) (((__s32) ((var) * (const)) + 128)>>8) /* product cast to __s32, plus 128 (half of 256), shifted right by 8: a rounded multiply by a constant pre-scaled by 256 */
-void RTjpeg_idct_init(void)
+void
+RTjpeg_idct_init (void)
{
- int i;
-
- for(i=0; i<64; i++)
- {
- RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32;
- RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32;
- }
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ RTjpeg_liqt[i] = ((__u64) RTjpeg_liqt[i] * RTjpeg_aan_tab[i]) >> 32;
+ RTjpeg_ciqt[i] = ((__u64) RTjpeg_ciqt[i] * RTjpeg_aan_tab[i]) >> 32;
+ }
}
-void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip)
+void
+RTjpeg_idct (__u8 * odata, __s16 * data, int rskip)
{
#ifdef HAVE_LIBMMX
-static mmx_t fix_141 = (mmx_t)(long long)0x5a825a825a825a82LL;
-static mmx_t fix_184n261 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
-static mmx_t fix_184 = (mmx_t)(long long)0x7641764176417641LL;
-static mmx_t fix_n184 = (mmx_t)(long long)0x896f896f896f896fLL;
-static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
+ static mmx_t fix_141 = (mmx_t) (long long) 0x5a825a825a825a82LL;
+ static mmx_t fix_184n261 = (mmx_t) (long long) 0xcf04cf04cf04cf04LL;
+ static mmx_t fix_184 = (mmx_t) (long long) 0x7641764176417641LL;
+ static mmx_t fix_n184 = (mmx_t) (long long) 0x896f896f896f896fLL;
+ static mmx_t fix_108n184 = (mmx_t) (long long) 0xcf04cf04cf04cf04LL;
mmx_t workspace[64];
mmx_t *wsptr = workspace;
- register mmx_t *dataptr = (mmx_t *)odata;
- mmx_t *idata = (mmx_t *)data;
+ register mmx_t *dataptr = (mmx_t *) odata;
+ mmx_t *idata = (mmx_t *) data;
- rskip = rskip>>3;
+ rskip = rskip >> 3;
/*
* Perform inverse DCT on one block of coefficients.
*/
- /* Odd part */
+ /* Odd part */
+
+ movq_m2r (*(idata + 10), mm1); /* load idata[DCTSIZE*5] */
- movq_m2r(*(idata+10), mm1); /* load idata[DCTSIZE*5] */
+ movq_m2r (*(idata + 6), mm0); /* load idata[DCTSIZE*3] */
- movq_m2r(*(idata+6), mm0); /* load idata[DCTSIZE*3] */
+ movq_m2r (*(idata + 2), mm3); /* load idata[DCTSIZE*1] */
- movq_m2r(*(idata+2), mm3); /* load idata[DCTSIZE*1] */
+ movq_r2r (mm1, mm2); /* copy tmp6 : phase 6 */
+ movq_m2r (*(idata + 14), mm4); /* load idata[DCTSIZE*7] */
- movq_r2r(mm1, mm2); /* copy tmp6 : phase 6 */ */
+ paddw_r2r (mm0, mm1); /* z13 = tmp6 + tmp5; */
- movq_m2r(*(idata+14), mm4); /* load idata[DCTSIZE*7] */
+ psubw_r2r (mm0, mm2); /* z10 = tmp6 - tmp5 */
- paddw_r2r(mm0, mm1); /* z13 = tmp6 + tmp5; */
+ psllw_i2r (2, mm2); /* shift z10 */
+ movq_r2r (mm2, mm0); /* copy z10 */
- psubw_r2r(mm0, mm2); /* z10 = tmp6 - tmp5 */
+ pmulhw_m2r (fix_184n261, mm2); /* MULTIPLY(z10, (FIX_1_847759065 - FIX_2_613125930)); : -2*(c2+c6) */
+ movq_r2r (mm3, mm5); /* copy tmp4 */
- psllw_i2r(2, mm2); /* shift z10 */
- movq_r2r(mm2, mm0); /* copy z10 */
+ pmulhw_m2r (fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */
+ paddw_r2r (mm4, mm3); /* z11 = tmp4 + tmp7; */
- pmulhw_m2r(fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */
- movq_r2r(mm3, mm5); /* copy tmp4 */
+ movq_r2r (mm3, mm6); /* copy z11 : phase 5 */
+ psubw_r2r (mm4, mm5); /* z12 = tmp4 - tmp7; */
- pmulhw_m2r(fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */
- paddw_r2r(mm4, mm3); /* z11 = tmp4 + tmp7; */
+ psubw_r2r (mm1, mm6); /* z11-z13 */
+ psllw_i2r (2, mm5); /* shift z12 */
- movq_r2r(mm3, mm6); /* copy z11 : phase 5 */
- psubw_r2r(mm4, mm5); /* z12 = tmp4 - tmp7; */
+ movq_m2r (*(idata + 12), mm4); /* load idata[DCTSIZE*6], even part */
+ movq_r2r (mm5, mm7); /* copy z12 */
- psubw_r2r(mm1, mm6); /* z11-z13 */
- psllw_i2r(2, mm5); /* shift z12 */
+ pmulhw_m2r (fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6): even part */
+ paddw_r2r (mm1, mm3); /* tmp7 = z11 + z13; */
- movq_m2r(*(idata+12), mm4); /* load idata[DCTSIZE*6], even part */
- movq_r2r(mm5, mm7); /* copy z12 */
+ /*ok */
- pmulhw_m2r(fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6): even part */
- paddw_r2r(mm1, mm3); /* tmp7 = z11 + z13; */
+ /* Even part */
+ pmulhw_m2r (fix_184, mm7); /* MULTIPLY(z12, FIX_1_847759065); : 2*c2 */
+ psllw_i2r (2, mm6);
- /*ok */
+ movq_m2r (*(idata + 4), mm1); /* load idata[DCTSIZE*2] */
- /* Even part */
- pmulhw_m2r(fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */
- psllw_i2r(2, mm6);
+ paddw_r2r (mm5, mm0); /* tmp10 */
- movq_m2r(*(idata+4), mm1); /* load idata[DCTSIZE*2] */
+ paddw_r2r (mm7, mm2); /* tmp12 */
- paddw_r2r(mm5, mm0); /* tmp10 */
+ pmulhw_m2r (fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
+ psubw_r2r (mm3, mm2); /* tmp6 = tmp12 - tmp7 */
- paddw_r2r(mm7, mm2); /* tmp12 */
+ movq_r2r (mm1, mm5); /* copy tmp1 */
+ paddw_r2r (mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */
- pmulhw_m2r(fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
- psubw_r2r(mm3, mm2); /* tmp6 = tmp12 - tmp7 */
+ psubw_r2r (mm4, mm5); /* tmp1-tmp3 */
+ psubw_r2r (mm2, mm6); /* tmp5 = tmp11 - tmp6; */
- movq_r2r(mm1, mm5); /* copy tmp1 */
- paddw_r2r(mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */
+ movq_r2m (mm1, *(wsptr)); /* save tmp13 in workspace */
+ psllw_i2r (2, mm5); /* shift tmp1-tmp3 */
- psubw_r2r(mm4, mm5); /* tmp1-tmp3 */
- psubw_r2r(mm2, mm6); /* tmp5 = tmp11 - tmp6; */
+ movq_m2r (*(idata), mm7); /* load idata[DCTSIZE*0] */
- movq_r2m(mm1, *(wsptr)); /* save tmp13 in workspace */
- psllw_i2r(2, mm5); /* shift tmp1-tmp3 */
-
- movq_m2r(*(idata), mm7); /* load idata[DCTSIZE*0] */
+ pmulhw_m2r (fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
+ paddw_r2r (mm6, mm0); /* tmp4 = tmp10 + tmp5; */
- pmulhw_m2r(fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
- paddw_r2r(mm6, mm0); /* tmp4 = tmp10 + tmp5; */
+ movq_m2r (*(idata + 8), mm4); /* load idata[DCTSIZE*4] */
- movq_m2r(*(idata+8), mm4); /* load idata[DCTSIZE*4] */
-
- psubw_r2r(mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
+ psubw_r2r (mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
- movq_r2m(mm0, *(wsptr+4)); /* save tmp4 in workspace */
- movq_r2r(mm7, mm1); /* copy tmp0 : phase 3 */
+ movq_r2m (mm0, *(wsptr + 4)); /* save tmp4 in workspace */
+ movq_r2r (mm7, mm1); /* copy tmp0 : phase 3 */
- movq_r2m(mm5, *(wsptr+2)); /* save tmp12 in workspace */
- psubw_r2r(mm4, mm1); /* tmp11 = tmp0 - tmp2; */
+ movq_r2m (mm5, *(wsptr + 2)); /* save tmp12 in workspace */
+ psubw_r2r (mm4, mm1); /* tmp11 = tmp0 - tmp2; */
- paddw_r2r(mm4, mm7); /* tmp10 = tmp0 + tmp2; */
- movq_r2r(mm1, mm5); /* copy tmp11 */
-
- paddw_m2r(*(wsptr+2), mm1); /* tmp1 = tmp11 + tmp12; */
- movq_r2r(mm7, mm4); /* copy tmp10 : phase 2 */
+ paddw_r2r (mm4, mm7); /* tmp10 = tmp0 + tmp2; */
+ movq_r2r (mm1, mm5); /* copy tmp11 */
- paddw_m2r(*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */
+ paddw_m2r (*(wsptr + 2), mm1); /* tmp1 = tmp11 + tmp12; */
+ movq_r2r (mm7, mm4); /* copy tmp10 : phase 2 */
- psubw_m2r(*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */
- movq_r2r(mm7, mm0); /* copy tmp0 */
+ paddw_m2r (*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */
- psubw_m2r(*(wsptr+2), mm5); /* tmp2 = tmp11 - tmp12; */
- paddw_r2r(mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
-
- psubw_r2r(mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
+ psubw_m2r (*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */
+ movq_r2r (mm7, mm0); /* copy tmp0 */
- movq_r2m(mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */
- movq_r2r(mm1, mm3); /* copy tmp1 */
+ psubw_m2r (*(wsptr + 2), mm5); /* tmp2 = tmp11 - tmp12; */
+ paddw_r2r (mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
- movq_r2m(mm0, *(wsptr+14)); /* wsptr[DCTSIZE*7] */
- paddw_r2r(mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
+ psubw_r2r (mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
- psubw_r2r(mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
+ movq_r2m (mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */
+ movq_r2r (mm1, mm3); /* copy tmp1 */
- movq_r2m(mm1, *(wsptr+2)); /* wsptr[DCTSIZE*1] */
- movq_r2r(mm4, mm1); /* copy tmp3 */
+ movq_r2m (mm0, *(wsptr + 14)); /* wsptr[DCTSIZE*7] */
+ paddw_r2r (mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
- movq_r2m(mm3, *(wsptr+12)); /* wsptr[DCTSIZE*6] */
+ psubw_r2r (mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
- paddw_m2r(*(wsptr+4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
+ movq_r2m (mm1, *(wsptr + 2)); /* wsptr[DCTSIZE*1] */
+ movq_r2r (mm4, mm1); /* copy tmp3 */
- psubw_m2r(*(wsptr+4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
+ movq_r2m (mm3, *(wsptr + 12)); /* wsptr[DCTSIZE*6] */
- movq_r2m(mm4, *(wsptr+8));
- movq_r2r(mm5, mm7); /* copy tmp2 */
+ paddw_m2r (*(wsptr + 4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
- paddw_r2r(mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
+ psubw_m2r (*(wsptr + 4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
- movq_r2m(mm1, *(wsptr+6));
- psubw_r2r(mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
+ movq_r2m (mm4, *(wsptr + 8));
+ movq_r2r (mm5, mm7); /* copy tmp2 */
- movq_r2m(mm5, *(wsptr+4));
+ paddw_r2r (mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
- movq_r2m(mm7, *(wsptr+10));
+ movq_r2m (mm1, *(wsptr + 6));
+ psubw_r2r (mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
- /*ok */
+ movq_r2m (mm5, *(wsptr + 4));
+
+ movq_r2m (mm7, *(wsptr + 10));
+
+ /*ok */
/*****************************************************************/
- idata++;
- wsptr++;
+ idata++;
+ wsptr++;
/*****************************************************************/
- movq_m2r(*(idata+10), mm1); /* load idata[DCTSIZE*5] */
+ movq_m2r (*(idata + 10), mm1); /* load idata[DCTSIZE*5] */
+
+ movq_m2r (*(idata + 6), mm0); /* load idata[DCTSIZE*3] */
+
+ movq_m2r (*(idata + 2), mm3); /* load idata[DCTSIZE*1] */
+ movq_r2r (mm1, mm2); /* copy tmp6 : phase 6 */
+ movq_m2r (*(idata + 14), mm4); /* load idata[DCTSIZE*7] */
+ paddw_r2r (mm0, mm1); /* z13 = tmp6 + tmp5; */
- movq_m2r(*(idata+6), mm0); /* load idata[DCTSIZE*3] */
+ psubw_r2r (mm0, mm2); /* z10 = tmp6 - tmp5 */
- movq_m2r(*(idata+2), mm3); /* load idata[DCTSIZE*1] */
- movq_r2r(mm1, mm2); /* copy tmp6 : phase 6 */ */
+ psllw_i2r (2, mm2); /* shift z10 */
+ movq_r2r (mm2, mm0); /* copy z10 */
- movq_m2r(*(idata+14), mm4); /* load idata[DCTSIZE*7] */
- paddw_r2r(mm0, mm1); /* z13 = tmp6 + tmp5; */
+ pmulhw_m2r (fix_184n261, mm2); /* MULTIPLY(z10, (FIX_1_847759065 - FIX_2_613125930)); : -2*(c2+c6) */
+ movq_r2r (mm3, mm5); /* copy tmp4 */
- psubw_r2r(mm0, mm2); /* z10 = tmp6 - tmp5 */
+ pmulhw_m2r (fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */
+ paddw_r2r (mm4, mm3); /* z11 = tmp4 + tmp7; */
- psllw_i2r(2, mm2); /* shift z10 */
- movq_r2r(mm2, mm0); /* copy z10 */
+ movq_r2r (mm3, mm6); /* copy z11 : phase 5 */
+ psubw_r2r (mm4, mm5); /* z12 = tmp4 - tmp7; */
- pmulhw_m2r(fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */
- movq_r2r(mm3, mm5); /* copy tmp4 */
+ psubw_r2r (mm1, mm6); /* z11-z13 */
+ psllw_i2r (2, mm5); /* shift z12 */
- pmulhw_m2r(fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */
- paddw_r2r(mm4, mm3); /* z11 = tmp4 + tmp7; */
+ movq_m2r (*(idata + 12), mm4); /* load idata[DCTSIZE*6], even part */
+ movq_r2r (mm5, mm7); /* copy z12 */
- movq_r2r(mm3, mm6); /* copy z11 : phase 5 */
- psubw_r2r(mm4, mm5); /* z12 = tmp4 - tmp7; */
+ pmulhw_m2r (fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6) even part */
+ paddw_r2r (mm1, mm3); /* tmp7 = z11 + z13; */
- psubw_r2r(mm1, mm6); /* z11-z13 */
- psllw_i2r(2, mm5); /* shift z12 */
+ /*ok */
- movq_m2r(*(idata+12), mm4); /* load idata[DCTSIZE*6], even part */
- movq_r2r(mm5, mm7); /* copy z12 */
+ /* Even part */
+ pmulhw_m2r (fix_184, mm7); /* MULTIPLY(z12, FIX_1_847759065); : 2*c2 */
+ psllw_i2r (2, mm6);
- pmulhw_m2r(fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6) even part */
- paddw_r2r(mm1, mm3); /* tmp7 = z11 + z13; */
+ movq_m2r (*(idata + 4), mm1); /* load idata[DCTSIZE*2] */
- /*ok */
+ paddw_r2r (mm5, mm0); /* tmp10 */
- /* Even part */
- pmulhw_m2r(fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */
- psllw_i2r(2, mm6);
+ paddw_r2r (mm7, mm2); /* tmp12 */
- movq_m2r(*(idata+4), mm1); /* load idata[DCTSIZE*2] */
+ pmulhw_m2r (fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
+ psubw_r2r (mm3, mm2); /* tmp6 = tmp12 - tmp7 */
- paddw_r2r(mm5, mm0); /* tmp10 */
+ movq_r2r (mm1, mm5); /* copy tmp1 */
+ paddw_r2r (mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */
- paddw_r2r(mm7, mm2); /* tmp12 */
+ psubw_r2r (mm4, mm5); /* tmp1-tmp3 */
+ psubw_r2r (mm2, mm6); /* tmp5 = tmp11 - tmp6; */
- pmulhw_m2r(fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
- psubw_r2r(mm3, mm2); /* tmp6 = tmp12 - tmp7 */
+ movq_r2m (mm1, *(wsptr)); /* save tmp13 in workspace */
+ psllw_i2r (2, mm5); /* shift tmp1-tmp3 */
- movq_r2r(mm1, mm5); /* copy tmp1 */
- paddw_r2r(mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */
+ movq_m2r (*(idata), mm7); /* load idata[DCTSIZE*0] */
+ paddw_r2r (mm6, mm0); /* tmp4 = tmp10 + tmp5; */
- psubw_r2r(mm4, mm5); /* tmp1-tmp3 */
- psubw_r2r(mm2, mm6); /* tmp5 = tmp11 - tmp6; */
+ pmulhw_m2r (fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
- movq_r2m(mm1, *(wsptr)); /* save tmp13 in workspace */
- psllw_i2r(2, mm5); /* shift tmp1-tmp3 */
-
- movq_m2r(*(idata), mm7); /* load idata[DCTSIZE*0] */
- paddw_r2r(mm6, mm0); /* tmp4 = tmp10 + tmp5; */
+ movq_m2r (*(idata + 8), mm4); /* load idata[DCTSIZE*4] */
- pmulhw_m2r(fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
+ psubw_r2r (mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
- movq_m2r(*(idata+8), mm4); /* load idata[DCTSIZE*4] */
-
- psubw_r2r(mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
+ movq_r2m (mm0, *(wsptr + 4)); /* save tmp4 in workspace */
+ movq_r2r (mm7, mm1); /* copy tmp0: phase 3 */
- movq_r2m(mm0, *(wsptr+4)); /* save tmp4 in workspace */
- movq_r2r(mm7, mm1); /* copy tmp0: phase 3 */
+ movq_r2m (mm5, *(wsptr + 2)); /* save tmp12 in workspace */
+ psubw_r2r (mm4, mm1); /* tmp11 = tmp0 - tmp2; */
- movq_r2m(mm5, *(wsptr+2)); /* save tmp12 in workspace */
- psubw_r2r(mm4, mm1); /* tmp11 = tmp0 - tmp2; */
+ paddw_r2r (mm4, mm7); /* tmp10 = tmp0 + tmp2; */
+ movq_r2r (mm1, mm5); /* copy tmp11 */
- paddw_r2r(mm4, mm7); /* tmp10 = tmp0 + tmp2; */
- movq_r2r(mm1, mm5); /* copy tmp11 */
-
- paddw_m2r(*(wsptr+2), mm1); /* tmp1 = tmp11 + tmp12; */
- movq_r2r(mm7, mm4); /* copy tmp10: phase 2 */
+ paddw_m2r (*(wsptr + 2), mm1); /* tmp1 = tmp11 + tmp12; */
+ movq_r2r (mm7, mm4); /* copy tmp10: phase 2 */
- paddw_m2r(*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */
+ paddw_m2r (*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */
- psubw_m2r(*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */
- movq_r2r(mm7, mm0); /* copy tmp0 */
+ psubw_m2r (*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */
+ movq_r2r (mm7, mm0); /* copy tmp0 */
- psubw_m2r(*(wsptr+2), mm5); /* tmp2 = tmp11 - tmp12; */
- paddw_r2r(mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
-
- psubw_r2r(mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
+ psubw_m2r (*(wsptr + 2), mm5); /* tmp2 = tmp11 - tmp12; */
+ paddw_r2r (mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
- movq_r2m(mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */
- movq_r2r(mm1, mm3); /* copy tmp1 */
+ psubw_r2r (mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
- movq_r2m(mm0, *(wsptr+14)); /* wsptr[DCTSIZE*7] */
- paddw_r2r(mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
+ movq_r2m (mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */
+ movq_r2r (mm1, mm3); /* copy tmp1 */
- psubw_r2r(mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
+ movq_r2m (mm0, *(wsptr + 14)); /* wsptr[DCTSIZE*7] */
+ paddw_r2r (mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
- movq_r2m(mm1, *(wsptr+2)); /* wsptr[DCTSIZE*1] */
- movq_r2r(mm4, mm1); /* copy tmp3 */
+ psubw_r2r (mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
- movq_r2m(mm3, *(wsptr+12)); /* wsptr[DCTSIZE*6] */
+ movq_r2m (mm1, *(wsptr + 2)); /* wsptr[DCTSIZE*1] */
+ movq_r2r (mm4, mm1); /* copy tmp3 */
- paddw_m2r(*(wsptr+4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
+ movq_r2m (mm3, *(wsptr + 12)); /* wsptr[DCTSIZE*6] */
- psubw_m2r(*(wsptr+4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
+ paddw_m2r (*(wsptr + 4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
- movq_r2m(mm4, *(wsptr+8));
- movq_r2r(mm5, mm7); /* copy tmp2 */
+ psubw_m2r (*(wsptr + 4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
- paddw_r2r(mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
+ movq_r2m (mm4, *(wsptr + 8));
+ movq_r2r (mm5, mm7); /* copy tmp2 */
- movq_r2m(mm1, *(wsptr+6));
- psubw_r2r(mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
+ paddw_r2r (mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
- movq_r2m(mm5, *(wsptr+4));
+ movq_r2m (mm1, *(wsptr + 6));
+ psubw_r2r (mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
- movq_r2m(mm7, *(wsptr+10));
+ movq_r2m (mm5, *(wsptr + 4));
+
+ movq_r2m (mm7, *(wsptr + 10));
/*****************************************************************/
@@ -1512,258 +1517,258 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* and also undo the PASS1_BITS scaling. */
/*****************************************************************/
- /* Even part */
+ /* Even part */
- wsptr--;
+ wsptr--;
/* tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); */
/* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */
/* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */
/* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */
- movq_m2r(*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+ movq_m2r (*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+
+ movq_m2r (*(wsptr + 1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */
+ movq_r2r (mm0, mm2);
+
+ movq_m2r (*(wsptr + 2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ paddw_r2r (mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
+
+ movq_m2r (*(wsptr + 3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */
+ psubw_r2r (mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
- movq_m2r(*(wsptr+1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */
- movq_r2r(mm0, mm2);
-
- movq_m2r(*(wsptr+2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */
- paddw_r2r(mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
+ movq_r2r (mm0, mm6);
+ movq_r2r (mm3, mm5);
- movq_m2r(*(wsptr+3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */
- psubw_r2r(mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
+ paddw_r2r (mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
+ movq_r2r (mm2, mm1);
- movq_r2r(mm0, mm6);
- movq_r2r(mm3, mm5);
-
- paddw_r2r(mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
- movq_r2r(mm2, mm1);
+ psubw_r2r (mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
+ punpcklwd_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
- psubw_r2r(mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
- punpcklwd_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
+ movq_m2r (*(wsptr + 7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ punpckhwd_r2r (mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
- movq_m2r(*(wsptr+7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */
- punpckhwd_r2r(mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
+ movq_m2r (*(wsptr + 4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckldq_r2r (mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
- movq_m2r(*(wsptr+4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */
- punpckldq_r2r(mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
+ punpcklwd_r2r (mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
+ movq_r2r (mm3, mm4);
- punpcklwd_r2r(mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
- movq_r2r(mm3, mm4);
+ movq_m2r (*(wsptr + 6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */
+ punpckhwd_r2r (mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
- movq_m2r(*(wsptr+6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */
- punpckhwd_r2r(mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
+ movq_m2r (*(wsptr + 5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckldq_r2r (mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
- movq_m2r(*(wsptr+5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */
- punpckldq_r2r(mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
-
- paddw_r2r(mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
- movq_r2r(mm6, mm2);
+ paddw_r2r (mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
+ movq_r2r (mm6, mm2);
- psubw_r2r(mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
- paddw_r2r(mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
+ psubw_r2r (mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
+ paddw_r2r (mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
- movq_r2r(mm3, mm5);
- punpcklwd_r2r(mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
-
- psubw_r2r(mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
- punpckhwd_r2r(mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
+ movq_r2r (mm3, mm5);
+ punpcklwd_r2r (mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
- movq_r2r(mm4, mm7);
- punpckldq_r2r(mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
-
- punpcklwd_r2r(mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
+ psubw_r2r (mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
+ punpckhwd_r2r (mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
- punpckhwd_r2r(mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
+ movq_r2r (mm4, mm7);
+ punpckldq_r2r (mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
- punpckldq_r2r(mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
- movq_r2r(mm1, mm6);
+ punpcklwd_r2r (mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
- /*ok */
+ punpckhwd_r2r (mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
+
+ punpckldq_r2r (mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
+ movq_r2r (mm1, mm6);
+
+ /*ok */
/* mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
/* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
- movq_r2r(mm0, mm2);
- punpckhdq_r2r(mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
+ movq_r2r (mm0, mm2);
+ punpckhdq_r2r (mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
- punpckldq_r2r(mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
- psllw_i2r(2, mm6);
+ punpckldq_r2r (mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
+ psllw_i2r (2, mm6);
- pmulhw_m2r(fix_141, mm6);
- punpckldq_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
+ pmulhw_m2r (fix_141, mm6);
+ punpckldq_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
- punpckhdq_r2r(mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
- movq_r2r(mm0, mm7);
+ punpckhdq_r2r (mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
+ movq_r2r (mm0, mm7);
/* tmp0 = tmp10 + tmp13; */
/* tmp3 = tmp10 - tmp13; */
- paddw_r2r(mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
- psubw_r2r(mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
+ paddw_r2r (mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
+ psubw_r2r (mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
/* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */
- psubw_r2r(mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
+ psubw_r2r (mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
/* tmp1 = tmp11 + tmp12; */
/* tmp2 = tmp11 - tmp12; */
- movq_r2r(mm1, mm5);
+ movq_r2r (mm1, mm5);
- /*OK */
+ /*OK */
- /* Odd part */
+ /* Odd part */
/* z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; */
/* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */
/* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */
/* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */
- movq_m2r(*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */
- paddw_r2r(mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
+ movq_m2r (*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+ paddw_r2r (mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
- movq_m2r(*(wsptr+1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */
- psubw_r2r(mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
+ movq_m2r (*(wsptr + 1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */
+ psubw_r2r (mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
- movq_r2r(mm3, mm6);
- punpckldq_r2r(mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */
+ movq_r2r (mm3, mm6);
+ punpckldq_r2r (mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */
- punpckhdq_r2r(mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */
- movq_r2r(mm3, mm2);
+ punpckhdq_r2r (mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */
+ movq_r2r (mm3, mm2);
/*Save tmp0 and tmp1 in wsptr */
- movq_r2m(mm0, *(wsptr)); /* save tmp0 */
- paddw_r2r(mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
+ movq_r2m (mm0, *(wsptr)); /* save tmp0 */
+ paddw_r2r (mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
+
-
/*Continue with z10 --- z13 */
- movq_m2r(*(wsptr+2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */
- psubw_r2r(mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
+ movq_m2r (*(wsptr + 2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ psubw_r2r (mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
- movq_m2r(*(wsptr+3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */
- movq_r2r(mm6, mm4);
+ movq_m2r (*(wsptr + 3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */
+ movq_r2r (mm6, mm4);
- movq_r2m(mm1, *(wsptr+1)); /* save tmp1 */
- punpckldq_r2r(mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */
+ movq_r2m (mm1, *(wsptr + 1)); /* save tmp1 */
+ punpckldq_r2r (mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */
+
+ punpckhdq_r2r (mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */
+ movq_r2r (mm6, mm1);
- punpckhdq_r2r(mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */
- movq_r2r(mm6, mm1);
-
/*Save tmp2 and tmp3 in wsptr */
- paddw_r2r(mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
- movq_r2r(mm2, mm4);
-
+ paddw_r2r (mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
+ movq_r2r (mm2, mm4);
+
/*Continue with z10 --- z13 */
- movq_r2m(mm5, *(wsptr+2)); /* save tmp2 */
- punpcklwd_r2r(mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
+ movq_r2m (mm5, *(wsptr + 2)); /* save tmp2 */
+ punpcklwd_r2r (mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
- psubw_r2r(mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
- punpckhwd_r2r(mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
+ psubw_r2r (mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
+ punpckhwd_r2r (mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
- movq_r2r(mm3, mm0);
- punpcklwd_r2r(mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
+ movq_r2r (mm3, mm0);
+ punpcklwd_r2r (mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
- movq_r2m(mm7, *(wsptr+3)); /* save tmp3 */
- punpckhwd_r2r(mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
+ movq_r2m (mm7, *(wsptr + 3)); /* save tmp3 */
+ punpckhwd_r2r (mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
- movq_m2r(*(wsptr+4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */
- punpckhdq_r2r(mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
+ movq_m2r (*(wsptr + 4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckhdq_r2r (mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
- movq_m2r(*(wsptr+5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */
- punpckhdq_r2r(mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
+ movq_m2r (*(wsptr + 5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckhdq_r2r (mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
- movq_m2r(*(wsptr+6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */
- movq_r2r(mm6, mm4);
+ movq_m2r (*(wsptr + 6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */
+ movq_r2r (mm6, mm4);
- punpckldq_r2r(mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */
- movq_r2r(mm1, mm5);
+ punpckldq_r2r (mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */
+ movq_r2r (mm1, mm5);
- punpckhdq_r2r(mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */
- movq_r2r(mm6, mm2);
-
- movq_m2r(*(wsptr+7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */
- paddw_r2r(mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
+ punpckhdq_r2r (mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */
+ movq_r2r (mm6, mm2);
- psubw_r2r(mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
- punpckldq_r2r(mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */
+ movq_m2r (*(wsptr + 7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ paddw_r2r (mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
- punpckhdq_r2r(mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */
- movq_r2r(mm1, mm7);
+ psubw_r2r (mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
+ punpckldq_r2r (mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */
- paddw_r2r(mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
- psubw_r2r(mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
+ punpckhdq_r2r (mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */
+ movq_r2r (mm1, mm7);
- movq_r2r(mm6, mm5);
- punpcklwd_r2r(mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
+ paddw_r2r (mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
+ psubw_r2r (mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
- punpckhwd_r2r(mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
- movq_r2r(mm2, mm4);
+ movq_r2r (mm6, mm5);
+ punpcklwd_r2r (mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
- punpcklwd_r2r(mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
+ punpckhwd_r2r (mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
+ movq_r2r (mm2, mm4);
- punpckhwd_r2r(mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
+ punpcklwd_r2r (mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
- punpckhdq_r2r(mm6, mm4); /*/ wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
+ punpckhwd_r2r (mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
- punpckhdq_r2r(mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
- movq_r2r(mm0, mm5);
+ punpckhdq_r2r (mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
- punpckldq_r2r(mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
+ punpckhdq_r2r (mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
+ movq_r2r (mm0, mm5);
- punpckhdq_r2r(mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
- movq_r2r(mm3, mm4);
+ punpckldq_r2r (mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
- punpckhdq_r2r(mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
- movq_r2r(mm5, mm1);
+ punpckhdq_r2r (mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
+ movq_r2r (mm3, mm4);
- punpckldq_r2r(mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
+ punpckhdq_r2r (mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
+ movq_r2r (mm5, mm1);
+
+ punpckldq_r2r (mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
/* tmp7 = z11 + z13; : phase 5 */
/* tmp8 = z11 - z13; : phase 5 */
- psubw_r2r(mm4, mm1); /* tmp8 */
+ psubw_r2r (mm4, mm1); /* tmp8 */
- paddw_r2r(mm4, mm5); /* tmp7 */
+ paddw_r2r (mm4, mm5); /* tmp7 */
/* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */
- psllw_i2r(2, mm1);
+ psllw_i2r (2, mm1);
- psllw_i2r(2, mm0);
+ psllw_i2r (2, mm0);
- pmulhw_m2r(fix_141, mm1); /* tmp21 */
+ pmulhw_m2r (fix_141, mm1); /* tmp21 */
/* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) 2*(c2-c6) */
/* + MULTIPLY(z10, - FIX_1_847759065); : 2*c2 */
- psllw_i2r(2, mm3);
- movq_r2r(mm0, mm7);
+ psllw_i2r (2, mm3);
+ movq_r2r (mm0, mm7);
- pmulhw_m2r(fix_n184, mm7);
- movq_r2r(mm3, mm6);
+ pmulhw_m2r (fix_n184, mm7);
+ movq_r2r (mm3, mm6);
- movq_m2r(*(wsptr), mm2); /* tmp0,final1 */
+ movq_m2r (*(wsptr), mm2); /* tmp0,final1 */
- pmulhw_m2r(fix_108n184, mm6);
+ pmulhw_m2r (fix_108n184, mm6);
/* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) : -2*(c2+c6) */
/* + MULTIPLY(z12, FIX_1_847759065); 2*c2 */
- movq_r2r(mm2, mm4); /* final1 */
-
- pmulhw_m2r(fix_184n261, mm0);
- paddw_r2r(mm5, mm2); /* tmp0+tmp7,final1 */
+ movq_r2r (mm2, mm4); /* final1 */
+
+ pmulhw_m2r (fix_184n261, mm0);
+ paddw_r2r (mm5, mm2); /* tmp0+tmp7,final1 */
- pmulhw_m2r(fix_184, mm3);
- psubw_r2r(mm5, mm4); /* tmp0-tmp7,final1 */
+ pmulhw_m2r (fix_184, mm3);
+ psubw_r2r (mm5, mm4); /* tmp0-tmp7,final1 */
/* tmp6 = tmp22 - tmp7; phase 2 */
- psraw_i2r(3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
+ psraw_i2r (3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
- paddw_r2r(mm6, mm7); /* tmp20 */
- psraw_i2r(3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
+ paddw_r2r (mm6, mm7); /* tmp20 */
+ psraw_i2r (3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
- paddw_r2r(mm0, mm3); /* tmp22 */
+ paddw_r2r (mm0, mm3); /* tmp22 */
/* tmp5 = tmp21 - tmp6; */
- psubw_r2r(mm5, mm3); /* tmp6 */
+ psubw_r2r (mm5, mm3); /* tmp6 */
/* tmp4 = tmp20 + tmp5; */
- movq_m2r(*(wsptr+1), mm0); /* tmp1,final2 */
- psubw_r2r(mm3, mm1); /* tmp5 */
+ movq_m2r (*(wsptr + 1), mm0); /* tmp1,final2 */
+ psubw_r2r (mm3, mm1); /* tmp5 */
- movq_r2r(mm0, mm6); /* final2 */
- paddw_r2r(mm3, mm0); /* tmp1+tmp6,final2 */
+ movq_r2r (mm0, mm6); /* final2 */
+ paddw_r2r (mm3, mm0); /* tmp1+tmp6,final2 */
- /* Final output stage: scale down by a factor of 8 and range-limit */
+ /* Final output stage: scale down by a factor of 8 and range-limit */
/* outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) */
@@ -1776,30 +1781,30 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* & RANGE_MASK]; */
/* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */
/* & RANGE_MASK]; final2 */
- psubw_r2r(mm3, mm6); /* tmp1-tmp6,final2 */
- psraw_i2r(3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */
+ psubw_r2r (mm3, mm6); /* tmp1-tmp6,final2 */
+ psraw_i2r (3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */
+
+ psraw_i2r (3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */
+
+ packuswb_r2r (mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
- psraw_i2r(3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */
-
- packuswb_r2r(mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
-
- movq_m2r(*(wsptr+2), mm5); /* tmp2,final3 */
- packuswb_r2r(mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
+ movq_m2r (*(wsptr + 2), mm5); /* tmp2,final3 */
+ packuswb_r2r (mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
/* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */
/* & RANGE_MASK]; */
/* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */
/* & RANGE_MASK]; final3 */
- paddw_r2r(mm1, mm7); /* tmp4 */
- movq_r2r(mm5, mm3);
+ paddw_r2r (mm1, mm7); /* tmp4 */
+ movq_r2r (mm5, mm3);
- paddw_r2r(mm1, mm5); /* tmp2+tmp5 */
- psubw_r2r(mm1, mm3); /* tmp2-tmp5 */
+ paddw_r2r (mm1, mm5); /* tmp2+tmp5 */
+ psubw_r2r (mm1, mm3); /* tmp2-tmp5 */
- psraw_i2r(3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */
+ psraw_i2r (3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */
- movq_m2r(*(wsptr+3), mm4); /* tmp3,final4 */
- psraw_i2r(3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */
+ movq_m2r (*(wsptr + 3), mm4); /* tmp3,final4 */
+ psraw_i2r (3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */
@@ -1807,74 +1812,74 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* & RANGE_MASK]; */
/* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */
/* & RANGE_MASK]; final4 */
- movq_r2r(mm4, mm6);
- paddw_r2r(mm7, mm4); /* tmp3+tmp4 */
+ movq_r2r (mm4, mm6);
+ paddw_r2r (mm7, mm4); /* tmp3+tmp4 */
- psubw_r2r(mm7, mm6); /* tmp3-tmp4 */
- psraw_i2r(3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */
+ psubw_r2r (mm7, mm6); /* tmp3-tmp4 */
+ psraw_i2r (3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */
- /* mov ecx, [dataptr] */
+ /* mov ecx, [dataptr] */
- psraw_i2r(3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */
+ psraw_i2r (3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */
- packuswb_r2r(mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
+ packuswb_r2r (mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
- packuswb_r2r(mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
- movq_r2r(mm2, mm4);
+ packuswb_r2r (mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
+ movq_r2r (mm2, mm4);
- movq_r2r(mm5, mm7);
- punpcklbw_r2r(mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
+ movq_r2r (mm5, mm7);
+ punpcklbw_r2r (mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
- punpckhbw_r2r(mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
- movq_r2r(mm2, mm1);
+ punpckhbw_r2r (mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
+ movq_r2r (mm2, mm1);
- punpcklbw_r2r(mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
+ punpcklbw_r2r (mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
- /* add dataptr, 4 */
+ /* add dataptr, 4 */
- punpckhbw_r2r(mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
+ punpckhbw_r2r (mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
- punpcklwd_r2r(mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
-
- /* add ecx, output_col */
+ punpcklwd_r2r (mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
- movq_r2r(mm7, mm6);
- punpckhwd_r2r(mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
+ /* add ecx, output_col */
- movq_r2r(mm2, mm0);
- punpcklwd_r2r(mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
+ movq_r2r (mm7, mm6);
+ punpckhwd_r2r (mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
- /* mov idata, [dataptr] */
-
- punpckldq_r2r(mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
+ movq_r2r (mm2, mm0);
+ punpcklwd_r2r (mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
- /* add dataptr, 4 */
-
- movq_r2r(mm1, mm3);
+ /* mov idata, [dataptr] */
- /* add idata, output_col */
-
- punpckhwd_r2r(mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
-
- movq_r2m(mm2, *(dataptr));
-
- punpckhdq_r2r(mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
+ punpckldq_r2r (mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
- dataptr += rskip;
- movq_r2m(mm0, *(dataptr));
+ /* add dataptr, 4 */
- punpckldq_r2r(mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
- punpckhdq_r2r(mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
-
- dataptr += rskip;
- movq_r2m(mm1, *(dataptr));
+ movq_r2r (mm1, mm3);
- dataptr += rskip;
- movq_r2m(mm3, *(dataptr));
+ /* add idata, output_col */
+
+ punpckhwd_r2r (mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
+
+ movq_r2m (mm2, *(dataptr));
+
+ punpckhdq_r2r (mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
+
+ dataptr += rskip;
+ movq_r2m (mm0, *(dataptr));
+
+ punpckldq_r2r (mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
+ punpckhdq_r2r (mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
+
+ dataptr += rskip;
+ movq_r2m (mm1, *(dataptr));
+
+ dataptr += rskip;
+ movq_r2m (mm3, *(dataptr));
/*******************************************************************/
- wsptr += 8;
+ wsptr += 8;
/*******************************************************************/
@@ -1882,249 +1887,249 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */
/* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */
/* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */
- movq_m2r(*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+ movq_m2r (*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+
+ movq_m2r (*(wsptr + 1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */
+ movq_r2r (mm0, mm2);
- movq_m2r(*(wsptr+1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */
- movq_r2r(mm0, mm2);
-
- movq_m2r(*(wsptr+2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */
- paddw_r2r(mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
+ movq_m2r (*(wsptr + 2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ paddw_r2r (mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
- movq_m2r(*(wsptr+3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */
- psubw_r2r(mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
+ movq_m2r (*(wsptr + 3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */
+ psubw_r2r (mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
- movq_r2r(mm0, mm6);
- movq_r2r(mm3, mm5);
-
- paddw_r2r(mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
- movq_r2r(mm2, mm1);
+ movq_r2r (mm0, mm6);
+ movq_r2r (mm3, mm5);
- psubw_r2r(mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
- punpcklwd_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
+ paddw_r2r (mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
+ movq_r2r (mm2, mm1);
- movq_m2r(*(wsptr+7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */
- punpckhwd_r2r(mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
+ psubw_r2r (mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
+ punpcklwd_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
- movq_m2r(*(wsptr+4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */
- punpckldq_r2r(mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
+ movq_m2r (*(wsptr + 7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ punpckhwd_r2r (mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
- punpcklwd_r2r(mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
- movq_r2r(mm3, mm4);
+ movq_m2r (*(wsptr + 4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckldq_r2r (mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
- movq_m2r(*(wsptr+6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */
- punpckhwd_r2r(mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
+ punpcklwd_r2r (mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
+ movq_r2r (mm3, mm4);
- movq_m2r(*(wsptr+5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */
- punpckldq_r2r(mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
+ movq_m2r (*(wsptr + 6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */
+ punpckhwd_r2r (mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
- paddw_r2r(mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
- movq_r2r(mm6, mm2);
+ movq_m2r (*(wsptr + 5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckldq_r2r (mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
- psubw_r2r(mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
- paddw_r2r(mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
+ paddw_r2r (mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
+ movq_r2r (mm6, mm2);
- movq_r2r(mm3, mm5);
- punpcklwd_r2r(mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
-
- psubw_r2r(mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
- punpckhwd_r2r(mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
+ psubw_r2r (mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
+ paddw_r2r (mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
- movq_r2r(mm4, mm7);
- punpckldq_r2r(mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
+ movq_r2r (mm3, mm5);
+ punpcklwd_r2r (mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
- punpcklwd_r2r(mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
+ psubw_r2r (mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
+ punpckhwd_r2r (mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
- punpckhwd_r2r(mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
+ movq_r2r (mm4, mm7);
+ punpckldq_r2r (mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
- punpckldq_r2r(mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
- movq_r2r(mm1, mm6);
+ punpcklwd_r2r (mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
- /*OK */
+ punpckhwd_r2r (mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
+
+ punpckldq_r2r (mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
+ movq_r2r (mm1, mm6);
+
+ /*OK */
/* mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
/* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
- movq_r2r(mm0, mm2);
- punpckhdq_r2r(mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
+ movq_r2r (mm0, mm2);
+ punpckhdq_r2r (mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
- punpckldq_r2r(mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
- psllw_i2r(2, mm6);
+ punpckldq_r2r (mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
+ psllw_i2r (2, mm6);
- pmulhw_m2r(fix_141, mm6);
- punpckldq_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
+ pmulhw_m2r (fix_141, mm6);
+ punpckldq_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
- punpckhdq_r2r(mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
- movq_r2r(mm0, mm7);
+ punpckhdq_r2r (mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
+ movq_r2r (mm0, mm7);
/* tmp0 = tmp10 + tmp13; */
/* tmp3 = tmp10 - tmp13; */
- paddw_r2r(mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
- psubw_r2r(mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
+ paddw_r2r (mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
+ psubw_r2r (mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
/* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */
- psubw_r2r(mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
+ psubw_r2r (mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
/* tmp1 = tmp11 + tmp12; */
/* tmp2 = tmp11 - tmp12; */
- movq_r2r(mm1, mm5);
+ movq_r2r (mm1, mm5);
- /*OK */
+ /*OK */
- /* Odd part */
+ /* Odd part */
/* z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; */
/* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */
/* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */
/* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */
- movq_m2r(*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */
- paddw_r2r(mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
+ movq_m2r (*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+ paddw_r2r (mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
- movq_m2r(*(wsptr+1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */
- psubw_r2r(mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
+ movq_m2r (*(wsptr + 1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */
+ psubw_r2r (mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
- movq_r2r(mm3, mm6);
- punpckldq_r2r(mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */
+ movq_r2r (mm3, mm6);
+ punpckldq_r2r (mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */
- punpckhdq_r2r(mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */
- movq_r2r(mm3, mm2);
+ punpckhdq_r2r (mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */
+ movq_r2r (mm3, mm2);
/*Save tmp0 and tmp1 in wsptr */
- movq_r2m(mm0, *(wsptr)); /* save tmp0 */
- paddw_r2r(mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
+ movq_r2m (mm0, *(wsptr)); /* save tmp0 */
+ paddw_r2r (mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
+
-
/*Continue with z10 --- z13 */
- movq_m2r(*(wsptr+2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */
- psubw_r2r(mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
+ movq_m2r (*(wsptr + 2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ psubw_r2r (mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
- movq_m2r(*(wsptr+3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */
- movq_r2r(mm6, mm4);
+ movq_m2r (*(wsptr + 3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */
+ movq_r2r (mm6, mm4);
- movq_r2m(mm1, *(wsptr+1)); /* save tmp1 */
- punpckldq_r2r(mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */
+ movq_r2m (mm1, *(wsptr + 1)); /* save tmp1 */
+ punpckldq_r2r (mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */
+
+ punpckhdq_r2r (mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */
+ movq_r2r (mm6, mm1);
- punpckhdq_r2r(mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */
- movq_r2r(mm6, mm1);
-
/*Save tmp2 and tmp3 in wsptr */
- paddw_r2r(mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
- movq_r2r(mm2, mm4);
-
+ paddw_r2r (mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
+ movq_r2r (mm2, mm4);
+
/*Continue with z10 --- z13 */
- movq_r2m(mm5, *(wsptr+2)); /* save tmp2 */
- punpcklwd_r2r(mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
+ movq_r2m (mm5, *(wsptr + 2)); /* save tmp2 */
+ punpcklwd_r2r (mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
- psubw_r2r(mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
- punpckhwd_r2r(mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
+ psubw_r2r (mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
+ punpckhwd_r2r (mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
- movq_r2r(mm3, mm0);
- punpcklwd_r2r(mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
+ movq_r2r (mm3, mm0);
+ punpcklwd_r2r (mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
- movq_r2m(mm7, *(wsptr+3)); /* save tmp3 */
- punpckhwd_r2r(mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
+ movq_r2m (mm7, *(wsptr + 3)); /* save tmp3 */
+ punpckhwd_r2r (mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
- movq_m2r(*(wsptr+4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */
- punpckhdq_r2r(mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
+ movq_m2r (*(wsptr + 4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckhdq_r2r (mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
- movq_m2r(*(wsptr+5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */
- punpckhdq_r2r(mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
+ movq_m2r (*(wsptr + 5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckhdq_r2r (mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
- movq_m2r(*(wsptr+6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */
- movq_r2r(mm6, mm4);
+ movq_m2r (*(wsptr + 6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */
+ movq_r2r (mm6, mm4);
- punpckldq_r2r(mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */
- movq_r2r(mm1, mm5);
+ punpckldq_r2r (mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */
+ movq_r2r (mm1, mm5);
- punpckhdq_r2r(mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */
- movq_r2r(mm6, mm2);
-
- movq_m2r(*(wsptr+7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */
- paddw_r2r(mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
+ punpckhdq_r2r (mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */
+ movq_r2r (mm6, mm2);
- psubw_r2r(mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
- punpckldq_r2r(mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */
+ movq_m2r (*(wsptr + 7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ paddw_r2r (mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
- punpckhdq_r2r(mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */
- movq_r2r(mm1, mm7);
+ psubw_r2r (mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
+ punpckldq_r2r (mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */
- paddw_r2r(mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
- psubw_r2r(mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
+ punpckhdq_r2r (mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */
+ movq_r2r (mm1, mm7);
- movq_r2r(mm6, mm5);
- punpcklwd_r2r(mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
+ paddw_r2r (mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
+ psubw_r2r (mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
- punpckhwd_r2r(mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
- movq_r2r(mm2, mm4);
+ movq_r2r (mm6, mm5);
+ punpcklwd_r2r (mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
- punpcklwd_r2r(mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
+ punpckhwd_r2r (mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
+ movq_r2r (mm2, mm4);
- punpckhwd_r2r(mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
+ punpcklwd_r2r (mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
- punpckhdq_r2r(mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
+ punpckhwd_r2r (mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
- punpckhdq_r2r(mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
- movq_r2r(mm0, mm5);
+ punpckhdq_r2r (mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
- punpckldq_r2r(mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
+ punpckhdq_r2r (mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
+ movq_r2r (mm0, mm5);
- punpckhdq_r2r(mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
- movq_r2r(mm3, mm4);
+ punpckldq_r2r (mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
- punpckhdq_r2r(mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
- movq_r2r(mm5, mm1);
+ punpckhdq_r2r (mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
+ movq_r2r (mm3, mm4);
- punpckldq_r2r(mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
+ punpckhdq_r2r (mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
+ movq_r2r (mm5, mm1);
+
+ punpckldq_r2r (mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
/* tmp7 = z11 + z13; : phase 5 */
/* tmp8 = z11 - z13; : phase 5 */
- psubw_r2r(mm4, mm1); /* tmp8 */
+ psubw_r2r (mm4, mm1); /* tmp8 */
- paddw_r2r(mm4, mm5); /* tmp7 */
+ paddw_r2r (mm4, mm5); /* tmp7 */
/* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */
- psllw_i2r(2, mm1);
+ psllw_i2r (2, mm1);
- psllw_i2r(2, mm0);
+ psllw_i2r (2, mm0);
- pmulhw_m2r(fix_141, mm1); /* tmp21 */
+ pmulhw_m2r (fix_141, mm1); /* tmp21 */
/* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) : 2*(c2-c6) */
/* + MULTIPLY(z10, - FIX_1_847759065); : 2*c2 */
- psllw_i2r(2, mm3);
- movq_r2r(mm0, mm7);
+ psllw_i2r (2, mm3);
+ movq_r2r (mm0, mm7);
- pmulhw_m2r(fix_n184, mm7);
- movq_r2r(mm3, mm6);
+ pmulhw_m2r (fix_n184, mm7);
+ movq_r2r (mm3, mm6);
- movq_m2r(*(wsptr), mm2); /* tmp0,final1 */
+ movq_m2r (*(wsptr), mm2); /* tmp0,final1 */
- pmulhw_m2r(fix_108n184, mm6);
+ pmulhw_m2r (fix_108n184, mm6);
/* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) : -2*(c2+c6) */
/* + MULTIPLY(z12, FIX_1_847759065); : 2*c2 */
- movq_r2r(mm2, mm4); /* final1 */
-
- pmulhw_m2r(fix_184n261, mm0);
- paddw_r2r(mm5, mm2); /* tmp0+tmp7,final1 */
+ movq_r2r (mm2, mm4); /* final1 */
+
+ pmulhw_m2r (fix_184n261, mm0);
+ paddw_r2r (mm5, mm2); /* tmp0+tmp7,final1 */
- pmulhw_m2r(fix_184, mm3);
- psubw_r2r(mm5, mm4); /* tmp0-tmp7,final1 */
+ pmulhw_m2r (fix_184, mm3);
+ psubw_r2r (mm5, mm4); /* tmp0-tmp7,final1 */
/* tmp6 = tmp22 - tmp7; phase 2 */
- psraw_i2r(3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
+ psraw_i2r (3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
- paddw_r2r(mm6, mm7); /* tmp20 */
- psraw_i2r(3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
+ paddw_r2r (mm6, mm7); /* tmp20 */
+ psraw_i2r (3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
- paddw_r2r(mm0, mm3); /* tmp22 */
+ paddw_r2r (mm0, mm3); /* tmp22 */
/* tmp5 = tmp21 - tmp6; */
- psubw_r2r(mm5, mm3); /* tmp6 */
+ psubw_r2r (mm5, mm3); /* tmp6 */
/* tmp4 = tmp20 + tmp5; */
- movq_m2r(*(wsptr+1), mm0); /* tmp1,final2 */
- psubw_r2r(mm3, mm1); /* tmp5 */
+ movq_m2r (*(wsptr + 1), mm0); /* tmp1,final2 */
+ psubw_r2r (mm3, mm1); /* tmp5 */
- movq_r2r(mm0, mm6); /* final2 */
- paddw_r2r(mm3, mm0); /* tmp1+tmp6,final2 */
+ movq_r2r (mm0, mm6); /* final2 */
+ paddw_r2r (mm3, mm0); /* tmp1+tmp6,final2 */
- /* Final output stage: scale down by a factor of 8 and range-limit */
+ /* Final output stage: scale down by a factor of 8 and range-limit */
/* outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) */
/* & RANGE_MASK]; */
@@ -2136,30 +2141,30 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* & RANGE_MASK]; */
/* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */
/* & RANGE_MASK]; final2 */
- psubw_r2r(mm3, mm6); /* tmp1-tmp6,final2 */
- psraw_i2r(3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */
+ psubw_r2r (mm3, mm6); /* tmp1-tmp6,final2 */
+ psraw_i2r (3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */
+
+ psraw_i2r (3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */
+
+ packuswb_r2r (mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
- psraw_i2r(3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */
-
- packuswb_r2r(mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
-
- movq_m2r(*(wsptr+2), mm5); /* tmp2,final3 */
- packuswb_r2r(mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
+ movq_m2r (*(wsptr + 2), mm5); /* tmp2,final3 */
+ packuswb_r2r (mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
/* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */
/* & RANGE_MASK]; */
/* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */
/* & RANGE_MASK]; final3 */
- paddw_r2r(mm1, mm7); /* tmp4 */
- movq_r2r(mm5, mm3);
+ paddw_r2r (mm1, mm7); /* tmp4 */
+ movq_r2r (mm5, mm3);
- paddw_r2r(mm1, mm5); /* tmp2+tmp5 */
- psubw_r2r(mm1, mm3); /* tmp2-tmp5 */
+ paddw_r2r (mm1, mm5); /* tmp2+tmp5 */
+ psubw_r2r (mm1, mm3); /* tmp2-tmp5 */
- psraw_i2r(3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */
+ psraw_i2r (3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */
- movq_m2r(*(wsptr+3), mm4); /* tmp3,final4 */
- psraw_i2r(3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */
+ movq_m2r (*(wsptr + 3), mm4); /* tmp3,final4 */
+ psraw_i2r (3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */
@@ -2167,68 +2172,68 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* & RANGE_MASK]; */
/* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */
/* & RANGE_MASK]; final4 */
- movq_r2r(mm4, mm6);
- paddw_r2r(mm7, mm4); /* tmp3+tmp4 */
+ movq_r2r (mm4, mm6);
+ paddw_r2r (mm7, mm4); /* tmp3+tmp4 */
- psubw_r2r(mm7, mm6); /* tmp3-tmp4 */
- psraw_i2r(3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */
+ psubw_r2r (mm7, mm6); /* tmp3-tmp4 */
+ psraw_i2r (3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */
- psraw_i2r(3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */
+ psraw_i2r (3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */
- /*
- movq_r2m(mm4, *dummy);
- fprintf(stderr, "3-4 %016llx\n", dummy);
- movq_r2m(mm4, *dummy);
- fprintf(stderr, "3+4 %016llx\n", dummy);
- */
-
+ /*
+ movq_r2m(mm4, *dummy);
+ fprintf(stderr, "3-4 %016llx\n", dummy);
+ movq_r2m(mm4, *dummy);
+ fprintf(stderr, "3+4 %016llx\n", dummy);
+ */
- packuswb_r2r(mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
- packuswb_r2r(mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
- movq_r2r(mm2, mm4);
+ packuswb_r2r (mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
- movq_r2r(mm5, mm7);
- punpcklbw_r2r(mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
+ packuswb_r2r (mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
+ movq_r2r (mm2, mm4);
- punpckhbw_r2r(mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
- movq_r2r(mm2, mm1);
+ movq_r2r (mm5, mm7);
+ punpcklbw_r2r (mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
- punpcklbw_r2r(mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
-
- punpckhbw_r2r(mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
+ punpckhbw_r2r (mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
+ movq_r2r (mm2, mm1);
- punpcklwd_r2r(mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
-
- movq_r2r(mm7, mm6);
- punpckhwd_r2r(mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
+ punpcklbw_r2r (mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
- movq_r2r(mm2, mm0);
- punpcklwd_r2r(mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
+ punpckhbw_r2r (mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
- punpckldq_r2r(mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
+ punpcklwd_r2r (mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
- movq_r2r(mm1, mm3);
+ movq_r2r (mm7, mm6);
+ punpckhwd_r2r (mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
- punpckhwd_r2r(mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
-
- dataptr += rskip;
- movq_r2m(mm2, *(dataptr));
+ movq_r2r (mm2, mm0);
+ punpcklwd_r2r (mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
- punpckhdq_r2r(mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
+ punpckldq_r2r (mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
- dataptr += rskip;
- movq_r2m(mm0, *(dataptr));
+ movq_r2r (mm1, mm3);
- punpckldq_r2r(mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
-
- punpckhdq_r2r(mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
+ punpckhwd_r2r (mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
- dataptr += rskip;
- movq_r2m(mm1, *(dataptr));
+ dataptr += rskip;
+ movq_r2m (mm2, *(dataptr));
- dataptr += rskip;
- movq_r2m(mm3, *(dataptr));
+ punpckhdq_r2r (mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
+
+ dataptr += rskip;
+ movq_r2m (mm0, *(dataptr));
+
+ punpckldq_r2r (mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
+
+ punpckhdq_r2r (mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
+
+ dataptr += rskip;
+ movq_r2m (mm1, *(dataptr));
+
+ dataptr += rskip;
+ movq_r2m (mm3, *(dataptr));
#else
__s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
@@ -2244,9 +2249,9 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
inptr = data;
wsptr = workspace;
for (ctr = 8; ctr > 0; ctr--) {
-
+
if ((inptr[8] | inptr[16] | inptr[24] |
- inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
+ inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
dcval = inptr[0];
wsptr[0] = dcval;
wsptr[8] = dcval;
@@ -2256,12 +2261,12 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
wsptr[40] = dcval;
wsptr[48] = dcval;
wsptr[56] = dcval;
-
- inptr++;
+
+ inptr++;
wsptr++;
continue;
- }
-
+ }
+
tmp0 = inptr[0];
tmp1 = inptr[16];
tmp2 = inptr[32];
@@ -2271,13 +2276,13 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
tmp11 = tmp0 - tmp2;
tmp13 = tmp1 + tmp3;
- tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13;
+ tmp12 = MULTIPLY (tmp1 - tmp3, FIX_1_414213562) - tmp13;
tmp0 = tmp10 + tmp13;
tmp3 = tmp10 - tmp13;
tmp1 = tmp11 + tmp12;
tmp2 = tmp11 - tmp12;
-
+
tmp4 = inptr[8];
tmp5 = inptr[24];
tmp6 = inptr[40];
@@ -2289,11 +2294,11 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
z12 = tmp4 - tmp7;
tmp7 = z11 + z13;
- tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
+ tmp11 = MULTIPLY (z11 - z13, FIX_1_414213562);
- z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
- tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
- tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
+ z5 = MULTIPLY (z10 + z12, FIX_1_847759065);
+ tmp10 = MULTIPLY (z12, FIX_1_082392200) - z5;
+ tmp12 = MULTIPLY (z10, -FIX_2_613125930) + z5;
tmp6 = tmp12 - tmp7;
tmp5 = tmp11 - tmp6;
@@ -2314,13 +2319,13 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
wsptr = workspace;
for (ctr = 0; ctr < 8; ctr++) {
- outptr = &(odata[ctr*rskip]);
+ outptr = &(odata[ctr * rskip]);
tmp10 = wsptr[0] + wsptr[4];
tmp11 = wsptr[0] - wsptr[4];
tmp13 = wsptr[2] + wsptr[6];
- tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
+ tmp12 = MULTIPLY (wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
tmp0 = tmp10 + tmp13;
tmp3 = tmp10 - tmp13;
@@ -2333,29 +2338,30 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
z12 = wsptr[1] - wsptr[7];
tmp7 = z11 + z13;
- tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
+ tmp11 = MULTIPLY (z11 - z13, FIX_1_414213562);
- z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
- tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
- tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
+ z5 = MULTIPLY (z10 + z12, FIX_1_847759065);
+ tmp10 = MULTIPLY (z12, FIX_1_082392200) - z5;
+ tmp12 = MULTIPLY (z10, -FIX_2_613125930) + z5;
tmp6 = tmp12 - tmp7;
tmp5 = tmp11 - tmp6;
tmp4 = tmp10 + tmp5;
- outptr[0] = RL(DESCALE(tmp0 + tmp7));
- outptr[7] = RL(DESCALE(tmp0 - tmp7));
- outptr[1] = RL(DESCALE(tmp1 + tmp6));
- outptr[6] = RL(DESCALE(tmp1 - tmp6));
- outptr[2] = RL(DESCALE(tmp2 + tmp5));
- outptr[5] = RL(DESCALE(tmp2 - tmp5));
- outptr[4] = RL(DESCALE(tmp3 + tmp4));
- outptr[3] = RL(DESCALE(tmp3 - tmp4));
+ outptr[0] = RL (DESCALE (tmp0 + tmp7));
+ outptr[7] = RL (DESCALE (tmp0 - tmp7));
+ outptr[1] = RL (DESCALE (tmp1 + tmp6));
+ outptr[6] = RL (DESCALE (tmp1 - tmp6));
+ outptr[2] = RL (DESCALE (tmp2 + tmp5));
+ outptr[5] = RL (DESCALE (tmp2 - tmp5));
+ outptr[4] = RL (DESCALE (tmp3 + tmp4));
+ outptr[3] = RL (DESCALE (tmp3 - tmp4));
wsptr += 8;
}
#endif
}
+
/*
Main Routines
@@ -2374,24 +2380,25 @@ Initialise all the cache-aliged data blocks
*/
-void RTjpeg_init_data(void)
+void
+RTjpeg_init_data (void)
{
- unsigned long dptr;
-
- dptr=(unsigned long)&(RTjpeg_alldata[0]);
- dptr+=32;
- dptr=dptr>>5;
- dptr=dptr<<5; /* cache align data */
-
- RTjpeg_block=(__s16 *)dptr;
- dptr+=sizeof(__s16)*64;
- RTjpeg_lqt=(__s32 *)dptr;
- dptr+=sizeof(__s32)*64;
- RTjpeg_cqt=(__s32 *)dptr;
- dptr+=sizeof(__s32)*64;
- RTjpeg_liqt=(__u32 *)dptr;
- dptr+=sizeof(__u32)*64;
- RTjpeg_ciqt=(__u32 *)dptr;
+ unsigned long dptr; /* carve-out address kept as an integer; NOTE(review): assumes a pointer fits in unsigned long -- confirm for the target ABI */
+
+ dptr = (unsigned long) &(RTjpeg_alldata[0]); /* start of the backing buffer */
+ dptr += 32; /* bump first so the masking below rounds up; an already aligned start still moves 32 bytes */
+ dptr = dptr >> 5; /* discard the low 5 address bits ... */
+ dptr = dptr << 5; /* ... leaving a multiple of 32 (cache align data) */
+
+ RTjpeg_block = (__s16 *) dptr; /* 64 __s16 entries at the aligned base */
+ dptr += sizeof (__s16) * 64;
+ RTjpeg_lqt = (__s32 *) dptr; /* 64 __s32 entries, directly after RTjpeg_block */
+ dptr += sizeof (__s32) * 64;
+ RTjpeg_cqt = (__s32 *) dptr; /* 64 __s32 entries */
+ dptr += sizeof (__s32) * 64;
+ RTjpeg_liqt = (__u32 *) dptr; /* 64 __u32 entries */
+ dptr += sizeof (__u32) * 64;
+ RTjpeg_ciqt = (__u32 *) dptr; /* last region; length not recorded here -- presumably 64 __u32 like RTjpeg_liqt, verify against the size of RTjpeg_alldata */
}
/*
@@ -2405,35 +2412,39 @@ Input: buf -> pointer to 128 ints for quant values store to pass back to
Q -> quality factor (192=best, 32=worst)
*/
-void RTjpeg_init_Q(__u8 Q)
+void
+RTjpeg_init_Q (__u8 Q)
{
- int i;
- __u64 qual;
-
- qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */
-
- for(i=0; i<64; i++)
- {
- RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
- if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
- RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
- if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
- RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
- RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
- RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
- RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
- }
-
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
-
- RTjpeg_dct_init();
- RTjpeg_idct_init();
- RTjpeg_quant_init();
+ int i;
+ __u64 qual;
+
+ qual = (__u64) Q << (32 - 7); /* 32 bit FP, 255=2, 0=0 */
+
+ for (i = 0; i < 64; i++) {
+ RTjpeg_lqt[i] =
+ (__s32) ((qual / ((__u64) RTjpeg_lum_quant_tbl[i] << 16)) >> 3);
+ if (RTjpeg_lqt[i] == 0)
+ RTjpeg_lqt[i] = 1;
+ RTjpeg_cqt[i] =
+ (__s32) ((qual / ((__u64) RTjpeg_chrom_quant_tbl[i] << 16)) >> 3);
+ if (RTjpeg_cqt[i] == 0)
+ RTjpeg_cqt[i] = 1;
+ RTjpeg_liqt[i] = (1 << 16) / (RTjpeg_lqt[i] << 3);
+ RTjpeg_ciqt[i] = (1 << 16) / (RTjpeg_cqt[i] << 3);
+ RTjpeg_lqt[i] = ((1 << 16) / RTjpeg_liqt[i]) >> 3;
+ RTjpeg_cqt[i] = ((1 << 16) / RTjpeg_ciqt[i]) >> 3;
+ }
+
+ RTjpeg_lb8 = 0;
+ while (RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]] <= 8);
+ RTjpeg_lb8--;
+ RTjpeg_cb8 = 0;
+ while (RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]] <= 8);
+ RTjpeg_cb8--;
+
+ RTjpeg_dct_init ();
+ RTjpeg_idct_init ();
+ RTjpeg_quant_init ();
}
/*
@@ -2450,337 +2461,338 @@ Input: buf -> pointer to 128 ints for quant values store to pass back to
*/
-void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q)
+void
+RTjpeg_init_compress (__u32 * buf, int width, int height, __u8 Q)
{
- int i;
- __u64 qual;
-
- RTjpeg_init_data();
-
- RTjpeg_width=width;
- RTjpeg_height=height;
- RTjpeg_Ywidth = RTjpeg_width>>3;
- RTjpeg_Ysize=width * height;
- RTjpeg_Cwidth = RTjpeg_width>>4;
- RTjpeg_Csize= (width>>1) * height;
-
- qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */
-
- for(i=0; i<64; i++)
- {
- RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
- if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
- RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
- if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
- RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
- RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
- RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
- RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
- }
-
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
-
- RTjpeg_dct_init();
- RTjpeg_quant_init();
-
- for(i=0; i<64; i++)
- buf[i]=RTjpeg_liqt[i];
- for(i=0; i<64; i++)
- buf[64+i]=RTjpeg_ciqt[i];
+ int i;
+ __u64 qual;
+
+ RTjpeg_init_data ();
+
+ RTjpeg_width = width;
+ RTjpeg_height = height;
+ RTjpeg_Ywidth = RTjpeg_width >> 3;
+ RTjpeg_Ysize = width * height;
+ RTjpeg_Cwidth = RTjpeg_width >> 4;
+ RTjpeg_Csize = (width >> 1) * height;
+
+ qual = (__u64) Q << (32 - 7); /* 32 bit FP, 255=2, 0=0 */
+
+ for (i = 0; i < 64; i++) {
+ RTjpeg_lqt[i] =
+ (__s32) ((qual / ((__u64) RTjpeg_lum_quant_tbl[i] << 16)) >> 3);
+ if (RTjpeg_lqt[i] == 0)
+ RTjpeg_lqt[i] = 1;
+ RTjpeg_cqt[i] =
+ (__s32) ((qual / ((__u64) RTjpeg_chrom_quant_tbl[i] << 16)) >> 3);
+ if (RTjpeg_cqt[i] == 0)
+ RTjpeg_cqt[i] = 1;
+ RTjpeg_liqt[i] = (1 << 16) / (RTjpeg_lqt[i] << 3);
+ RTjpeg_ciqt[i] = (1 << 16) / (RTjpeg_cqt[i] << 3);
+ RTjpeg_lqt[i] = ((1 << 16) / RTjpeg_liqt[i]) >> 3;
+ RTjpeg_cqt[i] = ((1 << 16) / RTjpeg_ciqt[i]) >> 3;
+ }
+
+ RTjpeg_lb8 = 0;
+ while (RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]] <= 8);
+ RTjpeg_lb8--;
+ RTjpeg_cb8 = 0;
+ while (RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]] <= 8);
+ RTjpeg_cb8--;
+
+ RTjpeg_dct_init ();
+ RTjpeg_quant_init ();
+
+ for (i = 0; i < 64; i++)
+ buf[i] = RTjpeg_liqt[i];
+ for (i = 0; i < 64; i++)
+ buf[64 + i] = RTjpeg_ciqt[i];
}
-void RTjpeg_init_decompress(__u32 *buf, int width, int height)
+void
+RTjpeg_init_decompress (__u32 * buf, int width, int height)
{
- int i;
-
- RTjpeg_init_data();
-
- RTjpeg_width=width;
- RTjpeg_height=height;
- RTjpeg_Ywidth = RTjpeg_width>>3;
- RTjpeg_Ysize=width * height;
- RTjpeg_Cwidth = RTjpeg_width>>4;
- RTjpeg_Csize= (width>>1) * height;
-
- for(i=0; i<64; i++)
- {
- RTjpeg_liqt[i]=buf[i];
- RTjpeg_ciqt[i]=buf[i+64];
- }
-
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
-
- RTjpeg_idct_init();
+ int i;
+
+ RTjpeg_init_data ();
+
+ RTjpeg_width = width;
+ RTjpeg_height = height;
+ RTjpeg_Ywidth = RTjpeg_width >> 3;
+ RTjpeg_Ysize = width * height;
+ RTjpeg_Cwidth = RTjpeg_width >> 4;
+ RTjpeg_Csize = (width >> 1) * height;
+
+ for (i = 0; i < 64; i++) {
+ RTjpeg_liqt[i] = buf[i];
+ RTjpeg_ciqt[i] = buf[i + 64];
+ }
+
+ RTjpeg_lb8 = 0;
+ while (RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]] <= 8);
+ RTjpeg_lb8--;
+ RTjpeg_cb8 = 0;
+ while (RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]] <= 8);
+ RTjpeg_cb8--;
+
+ RTjpeg_idct_init ();
/* RTjpeg_color_init(); */
}
-int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
+int
+RTjpeg_compressYUV420 (__s8 * sp, unsigned char *bp)
{
- __s8 * sb;
- register __s8 * bp1 = bp + (RTjpeg_width<<3);
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
- register int i, j, k;
+ __s8 *sb;
+ register __s8 *bp1 = bp + (RTjpeg_width << 3);
+ register __s8 *bp2 = bp + RTjpeg_Ysize;
+ register __s8 *bp3 = bp2 + (RTjpeg_Csize >> 1);
+ register int i, j, k;
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- sb=sp;
+ sb = sp;
/* Y */
- for(i=RTjpeg_height>>1; i; i-=8)
- {
- for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
- RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+ for (i = RTjpeg_height >> 1; i; i -= 8) {
+ for (j = 0, k = 0; j < RTjpeg_width; j += 16, k += 8) {
+ RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+
+ RTjpeg_dctY (bp + j + 8, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+
+ RTjpeg_dctY (bp1 + j, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+
+ RTjpeg_dctY (bp1 + j + 8, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+
+ RTjpeg_dctY (bp2 + k, RTjpeg_block, RTjpeg_Cwidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_cqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8);
+
+ RTjpeg_dctY (bp3 + k, RTjpeg_block, RTjpeg_Cwidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_cqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8);
+
+ }
+ bp += RTjpeg_width << 4;
+ bp1 += RTjpeg_width << 4;
+ bp2 += RTjpeg_width << 2;
+ bp3 += RTjpeg_width << 2;
}
- bp+=RTjpeg_width<<4;
- bp1+=RTjpeg_width<<4;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
-
- }
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- return (sp-sb);
+ return (sp - sb);
}
-int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp)
+int
+RTjpeg_compressYUV422 (__s8 * sp, unsigned char *bp)
{
- __s8 * sb;
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + RTjpeg_Csize;
- register int i, j, k;
+ __s8 *sb;
+ register __s8 *bp2 = bp + RTjpeg_Ysize;
+ register __s8 *bp3 = bp2 + RTjpeg_Csize;
+ register int i, j, k;
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- sb=sp;
+ sb = sp;
/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
- for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
- RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+ for (i = RTjpeg_height; i; i -= 8) {
+ for (j = 0, k = 0; j < RTjpeg_width; j += 16, k += 8) {
+ RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+
+ RTjpeg_dctY (bp + j + 8, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+
+ RTjpeg_dctY (bp2 + k, RTjpeg_block, RTjpeg_Cwidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_cqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8);
+
+ RTjpeg_dctY (bp3 + k, RTjpeg_block, RTjpeg_Cwidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_cqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8);
+
+ }
+ bp += RTjpeg_width << 3;
+ bp2 += RTjpeg_width << 2;
+ bp3 += RTjpeg_width << 2;
}
- bp+=RTjpeg_width<<3;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
-
- }
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- return (sp-sb);
+ return (sp - sb);
}
-int RTjpeg_compress8(__s8 *sp, unsigned char *bp)
+int
+RTjpeg_compress8 (__s8 * sp, unsigned char *bp)
{
- __s8 * sb;
- int i, j;
+ __s8 *sb;
+ int i, j;
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
-
- sb=sp;
+
+ sb = sp;
/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
- for(j=0; j<RTjpeg_width; j+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+ for (i = 0; i < RTjpeg_height; i += 8) {
+ for (j = 0; j < RTjpeg_width; j += 8) {
+ RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_width);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+ }
+ bp += RTjpeg_width;
}
- bp+=RTjpeg_width;
- }
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- return (sp-sb);
+ return (sp - sb);
}
-void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp)
+void
+RTjpeg_decompressYUV422 (__s8 * sp, __u8 * bp)
{
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize);
- int i, j,k;
+ register __s8 *bp2 = bp + RTjpeg_Ysize;
+ register __s8 *bp3 = bp2 + (RTjpeg_Csize);
+ int i, j, k;
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
- for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
- }
+ for (i = RTjpeg_height; i; i -= 8) {
+ for (k = 0, j = 0; j < RTjpeg_width; j += 16, k += 8) {
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp + j + 8, RTjpeg_block, RTjpeg_width);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+ RTjpeg_idct (bp2 + k, RTjpeg_block, RTjpeg_width >> 1);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+ RTjpeg_idct (bp3 + k, RTjpeg_block, RTjpeg_width >> 1);
+ }
+ }
+ bp += RTjpeg_width << 3;
+ bp2 += RTjpeg_width << 2;
+ bp3 += RTjpeg_width << 2;
}
- bp+=RTjpeg_width<<3;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
- }
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
}
-void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp)
+void
+RTjpeg_decompressYUV420 (__s8 * sp, __u8 * bp)
{
- register __s8 * bp1 = bp + (RTjpeg_width<<3);
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
- int i, j,k;
+ register __s8 *bp1 = bp + (RTjpeg_width << 3);
+ register __s8 *bp2 = bp + RTjpeg_Ysize;
+ register __s8 *bp3 = bp2 + (RTjpeg_Csize >> 1);
+ int i, j, k;
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
/* Y */
- for(i=RTjpeg_height>>1; i; i-=8)
- {
- for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp1+j, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp1+j+8, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
- }
+ for (i = RTjpeg_height >> 1; i; i -= 8) {
+ for (k = 0, j = 0; j < RTjpeg_width; j += 16, k += 8) {
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp + j + 8, RTjpeg_block, RTjpeg_width);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp1 + j, RTjpeg_block, RTjpeg_width);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp1 + j + 8, RTjpeg_block, RTjpeg_width);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+ RTjpeg_idct (bp2 + k, RTjpeg_block, RTjpeg_width >> 1);
+ }
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+ RTjpeg_idct (bp3 + k, RTjpeg_block, RTjpeg_width >> 1);
+ }
+ }
+ bp += RTjpeg_width << 4;
+ bp1 += RTjpeg_width << 4;
+ bp2 += RTjpeg_width << 2;
+ bp3 += RTjpeg_width << 2;
}
- bp+=RTjpeg_width<<4;
- bp1+=RTjpeg_width<<4;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
- }
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
}
-void RTjpeg_decompress8(__s8 *sp, __u8 *bp)
+void
+RTjpeg_decompress8 (__s8 * sp, __u8 * bp)
{
- int i, j;
+ int i, j;
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
- for(j=0; j<RTjpeg_width; j+=8)
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
- }
- bp+=RTjpeg_width<<3;
- }
+ for (i = 0; i < RTjpeg_height; i += 8) {
+ for (j = 0; j < RTjpeg_width; j += 8)
+ if (*sp == -1)
+ sp++;
+ else {
+ sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+ RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width);
+ }
+ bp += RTjpeg_width << 3;
+ }
}
/*
@@ -2790,211 +2802,213 @@ Initialise additional data structures for motion compensation
*/
-void RTjpeg_init_mcompress(void)
+void
+RTjpeg_init_mcompress (void)
{
- unsigned long tmp;
-
- if(!RTjpeg_old)
- {
- RTjpeg_old=malloc((4*RTjpeg_width*RTjpeg_height)+32);
- tmp=(unsigned long)RTjpeg_old;
- tmp+=32;
- tmp=tmp>>5;
- RTjpeg_old=(__s16 *)(tmp<<5);
- }
- if (!RTjpeg_old)
- {
- fprintf(stderr, "RTjpeg: Could not allocate memory\n");
- exit(-1);
- }
- memset (RTjpeg_old, 0, ((4*RTjpeg_width*RTjpeg_height)));
+ unsigned long tmp;
+
+ if (!RTjpeg_old) {
+ RTjpeg_old = malloc ((4 * RTjpeg_width * RTjpeg_height) + 32);
+ tmp = (unsigned long) RTjpeg_old;
+ tmp += 32;
+ tmp = tmp >> 5;
+ RTjpeg_old = (__s16 *) (tmp << 5);
+ }
+ if (!RTjpeg_old) {
+ fprintf (stderr, "RTjpeg: Could not allocate memory\n");
+ exit (-1);
+ }
+ memset (RTjpeg_old, 0, ((4 * RTjpeg_width * RTjpeg_height)));
}
#ifdef HAVE_LIBMMX
-int RTjpeg_bcomp(__s16 *old, mmx_t *mask)
+int
+RTjpeg_bcomp (__s16 * old, mmx_t * mask)
{
- int i;
- mmx_t *mold=(mmx_t *)old;
- mmx_t *mblock=(mmx_t *)RTjpeg_block;
- mmx_t result;
- static mmx_t neg=(mmx_t)(unsigned long long)0xffffffffffffffffULL;
-
- movq_m2r(*mask, mm7);
- movq_m2r(neg, mm6);
- pxor_r2r(mm5, mm5);
-
- for(i=0; i<8; i++)
- {
- movq_m2r(*(mblock++), mm0);
- movq_m2r(*(mblock++), mm2);
- movq_m2r(*(mold++), mm1);
- movq_m2r(*(mold++), mm3);
- psubsw_r2r(mm1, mm0);
- psubsw_r2r(mm3, mm2);
- movq_r2r(mm0, mm1);
- movq_r2r(mm2, mm3);
- pcmpgtw_r2r(mm7, mm0);
- pcmpgtw_r2r(mm7, mm2);
- pxor_r2r(mm6, mm1);
- pxor_r2r(mm6, mm3);
- pcmpgtw_r2r(mm7, mm1);
- pcmpgtw_r2r(mm7, mm3);
- por_r2r(mm0, mm5);
- por_r2r(mm2, mm5);
- por_r2r(mm1, mm5);
- por_r2r(mm3, mm5);
- }
- movq_r2m(mm5, result);
-
- if(result.q)
- {
- if(!RTjpeg_mtest)
- for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
- return 0;
- }
+ int i;
+ mmx_t *mold = (mmx_t *) old;
+ mmx_t *mblock = (mmx_t *) RTjpeg_block;
+ mmx_t result;
+ static mmx_t neg = (mmx_t) (unsigned long long) 0xffffffffffffffffULL;
+
+ movq_m2r (*mask, mm7);
+ movq_m2r (neg, mm6);
+ pxor_r2r (mm5, mm5);
+
+ for (i = 0; i < 8; i++) {
+ movq_m2r (*(mblock++), mm0);
+ movq_m2r (*(mblock++), mm2);
+ movq_m2r (*(mold++), mm1);
+ movq_m2r (*(mold++), mm3);
+ psubsw_r2r (mm1, mm0);
+ psubsw_r2r (mm3, mm2);
+ movq_r2r (mm0, mm1);
+ movq_r2r (mm2, mm3);
+ pcmpgtw_r2r (mm7, mm0);
+ pcmpgtw_r2r (mm7, mm2);
+ pxor_r2r (mm6, mm1);
+ pxor_r2r (mm6, mm3);
+ pcmpgtw_r2r (mm7, mm1);
+ pcmpgtw_r2r (mm7, mm3);
+ por_r2r (mm0, mm5);
+ por_r2r (mm2, mm5);
+ por_r2r (mm1, mm5);
+ por_r2r (mm3, mm5);
+ }
+ movq_r2m (mm5, result);
+
+ if (result.q) {
+ if (!RTjpeg_mtest)
+ for (i = 0; i < 16; i++)
+ ((__u64 *) old)[i] = ((__u64 *) RTjpeg_block)[i];
+ return 0;
+ }
/* printf("."); */
- return 1;
+ return 1;
}
#else
-int RTjpeg_bcomp(__s16 *old, __u16 *mask)
+int
+RTjpeg_bcomp (__s16 * old, __u16 * mask)
{
- int i;
-
- for(i=0; i<64; i++)
- if(abs(old[i]-RTjpeg_block[i])>*mask)
- {
- if(!RTjpeg_mtest)
- for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
- return 0;
- }
- return 1;
+ int i;
+
+ for (i = 0; i < 64; i++)
+ if (abs (old[i] - RTjpeg_block[i]) > *mask) {
+ if (!RTjpeg_mtest)
+ for (i = 0; i < 16; i++)
+ ((__u64 *) old)[i] = ((__u64 *) RTjpeg_block)[i];
+ return 0;
+ }
+ return 1;
}
#endif
-void RTjpeg_set_test(int i)
+void
+RTjpeg_set_test (int i)
{
- RTjpeg_mtest=i;
+ RTjpeg_mtest = i;
}
-int RTjpeg_mcompress(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
+int
+RTjpeg_mcompress (__s8 * sp, unsigned char *bp, __u16 lmask, __u16 cmask)
{
- __s8 * sb;
- __s16 *block;
- register __s8 * bp2;
- register __s8 * bp3;
- register int i, j, k;
+ __s8 *sb;
+ __s16 *block;
+ register __s8 *bp2;
+ register __s8 *bp3;
+ register int i, j, k;
#ifdef HAVE_LIBMMX
- emms();
- RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
- RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
+ emms ();
+ RTjpeg_lmask =
+ (mmx_t) (((__u64) lmask << 48) | ((__u64) lmask << 32) | ((__u64) lmask <<
+ 16) | lmask);
+ RTjpeg_cmask =
+ (mmx_t) (((__u64) cmask << 48) | ((__u64) cmask << 32) | ((__u64) cmask <<
+ 16) | cmask);
#else
- RTjpeg_lmask=lmask;
- RTjpeg_cmask=cmask;
+ RTjpeg_lmask = lmask;
+ RTjpeg_cmask = cmask;
#endif
-
- bp = bp - RTjpeg_width*0;
- bp2 = bp + RTjpeg_Ysize-RTjpeg_width*0;
- bp3 = bp2 + RTjpeg_Csize;
- sb=sp;
- block=RTjpeg_old;
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
- for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- if(RTjpeg_bcomp(block, &RTjpeg_lmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- block+=64;
-
- RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- if(RTjpeg_bcomp(block, &RTjpeg_lmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- block+=64;
-
- RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- if(RTjpeg_bcomp(block, &RTjpeg_cmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
- block+=64;
-
- RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- if(RTjpeg_bcomp(block, &RTjpeg_cmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
- block+=64;
+ bp = bp - RTjpeg_width * 0;
+ bp2 = bp + RTjpeg_Ysize - RTjpeg_width * 0;
+ bp3 = bp2 + RTjpeg_Csize;
+ sb = sp;
+ block = RTjpeg_old;
+/* Y */
+ for (i = RTjpeg_height; i; i -= 8) {
+ for (j = 0, k = 0; j < RTjpeg_width; j += 16, k += 8) {
+ RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ if (RTjpeg_bcomp (block, &RTjpeg_lmask)) {
+ *((__u8 *) sp++) = 255;
+ } else
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+ block += 64;
+
+ RTjpeg_dctY (bp + j + 8, RTjpeg_block, RTjpeg_Ywidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ if (RTjpeg_bcomp (block, &RTjpeg_lmask)) {
+ *((__u8 *) sp++) = 255;
+ } else
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+ block += 64;
+
+ RTjpeg_dctY (bp2 + k, RTjpeg_block, RTjpeg_Cwidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_cqt);
+ if (RTjpeg_bcomp (block, &RTjpeg_cmask)) {
+ *((__u8 *) sp++) = 255;
+ } else
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8);
+ block += 64;
+
+ RTjpeg_dctY (bp3 + k, RTjpeg_block, RTjpeg_Cwidth);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_cqt);
+ if (RTjpeg_bcomp (block, &RTjpeg_cmask)) {
+ *((__u8 *) sp++) = 255;
+ } else
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8);
+ block += 64;
+
+ }
+ bp += RTjpeg_width << 3;
+ bp2 += RTjpeg_width << 2;
+ bp3 += RTjpeg_width << 2;
}
- bp+=RTjpeg_width<<3;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
- }
- /*printf ("%d\n", block - RTjpeg_old); */
+ /*printf ("%d\n", block - RTjpeg_old); */
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- return (sp-sb);
+ return (sp - sb);
}
-int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask)
+int
+RTjpeg_mcompress8 (__s8 * sp, unsigned char *bp, __u16 lmask)
{
- __s8 * sb;
- __s16 *block;
- int i, j;
+ __s8 *sb;
+ __s16 *block;
+ int i, j;
#ifdef HAVE_LIBMMX
- emms();
- RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
+ emms ();
+ RTjpeg_lmask =
+ (mmx_t) (((__u64) lmask << 48) | ((__u64) lmask << 32) | ((__u64) lmask <<
+ 16) | lmask);
#else
- RTjpeg_lmask=lmask;
+ RTjpeg_lmask = lmask;
#endif
-
- sb=sp;
- block=RTjpeg_old;
+
+ sb = sp;
+ block = RTjpeg_old;
/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
- for(j=0; j<RTjpeg_width; j+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- if(RTjpeg_bcomp(block, &RTjpeg_lmask))
- {
- *((__u8 *)sp++)=255;
+ for (i = 0; i < RTjpeg_height; i += 8) {
+ for (j = 0; j < RTjpeg_width; j += 8) {
+ RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_width);
+ RTjpeg_quant (RTjpeg_block, RTjpeg_lqt);
+ if (RTjpeg_bcomp (block, &RTjpeg_lmask)) {
+ *((__u8 *) sp++) = 255;
/* printf("* %d ", sp[-1]); */
- } else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- block+=64;
+ } else
+ sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8);
+ block += 64;
+ }
+ bp += RTjpeg_width << 3;
}
- bp+=RTjpeg_width<<3;
- }
#ifdef HAVE_LIBMMX
- emms();
+ emms ();
#endif
- return (sp-sb);
+ return (sp - sb);
}
-void RTjpeg_color_init(void)
+void
+RTjpeg_color_init (void)
{
-}
+}
#define KcrR 76284
#define KcrG 53281
@@ -3002,435 +3016,433 @@ void RTjpeg_color_init(void)
#define KcbB 132252
#define Ky 76284
-void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb)
+void
+RTjpeg_yuv422rgb (__u8 * buf, __u8 * rgb)
{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute;
- int yskip;
-
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
- bufy=&buf[0];
- bufoute=rgb;
-
- for(i=0; i<(RTjpeg_height); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute;
+ int yskip;
+
+ yskip = RTjpeg_width;
+
+ bufcb = &buf[RTjpeg_width * RTjpeg_height];
+ bufcr =
+ &buf[RTjpeg_width * RTjpeg_height + (RTjpeg_width * RTjpeg_height) / 2];
+ bufy = &buf[0];
+ bufoute = rgb;
+
+ for (i = 0; i < (RTjpeg_height); i++) {
+ for (j = 0; j < RTjpeg_width; j += 2) {
+ crR = (*bufcr - 128) * KcrR;
+ crG = (*(bufcr++) - 128) * KcrG;
+ cbG = (*bufcb - 128) * KcbG;
+ cbB = (*(bufcb++) - 128) * KcbB;
+
+ y = (bufy[j] - 16) * Ky;
+
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + 1] - 16) * Ky;
+
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ }
+ bufy += yskip;
}
- bufy+=yskip;
- }
}
-void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb)
+void
+RTjpeg_yuv420rgb (__u8 * buf, __u8 * rgb)
{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
-
- oskip=RTjpeg_width*3;
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+oskip;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+
+ oskip = RTjpeg_width * 3;
+ yskip = RTjpeg_width;
+
+ bufcb = &buf[RTjpeg_width * RTjpeg_height];
+ bufcr =
+ &buf[RTjpeg_width * RTjpeg_height + (RTjpeg_width * RTjpeg_height) / 4];
+ bufy = &buf[0];
+ bufoute = rgb;
+ bufouto = rgb + oskip;
+
+ for (i = 0; i < (RTjpeg_height >> 1); i++) {
+ for (j = 0; j < RTjpeg_width; j += 2) {
+ crR = (*bufcr - 128) * KcrR;
+ crG = (*(bufcr++) - 128) * KcrG;
+ cbG = (*bufcb - 128) * KcbG;
+ cbB = (*(bufcb++) - 128) * KcbB;
+
+ y = (bufy[j] - 16) * Ky;
+
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + 1] - 16) * Ky;
+
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + yskip] - 16) * Ky;
+
+ tmp = (y + crR) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + cbB) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + 1 + yskip] - 16) * Ky;
+
+ tmp = (y + crR) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + cbB) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ }
+ bufoute += oskip;
+ bufouto += oskip;
+ bufy += yskip << 1;
}
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
}
-void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb)
+void
+RTjpeg_yuvrgb32 (__u8 * buf, __u8 * rgb)
{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
-
- oskip=RTjpeg_width*4;
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+oskip;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufoute++;
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufoute++;
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufouto++;
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufouto++;
-
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+
+ oskip = RTjpeg_width * 4;
+ yskip = RTjpeg_width;
+
+ bufcb = &buf[RTjpeg_width * RTjpeg_height];
+ bufcr =
+ &buf[RTjpeg_width * RTjpeg_height + (RTjpeg_width * RTjpeg_height) / 2];
+ bufy = &buf[0];
+ bufoute = rgb;
+ bufouto = rgb + oskip;
+
+ for (i = 0; i < (RTjpeg_height >> 1); i++) {
+ for (j = 0; j < RTjpeg_width; j += 2) {
+ crR = (*bufcr - 128) * KcrR;
+ crG = (*(bufcr++) - 128) * KcrG;
+ cbG = (*bufcb - 128) * KcbG;
+ cbB = (*(bufcb++) - 128) * KcbB;
+
+ y = (bufy[j] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ bufoute++;
+
+ y = (bufy[j + 1] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ bufoute++;
+
+ y = (bufy[j + yskip] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ bufouto++;
+
+ y = (bufy[j + 1 + yskip] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ bufouto++;
+
+ }
+ bufoute += oskip;
+ bufouto += oskip;
+ bufy += yskip << 1;
}
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
}
-void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb)
+void
+RTjpeg_yuvrgb24 (__u8 * buf, __u8 * rgb)
{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
-
- oskip=RTjpeg_width*3;
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+oskip;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+
+ oskip = RTjpeg_width * 3;
+ yskip = RTjpeg_width;
+
+ bufcb = &buf[RTjpeg_width * RTjpeg_height];
+ bufcr =
+ &buf[RTjpeg_width * RTjpeg_height + (RTjpeg_width * RTjpeg_height) / 4];
+ bufy = &buf[0];
+ bufoute = rgb;
+ bufouto = rgb + oskip;
+
+ for (i = 0; i < (RTjpeg_height >> 1); i++) {
+ for (j = 0; j < RTjpeg_width; j += 2) {
+ crR = (*bufcr - 128) * KcrR;
+ crG = (*(bufcr++) - 128) * KcrG;
+ cbG = (*bufcb - 128) * KcbG;
+ cbB = (*(bufcb++) - 128) * KcbB;
+
+ y = (bufy[j] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + 1] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufoute++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + yskip] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ y = (bufy[j + 1 + yskip] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ *(bufouto++) = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+
+ }
+ bufoute += oskip;
+ bufouto += oskip;
+ bufy += yskip << 1;
}
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
}
-void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb)
+void
+RTjpeg_yuvrgb16 (__u8 * buf, __u8 * rgb)
{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
- unsigned char r, g, b;
-
- oskip=RTjpeg_width*2;
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+oskip;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufoute++)=tmp&0xff;
- *(bufoute++)=tmp>>8;
-
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufoute++)=tmp&0xff;
- *(bufoute++)=tmp>>8;
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufouto++)=tmp&0xff;
- *(bufouto++)=tmp>>8;
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufouto++)=tmp&0xff;
- *(bufouto++)=tmp>>8;
-
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+ unsigned char r, g, b;
+
+ oskip = RTjpeg_width * 2;
+ yskip = RTjpeg_width;
+
+ bufcb = &buf[RTjpeg_width * RTjpeg_height];
+ bufcr =
+ &buf[RTjpeg_width * RTjpeg_height + (RTjpeg_width * RTjpeg_height) / 4];
+ bufy = &buf[0];
+ bufoute = rgb;
+ bufouto = rgb + oskip;
+
+ for (i = 0; i < (RTjpeg_height >> 1); i++) {
+ for (j = 0; j < RTjpeg_width; j += 2) {
+ crR = (*bufcr - 128) * KcrR;
+ crG = (*(bufcr++) - 128) * KcrG;
+ cbG = (*bufcb - 128) * KcbG;
+ cbB = (*(bufcb++) - 128) * KcbB;
+
+ y = (bufy[j] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ b = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ g = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ r = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (int) ((int) b >> 3);
+ tmp |= (int) (((int) g >> 2) << 5);
+ tmp |= (int) (((int) r >> 3) << 11);
+ *(bufoute++) = tmp & 0xff;
+ *(bufoute++) = tmp >> 8;
+
+
+ y = (bufy[j + 1] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ b = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ g = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ r = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (int) ((int) b >> 3);
+ tmp |= (int) (((int) g >> 2) << 5);
+ tmp |= (int) (((int) r >> 3) << 11);
+ *(bufoute++) = tmp & 0xff;
+ *(bufoute++) = tmp >> 8;
+
+ y = (bufy[j + yskip] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ b = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ g = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ r = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (int) ((int) b >> 3);
+ tmp |= (int) (((int) g >> 2) << 5);
+ tmp |= (int) (((int) r >> 3) << 11);
+ *(bufouto++) = tmp & 0xff;
+ *(bufouto++) = tmp >> 8;
+
+ y = (bufy[j + 1 + yskip] - 16) * Ky;
+
+ tmp = (y + cbB) >> 16;
+ b = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y - crG - cbG) >> 16;
+ g = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (y + crR) >> 16;
+ r = (tmp > 255) ? 255 : ((tmp < 0) ? 0 : tmp);
+ tmp = (int) ((int) b >> 3);
+ tmp |= (int) (((int) g >> 2) << 5);
+ tmp |= (int) (((int) r >> 3) << 11);
+ *(bufouto++) = tmp & 0xff;
+ *(bufouto++) = tmp >> 8;
+
+ }
+ bufoute += oskip;
+ bufouto += oskip;
+ bufy += yskip << 1;
}
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
}
-void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb)
+void
+RTjpeg_yuvrgb8 (__u8 * buf, __u8 * rgb)
{
- bcopy(buf, rgb, RTjpeg_width*RTjpeg_height);
+ bcopy (buf, rgb, RTjpeg_width * RTjpeg_height);
}
-void RTjpeg_double32(__u32 *buf)
+void
+RTjpeg_double32 (__u32 * buf)
{
- int i, j;
-
- __u32 *iptr, *optr1, *optr2;
-
- iptr=buf+(RTjpeg_width*RTjpeg_height)-1;
- optr1=buf+(RTjpeg_width*RTjpeg_height*4)-1;
- optr2=optr1-(2*RTjpeg_width);
-
- for(i=0; i<RTjpeg_height; i++)
- {
- for(j=0; j<RTjpeg_width; j++)
- {
- *(optr1--)=*iptr;
- *(optr1--)=*iptr;
- *(optr2--)=*iptr;
- *(optr2--)=*(iptr--);
+ int i, j;
+
+ __u32 *iptr, *optr1, *optr2;
+
+ iptr = buf + (RTjpeg_width * RTjpeg_height) - 1;
+ optr1 = buf + (RTjpeg_width * RTjpeg_height * 4) - 1;
+ optr2 = optr1 - (2 * RTjpeg_width);
+
+ for (i = 0; i < RTjpeg_height; i++) {
+ for (j = 0; j < RTjpeg_width; j++) {
+ *(optr1--) = *iptr;
+ *(optr1--) = *iptr;
+ *(optr2--) = *iptr;
+ *(optr2--) = *(iptr--);
+ }
+ optr2 = optr2 - 2 * RTjpeg_width;
+ optr1 = optr1 - 2 * RTjpeg_width;
}
- optr2=optr2-2*RTjpeg_width;
- optr1=optr1-2*RTjpeg_width;
- }
}
-void RTjpeg_double24(__u8 *buf)
+void
+RTjpeg_double24 (__u8 * buf)
{
}
-void RTjpeg_double16(__u16 *buf)
+void
+RTjpeg_double16 (__u16 * buf)
{
- int i, j;
-
- __u16 *iptr, *optr1, *optr2;
-
- iptr=buf+(RTjpeg_width*RTjpeg_height)-1;
- optr1=buf+(RTjpeg_width*RTjpeg_height*4)-1;
- optr2=optr1-(2*RTjpeg_width);
-
- for(i=0; i<RTjpeg_height; i++)
- {
- for(j=0; j<RTjpeg_width; j++)
- {
- *(optr1--)=*iptr;
- *(optr1--)=*iptr;
- *(optr2--)=*iptr;
- *(optr2--)=*(iptr--);
+ int i, j;
+
+ __u16 *iptr, *optr1, *optr2;
+
+ iptr = buf + (RTjpeg_width * RTjpeg_height) - 1;
+ optr1 = buf + (RTjpeg_width * RTjpeg_height * 4) - 1;
+ optr2 = optr1 - (2 * RTjpeg_width);
+
+ for (i = 0; i < RTjpeg_height; i++) {
+ for (j = 0; j < RTjpeg_width; j++) {
+ *(optr1--) = *iptr;
+ *(optr1--) = *iptr;
+ *(optr2--) = *iptr;
+ *(optr2--) = *(iptr--);
+ }
+ optr2 = optr2 - 2 * RTjpeg_width;
+ optr1 = optr1 - 2 * RTjpeg_width;
}
- optr2=optr2-2*RTjpeg_width;
- optr1=optr1-2*RTjpeg_width;
- }
}
-void RTjpeg_double8(__u8 *buf)
+void
+RTjpeg_double8 (__u8 * buf)
{
- int i, j;
-
- __u8 *iptr, *optr1, *optr2;
-
- iptr=buf+(RTjpeg_width*RTjpeg_height)-1;
- optr1=buf+(RTjpeg_width*RTjpeg_height*4)-1;
- optr2=optr1-(2*RTjpeg_width);
-
- for(i=0; i<RTjpeg_height; i++)
- {
- for(j=0; j<RTjpeg_width; j++)
- {
- *(optr1--)=*iptr;
- *(optr1--)=*iptr;
- *(optr2--)=*iptr;
- *(optr2--)=*(iptr--);
+ int i, j;
+
+ __u8 *iptr, *optr1, *optr2;
+
+ iptr = buf + (RTjpeg_width * RTjpeg_height) - 1;
+ optr1 = buf + (RTjpeg_width * RTjpeg_height * 4) - 1;
+ optr2 = optr1 - (2 * RTjpeg_width);
+
+ for (i = 0; i < RTjpeg_height; i++) {
+ for (j = 0; j < RTjpeg_width; j++) {
+ *(optr1--) = *iptr;
+ *(optr1--) = *iptr;
+ *(optr2--) = *iptr;
+ *(optr2--) = *(iptr--);
+ }
+ optr2 = optr2 - 2 * RTjpeg_width;
+ optr1 = optr1 - 2 * RTjpeg_width;
}
- optr2=optr2-2*RTjpeg_width;
- optr1=optr1-2*RTjpeg_width;
- }
}
-