diff options
Diffstat (limited to 'gst/rtjpeg/RTjpeg.c')
-rw-r--r-- | gst/rtjpeg/RTjpeg.c | 1686 |
1 files changed, 843 insertions, 843 deletions
diff --git a/gst/rtjpeg/RTjpeg.c b/gst/rtjpeg/RTjpeg.c index d2e7b67d..f2f61252 100644 --- a/gst/rtjpeg/RTjpeg.c +++ b/gst/rtjpeg/RTjpeg.c @@ -147,7 +147,7 @@ RTjpeg_b2s (__s16 * data, __s8 * strm, __u8 bt8) strm[0] = (__u8) (data[RTjpeg_ZZ[0]] > 254) ? 254 : ((data[RTjpeg_ZZ[0]] < - 0) ? 0 : data[RTjpeg_ZZ[0]]); + 0) ? 0 : data[RTjpeg_ZZ[0]]); for (ci = 1; ci <= bt8; ci++) { ZZvalue = data[RTjpeg_ZZ[ci]]; @@ -166,11 +166,11 @@ RTjpeg_b2s (__s16 * data, __s8 * strm, __u8 bt8) strm[co++] = (__s8) (ZZvalue > 63) ? 63 : ZZvalue; } else if (ZZvalue < 0) { strm[co++] = (__s8) (ZZvalue < -64) ? -64 : ZZvalue; - } else { /* compress zeros */ + } else { /* compress zeros */ tmp = ci; do { - ci++; + ci++; } while ((ci < 64) && (data[RTjpeg_ZZ[ci]] == 0)); @@ -199,7 +199,7 @@ RTjpeg_s2b (__s16 * data, __s8 * strm, __u8 bt8, __u32 * qtbl) if (strm[ci] > 63) { tmp = co + strm[ci] - 63; for (; co < tmp; co++) - data[RTjpeg_ZZ[co]] = 0; + data[RTjpeg_ZZ[co]] = 0; co--; } else { i = RTjpeg_ZZ[co]; @@ -242,19 +242,19 @@ RTjpeg_quant (__s16 * block, __s32 * qtbl) movq_m2r (RTjpeg_half, mm7); for (i = 16; i; i--) { - movq_m2r (*(ql++), mm0); /* quant vals (4) */ - movq_m2r (*bl, mm2); /* block vals (4) */ + movq_m2r (*(ql++), mm0); /* quant vals (4) */ + movq_m2r (*bl, mm2); /* block vals (4) */ movq_r2r (mm0, mm1); movq_r2r (mm2, mm3); - punpcklwd_r2r (mm6, mm0); /* 1 qb 1 qa */ - punpckhwd_r2r (mm6, mm1); /* 1 qd 1 qc */ + punpcklwd_r2r (mm6, mm0); /* 1 qb 1 qa */ + punpckhwd_r2r (mm6, mm1); /* 1 qd 1 qc */ - punpcklwd_r2r (mm7, mm2); /* 32767 bb 32767 ba */ - punpckhwd_r2r (mm7, mm3); /* 32767 bd 32767 bc */ + punpcklwd_r2r (mm7, mm2); /* 32767 bb 32767 ba */ + punpckhwd_r2r (mm7, mm3); /* 32767 bd 32767 bc */ - pmaddwd_r2r (mm2, mm0); /* 32767+bb*qb 32767+ba*qa */ - pmaddwd_r2r (mm3, mm1); /* 32767+bd*qd 32767+bc*qc */ + pmaddwd_r2r (mm2, mm0); /* 32767+bb*qb 32767+ba*qa */ + pmaddwd_r2r (mm3, mm1); /* 32767+bd*qd 32767+bc*qc */ psrad_i2r (16, mm0); psrad_i2r (16, mm1); @@ -293,10 +293,10 @@ static mmx_t RTjpeg_zero = (mmx_t) (long long) 0x0000000000000000LL; #else -#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */ -#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */ -#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */ -#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */ +#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */ #define DESCALE10(x) (__s16)( ((x)+128) >> 8) #define DESCALE20(x) (__s16)(((x)+32768) >> 16) @@ -338,36 +338,36 @@ RTjpeg_dctY (__u8 * idata, __s16 * odata, int rskip) tmp3 = idataptr[3] + idataptr[4]; tmp4 = idataptr[3] - idataptr[4]; - tmp10 = (tmp0 + tmp3); /* phase 2 */ + tmp10 = (tmp0 + tmp3); /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = (tmp1 + tmp2); tmp12 = tmp1 - tmp2; - wsptr[0] = (tmp10 + tmp11) << 8; /* phase 3 */ + wsptr[0] = (tmp10 + tmp11) << 8; /* phase 3 */ wsptr[4] = (tmp10 - tmp11) << 8; - z1 = D_MULTIPLY (tmp12 + tmp13, FIX_0_707106781); /* c4 */ - wsptr[2] = (tmp13 << 8) + z1; /* phase 5 */ + z1 = D_MULTIPLY (tmp12 + tmp13, FIX_0_707106781); /* c4 */ + wsptr[2] = (tmp13 << 8) + z1; /* phase 5 */ wsptr[6] = (tmp13 << 8) - z1; - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; - z5 = D_MULTIPLY (tmp10 - tmp12, FIX_0_382683433); /* c6 */ - z2 = D_MULTIPLY (tmp10, FIX_0_541196100) + z5; /* c2-c6 */ - z4 = D_MULTIPLY (tmp12, FIX_1_306562965) + z5; /* c2+c6 */ - z3 = D_MULTIPLY (tmp11, FIX_0_707106781); /* c4 */ + z5 = D_MULTIPLY (tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = D_MULTIPLY (tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = D_MULTIPLY (tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = D_MULTIPLY (tmp11, FIX_0_707106781); /* c4 */ - z11 = (tmp7 << 8) + z3; /* phase 5 */ + z11 = (tmp7 << 8) + z3; /* phase 5 */ z13 = (tmp7 << 8) - z3; - wsptr[5] = z13 + z2; /* phase 6 */ + wsptr[5] = z13 + z2; /* phase 6 */ wsptr[3] = z13 - z2; wsptr[1] = z11 + z4; wsptr[7] = z11 - z4; - idataptr += rskip << 3; /* advance pointer to next row */ + idataptr += rskip << 3; /* advance pointer to next row */ wsptr += 8; } @@ -383,36 +383,36 @@ RTjpeg_dctY (__u8 * idata, __s16 * odata, int rskip) tmp3 = wsptr[24] + wsptr[32]; tmp4 = wsptr[24] - wsptr[32]; - tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - odataptr[0] = DESCALE10 (tmp10 + tmp11); /* phase 3 */ + odataptr[0] = DESCALE10 (tmp10 + tmp11); /* phase 3 */ odataptr[32] = DESCALE10 (tmp10 - tmp11); - z1 = D_MULTIPLY (tmp12 + tmp13, FIX_0_707106781); /* c4 */ - odataptr[16] = DESCALE20 ((tmp13 << 8) + z1); /* phase 5 */ + z1 = D_MULTIPLY (tmp12 + tmp13, FIX_0_707106781); /* c4 */ + odataptr[16] = DESCALE20 ((tmp13 << 8) + z1); /* phase 5 */ odataptr[48] = DESCALE20 ((tmp13 << 8) - z1); - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; - z5 = D_MULTIPLY (tmp10 - tmp12, FIX_0_382683433); /* c6 */ - z2 = D_MULTIPLY (tmp10, FIX_0_541196100) + z5; /* c2-c6 */ - z4 = D_MULTIPLY (tmp12, FIX_1_306562965) + z5; /* c2+c6 */ - z3 = D_MULTIPLY (tmp11, FIX_0_707106781); /* c4 */ + z5 = D_MULTIPLY (tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = D_MULTIPLY (tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = D_MULTIPLY (tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = D_MULTIPLY (tmp11, FIX_0_707106781); /* c4 */ - z11 = (tmp7 << 8) + z3; /* phase 5 */ + z11 = (tmp7 << 8) + z3; /* phase 5 */ z13 = (tmp7 << 8) - z3; - odataptr[40] = DESCALE20 (z13 + z2); /* phase 6 */ + odataptr[40] = DESCALE20 (z13 + z2); /* phase 6 */ odataptr[24] = DESCALE20 (z13 - z2); odataptr[8] = DESCALE20 (z11 + z4); odataptr[56] = DESCALE20 (z11 - z4); - odataptr++; /* advance pointer to next column */ + odataptr++; /* advance pointer to next column */ wsptr++; } #else @@ -513,169 +513,169 @@ RTjpeg_dctY (__u8 * idata, __s16 * odata, int rskip) /* Start Transpose to do calculations on rows */ - movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into m5 */ + movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into m5 */ - movq_m2r (*(dataptr + 13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */ + movq_m2r (*(dataptr + 13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */ movq_r2r (mm7, mm5); - punpcklwd_m2r (*(dataptr + 11), mm7); /* m11:m01|m10:m00 - interleave first and second lines */ + punpcklwd_m2r (*(dataptr + 11), mm7); /* m11:m01|m10:m00 - interleave first and second lines */ movq_r2r (mm6, mm2); - punpcklwd_m2r (*(dataptr + 15), mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */ + punpcklwd_m2r (*(dataptr + 15), mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */ movq_r2r (mm7, mm1); - movq_m2r (*(dataptr + 11), mm3); /* m13:m13|m11:m10 - second line */ - punpckldq_r2r (mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */ + movq_m2r (*(dataptr + 11), mm3); /* m13:m13|m11:m10 - second line */ + punpckldq_r2r (mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */ - movq_m2r (*(dataptr + 15), mm0); /* m13:m13|m11:m10 - fourth line */ - punpckhdq_r2r (mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ + movq_m2r (*(dataptr + 15), mm0); /* m13:m13|m11:m10 - fourth line */ + punpckhdq_r2r (mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ - movq_r2m (mm7, *(dataptr + 9)); /* write result 1 */ - punpckhwd_r2r (mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */ + movq_r2m (mm7, *(dataptr + 9)); /* write result 1 */ + punpckhwd_r2r (mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */ - movq_r2m (mm1, *(dataptr + 11)); /* write result 2 */ - punpckhwd_r2r (mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */ + movq_r2m (mm1, *(dataptr + 11)); /* write result 2 */ + punpckhwd_r2r (mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */ movq_r2r (mm5, mm1); - punpckldq_r2r (mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */ + punpckldq_r2r (mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */ - movq_m2r (*(dataptr + 1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */ - punpckhdq_r2r (mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */ + movq_m2r (*(dataptr + 1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */ + punpckhdq_r2r (mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */ - movq_r2m (mm5, *(dataptr + 13)); /* write result 3 */ + movq_r2m (mm5, *(dataptr + 13)); /* write result 3 */ /* last 4x4 done */ - movq_r2m (mm1, *(dataptr + 15)); /* write result 4, last 4x4 */ + movq_r2m (mm1, *(dataptr + 15)); /* write result 4, last 4x4 */ - movq_m2r (*(dataptr + 5), mm2); /* m23:m22|m21:m20 - third line */ + movq_m2r (*(dataptr + 5), mm2); /* m23:m22|m21:m20 - third line */ movq_r2r (mm0, mm6); - punpcklwd_m2r (*(dataptr + 3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ + punpcklwd_m2r (*(dataptr + 3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ movq_r2r (mm2, mm7); - punpcklwd_m2r (*(dataptr + 7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */ + punpcklwd_m2r (*(dataptr + 7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */ movq_r2r (mm0, mm4); - movq_m2r (*(dataptr + 8), mm1); /* n03:n02|n01:n00 - first line */ - punpckldq_r2r (mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */ + movq_m2r (*(dataptr + 8), mm1); /* n03:n02|n01:n00 - first line */ + punpckldq_r2r (mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */ - movq_m2r (*(dataptr + 12), mm3); /* n23:n22|n21:n20 - third line */ - punpckhdq_r2r (mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */ + movq_m2r (*(dataptr + 12), mm3); /* n23:n22|n21:n20 - third line */ + punpckhdq_r2r (mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */ - punpckhwd_m2r (*(dataptr + 3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */ - movq_r2r (mm1, mm2); /* copy first line */ + punpckhwd_m2r (*(dataptr + 3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */ + movq_r2r (mm1, mm2); /* copy first line */ - punpckhwd_m2r (*(dataptr + 7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */ - movq_r2r (mm6, mm5); /* copy first intermediate result */ + punpckhwd_m2r (*(dataptr + 7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */ + movq_r2r (mm6, mm5); /* copy first intermediate result */ - movq_r2m (mm0, *(dataptr + 8)); /* write result 1 */ - punpckhdq_r2r (mm7, mm5); /* m33:m23|m13:m03 - produce third result */ + movq_r2m (mm0, *(dataptr + 8)); /* write result 1 */ + punpckhdq_r2r (mm7, mm5); /* m33:m23|m13:m03 - produce third result */ - punpcklwd_m2r (*(dataptr + 10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */ - movq_r2r (mm3, mm0); /* copy third line */ + punpcklwd_m2r (*(dataptr + 10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */ + movq_r2r (mm3, mm0); /* copy third line */ - punpckhwd_m2r (*(dataptr + 10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */ + punpckhwd_m2r (*(dataptr + 10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */ - movq_r2m (mm4, *(dataptr + 10)); /* write result 2 out */ - punpckldq_r2r (mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */ + movq_r2m (mm4, *(dataptr + 10)); /* write result 2 out */ + punpckldq_r2r (mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */ - punpcklwd_m2r (*(dataptr + 14), mm3); /* n31:n21|n30:n20 - interleave third and fourth lines */ + punpcklwd_m2r (*(dataptr + 14), mm3); /* n31:n21|n30:n20 - interleave third and fourth lines */ movq_r2r (mm1, mm4); - movq_r2m (mm6, *(dataptr + 12)); /* write result 3 out */ - punpckldq_r2r (mm3, mm1); /* n30:n20|n10:n00 - produce first result */ + movq_r2m (mm6, *(dataptr + 12)); /* write result 3 out */ + punpckldq_r2r (mm3, mm1); /* n30:n20|n10:n00 - produce first result */ - punpckhwd_m2r (*(dataptr + 14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */ + punpckhwd_m2r (*(dataptr + 14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */ movq_r2r (mm2, mm6); - movq_r2m (mm5, *(dataptr + 14)); /* write result 4 out */ - punpckhdq_r2r (mm3, mm4); /* n31:n21|n11:n01- produce second result */ + movq_r2m (mm5, *(dataptr + 14)); /* write result 4 out */ + punpckhdq_r2r (mm3, mm4); /* n31:n21|n11:n01- produce second result */ - movq_r2m (mm1, *(dataptr + 1)); /* write result 5 out - (first result for other 4 x 4 block) */ - punpckldq_r2r (mm0, mm2); /* n32:n22|n12:n02- produce third result */ + movq_r2m (mm1, *(dataptr + 1)); /* write result 5 out - (first result for other 4 x 4 block) */ + punpckldq_r2r (mm0, mm2); /* n32:n22|n12:n02- produce third result */ - movq_r2m (mm4, *(dataptr + 3)); /* write result 6 out */ - punpckhdq_r2r (mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */ + movq_r2m (mm4, *(dataptr + 3)); /* write result 6 out */ + punpckhdq_r2r (mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */ - movq_r2m (mm2, *(dataptr + 5)); /* write result 7 out */ + movq_r2m (mm2, *(dataptr + 5)); /* write result 7 out */ - movq_m2r (*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */ + movq_m2r (*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */ - movq_r2m (mm6, *(dataptr + 7)); /* write result 8 out */ + movq_r2m (mm6, *(dataptr + 7)); /* write result 8 out */ /* Do first 4x4 quadrant, which is used in the beginning of the DCT: */ - movq_m2r (*(dataptr + 4), mm7); /* m23:m22|m21:m20 - third line */ + movq_m2r (*(dataptr + 4), mm7); /* m23:m22|m21:m20 - third line */ movq_r2r (mm0, mm2); - punpcklwd_m2r (*(dataptr + 2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ + punpcklwd_m2r (*(dataptr + 2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ movq_r2r (mm7, mm4); - punpcklwd_m2r (*(dataptr + 6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */ + punpcklwd_m2r (*(dataptr + 6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */ movq_r2r (mm0, mm1); - movq_m2r (*(dataptr + 2), mm6); /* m13:m12|m11:m10 - second line */ - punpckldq_r2r (mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */ + movq_m2r (*(dataptr + 2), mm6); /* m13:m12|m11:m10 - second line */ + punpckldq_r2r (mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */ - movq_m2r (*(dataptr + 6), mm5); /* m33:m32|m31:m30 - fourth line */ - punpckhdq_r2r (mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ + movq_m2r (*(dataptr + 6), mm5); /* m33:m32|m31:m30 - fourth line */ + punpckhdq_r2r (mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ - movq_r2r (mm0, mm7); /* write result 1 */ - punpckhwd_r2r (mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */ + movq_r2r (mm0, mm7); /* write result 1 */ + punpckhwd_r2r (mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */ - psubw_m2r (*(dataptr + 14), mm7); /* tmp07=x0-x7: Stage 1 */ - movq_r2r (mm1, mm6); /* write result 2 */ + psubw_m2r (*(dataptr + 14), mm7); /* tmp07=x0-x7: Stage 1 */ + movq_r2r (mm1, mm6); /* write result 2 */ - paddw_m2r (*(dataptr + 14), mm0); /* tmp00=x0+x7: Stage 1 */ - punpckhwd_r2r (mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */ + paddw_m2r (*(dataptr + 14), mm0); /* tmp00=x0+x7: Stage 1 */ + punpckhwd_r2r (mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */ - paddw_m2r (*(dataptr + 12), mm1); /* tmp01=x1+x6: Stage 1 */ - movq_r2r (mm2, mm3); /* copy first intermediate result */ + paddw_m2r (*(dataptr + 12), mm1); /* tmp01=x1+x6: Stage 1 */ + movq_r2r (mm2, mm3); /* copy first intermediate result */ - psubw_m2r (*(dataptr + 12), mm6); /* tmp06=x1-x6: Stage 1 */ - punpckldq_r2r (mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */ + psubw_m2r (*(dataptr + 12), mm6); /* tmp06=x1-x6: Stage 1 */ + punpckldq_r2r (mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */ movq_r2m (mm7, tmp7); - movq_r2r (mm2, mm5); /* write result 3 */ + movq_r2r (mm2, mm5); /* write result 3 */ movq_r2m (mm6, tmp6); - punpckhdq_r2r (mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */ + punpckhdq_r2r (mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */ - paddw_m2r (*(dataptr + 10), mm2); /* tmp02=x2+5: Stage 1 */ - movq_r2r (mm3, mm4); /* write result 4 */ + paddw_m2r (*(dataptr + 10), mm2); /* tmp02=x2+5: Stage 1 */ + movq_r2r (mm3, mm4); /* write result 4 */ /************************************************************************************************ End of Transpose ************************************************************************************************/ - paddw_m2r (*(dataptr + 8), mm3); /* tmp03=x3+x4: stage 1 */ + paddw_m2r (*(dataptr + 8), mm3); /* tmp03=x3+x4: stage 1 */ movq_r2r (mm0, mm7); - psubw_m2r (*(dataptr + 8), mm4); /* tmp04=x3-x4: stage 1 */ + psubw_m2r (*(dataptr + 8), mm4); /* tmp04=x3-x4: stage 1 */ movq_r2r (mm1, mm6); - paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */ - psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */ + paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */ + psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */ - psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */ - paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */ + psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */ + paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */ - psubw_m2r (*(dataptr + 10), mm5); /* tmp05=x2-x5: stage 1 */ - paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ + psubw_m2r (*(dataptr + 10), mm5); /* tmp05=x2-x5: stage 1 */ + paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ /* stage 3 */ movq_m2r (tmp6, mm2); movq_r2r (mm0, mm3); - psllw_i2r (2, mm6); /* m8 * 2^2 */ + psllw_i2r (2, mm6); /* m8 * 2^2 */ paddw_r2r (mm1, mm0); - pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ + pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ psubw_r2r (mm1, mm3); movq_r2m (mm0, *dataptr); @@ -683,349 +683,349 @@ RTjpeg_dctY (__u8 * idata, __s16 * odata, int rskip) /* Odd part */ movq_r2m (mm3, *(dataptr + 8)); - paddw_r2r (mm5, mm4); /* tmp10 */ + paddw_r2r (mm5, mm4); /* tmp10 */ movq_m2r (tmp7, mm3); - paddw_r2r (mm6, mm0); /* tmp32 */ + paddw_r2r (mm6, mm0); /* tmp32 */ - paddw_r2r (mm2, mm5); /* tmp11 */ - psubw_r2r (mm6, mm7); /* tmp33 */ + paddw_r2r (mm2, mm5); /* tmp11 */ + psubw_r2r (mm6, mm7); /* tmp33 */ movq_r2m (mm0, *(dataptr + 4)); - paddw_r2r (mm3, mm2); /* tmp12 */ + paddw_r2r (mm3, mm2); /* tmp12 */ /* stage 4 */ movq_r2m (mm7, *(dataptr + 12)); - movq_r2r (mm4, mm1); /* copy of tmp10 */ + movq_r2r (mm4, mm1); /* copy of tmp10 */ - psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ - psllw_i2r (2, mm4); /* m8 * 2^2 */ + psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ + psllw_i2r (2, mm4); /* m8 * 2^2 */ movq_m2r (RTjpeg_C2mC6, mm0); psllw_i2r (2, mm1); - pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ + pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ psllw_i2r (2, mm2); - pmulhw_r2r (mm0, mm4); /* z5 */ + pmulhw_r2r (mm0, mm4); /* z5 */ /* stage 5 */ pmulhw_m2r (RTjpeg_C2pC6, mm2); psllw_i2r (2, mm5); - pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ - movq_r2r (mm3, mm0); /* copy tmp7 */ + pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ + movq_r2r (mm3, mm0); /* copy tmp7 */ movq_m2r (*(dataptr + 1), mm7); - paddw_r2r (mm1, mm4); /* z2 */ + paddw_r2r (mm1, mm4); /* z2 */ - paddw_r2r (mm1, mm2); /* z4 */ + paddw_r2r (mm1, mm2); /* z4 */ - paddw_r2r (mm5, mm0); /* z11 */ - psubw_r2r (mm5, mm3); /* z13 */ + paddw_r2r (mm5, mm0); /* z11 */ + psubw_r2r (mm5, mm3); /* z13 */ /* stage 6 */ - movq_r2r (mm3, mm5); /* copy z13 */ - psubw_r2r (mm4, mm3); /* y3=z13 - z2 */ + movq_r2r (mm3, mm5); /* copy z13 */ + psubw_r2r (mm4, mm3); /* y3=z13 - z2 */ - paddw_r2r (mm4, mm5); /* y5=z13 + z2 */ - movq_r2r (mm0, mm6); /* copy z11 */ + paddw_r2r (mm4, mm5); /* y5=z13 + z2 */ + movq_r2r (mm0, mm6); /* copy z11 */ - movq_r2m (mm3, *(dataptr + 6)); /*save y3 */ - psubw_r2r (mm2, mm0); /* y7=z11 - z4 */ + movq_r2m (mm3, *(dataptr + 6)); /*save y3 */ + psubw_r2r (mm2, mm0); /* y7=z11 - z4 */ - movq_r2m (mm5, *(dataptr + 10)); /*save y5 */ - paddw_r2r (mm2, mm6); /* y1=z11 + z4 */ + movq_r2m (mm5, *(dataptr + 10)); /*save y5 */ + paddw_r2r (mm2, mm6); /* y1=z11 + z4 */ - movq_r2m (mm0, *(dataptr + 14)); /*save y7 */ + movq_r2m (mm0, *(dataptr + 14)); /*save y7 */ - /************************************************ + /************************************************ * End of 1st 4 rows ************************************************/ - movq_m2r (*(dataptr + 3), mm1); /* load x1: stage 1 */ - movq_r2r (mm7, mm0); /* copy x0 */ + movq_m2r (*(dataptr + 3), mm1); /* load x1: stage 1 */ + movq_r2r (mm7, mm0); /* copy x0 */ - movq_r2m (mm6, *(dataptr + 2)); /*save y1 */ + movq_r2m (mm6, *(dataptr + 2)); /*save y1 */ - movq_m2r (*(dataptr + 5), mm2); /* load x2: stage 1 */ - movq_r2r (mm1, mm6); /* copy x1 */ + movq_m2r (*(dataptr + 5), mm2); /* load x2: stage 1 */ + movq_r2r (mm1, mm6); /* copy x1 */ - paddw_m2r (*(dataptr + 15), mm0); /* tmp00 = x0 + x7 */ + paddw_m2r (*(dataptr + 15), mm0); /* tmp00 = x0 + x7 */ - movq_m2r (*(dataptr + 7), mm3); /* load x3 : stage 1 */ - movq_r2r (mm2, mm5); /* copy x2 */ + movq_m2r (*(dataptr + 7), mm3); /* load x3 : stage 1 */ + movq_r2r (mm2, mm5); /* copy x2 */ - psubw_m2r (*(dataptr + 15), mm7); /* tmp07 = x0 - x7 */ - movq_r2r (mm3, mm4); /* copy x3 */ + psubw_m2r (*(dataptr + 15), mm7); /* tmp07 = x0 - x7 */ + movq_r2r (mm3, mm4); /* copy x3 */ - paddw_m2r (*(dataptr + 13), mm1); /* tmp01 = x1 + x6 */ + paddw_m2r (*(dataptr + 13), mm1); /* tmp01 = x1 + x6 */ - movq_r2m (mm7, tmp7); /* save tmp07 */ - movq_r2r (mm0, mm7); /* copy tmp00 */ + movq_r2m (mm7, tmp7); /* save tmp07 */ + movq_r2r (mm0, mm7); /* copy tmp00 */ - psubw_m2r (*(dataptr + 13), mm6); /* tmp06 = x1 - x6 */ + psubw_m2r (*(dataptr + 13), mm6); /* tmp06 = x1 - x6 */ /* stage 2, Even Part */ - paddw_m2r (*(dataptr + 9), mm3); /* tmp03 = x3 + x4 */ + paddw_m2r (*(dataptr + 9), mm3); /* tmp03 = x3 + x4 */ - movq_r2m (mm6, tmp6); /* save tmp07 */ - movq_r2r (mm1, mm6); /* copy tmp01 */ + movq_r2m (mm6, tmp6); /* save tmp07 */ + movq_r2r (mm1, mm6); /* copy tmp01 */ - paddw_m2r (*(dataptr + 11), mm2); /* tmp02 = x2 + x5 */ - paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03 */ + paddw_m2r (*(dataptr + 11), mm2); /* tmp02 = x2 + x5 */ + paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03 */ - psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03 */ + psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03 */ - psubw_m2r (*(dataptr + 9), mm4); /* tmp04 = x3 - x4 */ - psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02 */ + psubw_m2r (*(dataptr + 9), mm4); /* tmp04 = x3 - x4 */ + psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02 */ - paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02 */ + paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02 */ - psubw_m2r (*(dataptr + 11), mm5); /* tmp05 = x2 - x5 */ - paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ + psubw_m2r (*(dataptr + 11), mm5); /* tmp05 = x2 - x5 */ + paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ /* stage 3, Even and stage 4 & 5 even */ - movq_m2r (tmp6, mm2); /* load tmp6 */ - movq_r2r (mm0, mm3); /* copy tmp10 */ + movq_m2r (tmp6, mm2); /* load tmp6 */ + movq_r2r (mm0, mm3); /* copy tmp10 */ - psllw_i2r (2, mm6); /* shift z1 */ - paddw_r2r (mm1, mm0); /* y0=tmp10 + tmp11 */ + psllw_i2r (2, mm6); /* shift z1 */ + paddw_r2r (mm1, mm0); /* y0=tmp10 + tmp11 */ - pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ - psubw_r2r (mm1, mm3); /* y4=tmp10 - tmp11 */ + pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ + psubw_r2r (mm1, mm3); /* y4=tmp10 - tmp11 */ - movq_r2m (mm0, *(dataptr + 1)); /*save y0 */ - movq_r2r (mm7, mm0); /* copy tmp13 */ + movq_r2m (mm0, *(dataptr + 1)); /*save y0 */ + movq_r2r (mm7, mm0); /* copy tmp13 */ /* odd part */ - movq_r2m (mm3, *(dataptr + 9)); /*save y4 */ - paddw_r2r (mm5, mm4); /* tmp10 = tmp4 + tmp5 */ + movq_r2m (mm3, *(dataptr + 9)); /*save y4 */ + paddw_r2r (mm5, mm4); /* tmp10 = tmp4 + tmp5 */ - movq_m2r (tmp7, mm3); /* load tmp7 */ - paddw_r2r (mm6, mm0); /* tmp32 = tmp13 + z1 */ + movq_m2r (tmp7, mm3); /* load tmp7 */ + paddw_r2r (mm6, mm0); /* tmp32 = tmp13 + z1 */ - paddw_r2r (mm2, mm5); /* tmp11 = tmp5 + tmp6 */ - psubw_r2r (mm6, mm7); /* tmp33 = tmp13 - z1 */ + paddw_r2r (mm2, mm5); /* tmp11 = tmp5 + tmp6 */ + psubw_r2r (mm6, mm7); /* tmp33 = tmp13 - z1 */ - movq_r2m (mm0, *(dataptr + 5)); /*save y2 */ - paddw_r2r (mm3, mm2); /* tmp12 = tmp6 + tmp7 */ + movq_r2m (mm0, *(dataptr + 5)); /*save y2 */ + paddw_r2r (mm3, mm2); /* tmp12 = tmp6 + tmp7 */ /* stage 4 */ - movq_r2m (mm7, *(dataptr + 13)); /*save y6 */ - movq_r2r (mm4, mm1); /* copy tmp10 */ + movq_r2m (mm7, *(dataptr + 13)); /*save y6 */ + movq_r2r (mm4, mm1); /* copy tmp10 */ - psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ - psllw_i2r (2, mm4); /* shift tmp10 */ + psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ + psllw_i2r (2, mm4); /* shift tmp10 */ - movq_m2r (RTjpeg_C2mC6, mm0); /* load C2mC6 */ - psllw_i2r (2, mm1); /* shift (tmp10-tmp12) */ + movq_m2r (RTjpeg_C2mC6, mm0); /* load C2mC6 */ + psllw_i2r (2, mm1); /* shift (tmp10-tmp12) */ - pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ - psllw_i2r (2, mm5); /* prepare for multiply */ + pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ + psllw_i2r (2, mm5); /* prepare for multiply */ - pmulhw_r2r (mm0, mm4); /* multiply by converted real */ + pmulhw_r2r (mm0, mm4); /* multiply by converted real */ /* stage 5 */ - pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ - psllw_i2r (2, mm2); /* prepare for multiply */ + pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ + psllw_i2r (2, mm2); /* prepare for multiply */ - pmulhw_m2r (RTjpeg_C2pC6, mm2); /* multiply */ - movq_r2r (mm3, mm0); /* copy tmp7 */ + pmulhw_m2r (RTjpeg_C2pC6, mm2); /* multiply */ + movq_r2r (mm3, mm0); /* copy tmp7 */ - movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */ - paddw_r2r (mm1, mm4); /* z2 */ + movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */ + paddw_r2r (mm1, mm4); /* z2 */ - paddw_r2r (mm5, mm0); /* z11 */ - psubw_r2r (mm5, mm3); /* z13 */ + paddw_r2r (mm5, mm0); /* z11 */ + psubw_r2r (mm5, mm3); /* z13 */ /* stage 6 */ - movq_r2r (mm3, mm5); /* copy z13 */ - paddw_r2r (mm1, mm2); /* z4 */ + movq_r2r (mm3, mm5); /* copy z13 */ + paddw_r2r (mm1, mm2); /* z4 */ - movq_r2r (mm0, mm6); /* copy z11 */ - psubw_r2r (mm4, mm5); /* y3 */ + movq_r2r (mm0, mm6); /* copy z11 */ + psubw_r2r (mm4, mm5); /* y3 */ - paddw_r2r (mm2, mm6); /* y1 */ - paddw_r2r (mm4, mm3); /* y5 */ + paddw_r2r (mm2, mm6); /* y1 */ + paddw_r2r (mm4, mm3); /* y5 */ - movq_r2m (mm5, *(dataptr + 7)); /*save y3 */ + movq_r2m (mm5, *(dataptr + 7)); /*save y3 */ - movq_r2m (mm6, *(dataptr + 3)); /*save y1 */ - psubw_r2r (mm2, mm0); /* y7 */ + movq_r2m (mm6, *(dataptr + 3)); /*save y1 */ + psubw_r2r (mm2, mm0); /* y7 */ /************************************************************************************************ Start of Transpose ************************************************************************************************/ - movq_m2r (*(dataptr + 13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */ - movq_r2r (mm7, mm5); /* copy first line */ + movq_m2r (*(dataptr + 13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */ + movq_r2r (mm7, mm5); /* copy first line */ - punpcklwd_r2r (mm3, mm7); /* m11:m01|m10:m00 - interleave first and second lines */ - movq_r2r (mm6, mm2); /* copy third line */ + punpcklwd_r2r (mm3, mm7); /* m11:m01|m10:m00 - interleave first and second lines */ + movq_r2r (mm6, mm2); /* copy third line */ - punpcklwd_r2r (mm0, mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */ - movq_r2r (mm7, mm1); /* copy first intermediate result */ + punpcklwd_r2r (mm0, mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */ + movq_r2r (mm7, mm1); /* copy first intermediate result */ - punpckldq_r2r (mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */ + punpckldq_r2r (mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */ - punpckhdq_r2r (mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ + punpckhdq_r2r (mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ - movq_r2m (mm7, *(dataptr + 9)); /* write result 1 */ - punpckhwd_r2r (mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */ + movq_r2m (mm7, *(dataptr + 9)); /* write result 1 */ + punpckhwd_r2r (mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */ - movq_r2m (mm1, *(dataptr + 11)); /* write result 2 */ - punpckhwd_r2r (mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */ + movq_r2m (mm1, *(dataptr + 11)); /* write result 2 */ + punpckhwd_r2r (mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */ - movq_r2r (mm5, mm1); /* copy first intermediate result */ - punpckldq_r2r (mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */ + movq_r2r (mm5, mm1); /* copy first intermediate result */ + punpckldq_r2r (mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */ - movq_m2r (*(dataptr + 1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */ - punpckhdq_r2r (mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */ + movq_m2r (*(dataptr + 1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */ + punpckhdq_r2r (mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */ - movq_r2m (mm5, *(dataptr + 13)); /* write result 3 */ + movq_r2m (mm5, *(dataptr + 13)); /* write result 3 */ - /****** last 4x4 done */ + /****** last 4x4 done */ - movq_r2m (mm1, *(dataptr + 15)); /* write result 4, last 4x4 */ + movq_r2m (mm1, *(dataptr + 15)); /* write result 4, last 4x4 */ - movq_m2r (*(dataptr + 5), mm2); /* m23:m22|m21:m20 - third line */ - movq_r2r (mm0, mm6); /* copy first line */ + movq_m2r (*(dataptr + 5), mm2); /* m23:m22|m21:m20 - third line */ + movq_r2r (mm0, mm6); /* copy first line */ - punpcklwd_m2r (*(dataptr + 3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ - movq_r2r (mm2, mm7); /* copy third line */ + punpcklwd_m2r (*(dataptr + 3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ + movq_r2r (mm2, mm7); /* copy third line */ - punpcklwd_m2r (*(dataptr + 7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */ - movq_r2r (mm0, mm4); /* copy first intermediate result */ + punpcklwd_m2r (*(dataptr + 7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */ + movq_r2r (mm0, mm4); /* copy first intermediate result */ - movq_m2r (*(dataptr + 8), mm1); /* n03:n02|n01:n00 - first line */ - punpckldq_r2r (mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */ + movq_m2r (*(dataptr + 8), mm1); /* n03:n02|n01:n00 - first line */ + punpckldq_r2r (mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */ - movq_m2r (*(dataptr + 12), mm3); /* n23:n22|n21:n20 - third line */ - punpckhdq_r2r (mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */ + movq_m2r (*(dataptr + 12), mm3); /* n23:n22|n21:n20 - third line */ + punpckhdq_r2r (mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */ - punpckhwd_m2r (*(dataptr + 3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */ - movq_r2r (mm1, mm2); /* copy first line */ + punpckhwd_m2r (*(dataptr + 3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */ + movq_r2r (mm1, mm2); /* copy first line */ - punpckhwd_m2r (*(dataptr + 7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */ - movq_r2r (mm6, mm5); /* copy first intermediate result */ + punpckhwd_m2r (*(dataptr + 7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */ + movq_r2r (mm6, mm5); /* copy first intermediate result */ - movq_r2m (mm0, *(dataptr + 8)); /* write result 1 */ - punpckhdq_r2r (mm7, mm5); /* m33:m23|m13:m03 - produce third result */ + movq_r2m (mm0, *(dataptr + 8)); /* write result 1 */ + punpckhdq_r2r (mm7, mm5); /* m33:m23|m13:m03 - produce third result */ - punpcklwd_m2r (*(dataptr + 10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */ - movq_r2r (mm3, mm0); /* copy third line */ + punpcklwd_m2r (*(dataptr + 10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */ + movq_r2r (mm3, mm0); /* copy third line */ - punpckhwd_m2r (*(dataptr + 10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */ + punpckhwd_m2r (*(dataptr + 10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */ - movq_r2m (mm4, *(dataptr + 10)); /* write result 2 out */ - punpckldq_r2r (mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */ + movq_r2m (mm4, *(dataptr + 10)); /* write result 2 out */ + punpckldq_r2r (mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */ - punpcklwd_m2r (*(dataptr + 14), mm3); /* n33:n23|n32:n22 - interleave third and fourth lines */ - movq_r2r (mm1, mm4); /* copy second intermediate result */ + punpcklwd_m2r (*(dataptr + 14), mm3); /* n33:n23|n32:n22 - interleave third and fourth lines */ + movq_r2r (mm1, mm4); /* copy second intermediate result */ - movq_r2m (mm6, *(dataptr + 12)); /* write result 3 out */ - punpckldq_r2r (mm3, mm1); /* */ + movq_r2m (mm6, *(dataptr + 12)); /* write result 3 out */ + punpckldq_r2r (mm3, mm1); /* */ - punpckhwd_m2r (*(dataptr + 14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */ - movq_r2r (mm2, mm6); /* copy second intermediate result */ + punpckhwd_m2r (*(dataptr + 14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */ + movq_r2r (mm2, mm6); /* copy second intermediate result */ - movq_r2m (mm5, *(dataptr + 14)); /* write result 4 out */ - punpckhdq_r2r (mm3, mm4); /* n31:n21|n11:n01- produce second result */ + movq_r2m (mm5, *(dataptr + 14)); /* write result 4 out */ + punpckhdq_r2r (mm3, mm4); /* n31:n21|n11:n01- produce second result */ - movq_r2m (mm1, *(dataptr + 1)); /* write result 5 out - (first result for other 4 x 4 block) */ - punpckldq_r2r (mm0, mm2); /* n32:n22|n12:n02- produce third result */ + movq_r2m (mm1, *(dataptr + 1)); /* write result 5 out - (first result for other 4 x 4 block) */ + punpckldq_r2r (mm0, mm2); /* n32:n22|n12:n02- produce third result */ - movq_r2m (mm4, *(dataptr + 3)); /* write result 6 out */ - punpckhdq_r2r (mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */ + movq_r2m (mm4, *(dataptr + 3)); /* write result 6 out */ + punpckhdq_r2r (mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */ - movq_r2m (mm2, *(dataptr + 5)); /* write result 7 out */ + movq_r2m (mm2, *(dataptr + 5)); /* write result 7 out */ - movq_m2r (*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */ + movq_m2r (*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */ - movq_r2m (mm6, *(dataptr + 7)); /* write result 8 out */ + movq_r2m (mm6, *(dataptr + 7)); /* write result 8 out */ /* Do first 4x4 quadrant, which is used in the beginning of the DCT: */ - movq_m2r (*(dataptr + 4), mm7); /* m23:m22|m21:m20 - third line */ - movq_r2r (mm0, mm2); /* copy first line */ + movq_m2r (*(dataptr + 4), mm7); /* m23:m22|m21:m20 - third line */ + movq_r2r (mm0, mm2); /* copy first line */ - punpcklwd_m2r (*(dataptr + 2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ - movq_r2r (mm7, mm4); /* copy third line */ + punpcklwd_m2r (*(dataptr + 2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */ + movq_r2r (mm7, mm4); /* copy third line */ - punpcklwd_m2r (*(dataptr + 6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */ - movq_r2r (mm0, mm1); /* copy first intermediate result */ + punpcklwd_m2r (*(dataptr + 6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */ + movq_r2r (mm0, mm1); /* copy first intermediate result */ - movq_m2r (*(dataptr + 2), mm6); /* m13:m12|m11:m10 - second line */ - punpckldq_r2r (mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */ + movq_m2r (*(dataptr + 2), mm6); /* m13:m12|m11:m10 - second line */ + punpckldq_r2r (mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */ - movq_m2r (*(dataptr + 6), mm5); /* m33:m32|m31:m30 - fourth line */ - punpckhdq_r2r (mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ + movq_m2r (*(dataptr + 6), mm5); /* m33:m32|m31:m30 - fourth line */ + punpckhdq_r2r (mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */ - movq_r2r (mm0, mm7); /* write result 1 */ - punpckhwd_r2r (mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */ + movq_r2r (mm0, mm7); /* write result 1 */ + punpckhwd_r2r (mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */ - psubw_m2r (*(dataptr + 14), mm7); /* tmp07=x0-x7: Stage 1 */ - movq_r2r (mm1, mm6); /* write result 2 */ + psubw_m2r (*(dataptr + 14), mm7); /* tmp07=x0-x7: Stage 1 */ + movq_r2r (mm1, mm6); /* write result 2 */ - paddw_m2r (*(dataptr + 14), mm0); /* tmp00=x0+x7: Stage 1 */ - punpckhwd_r2r (mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */ + paddw_m2r (*(dataptr + 14), mm0); /* tmp00=x0+x7: Stage 1 */ + punpckhwd_r2r (mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */ - paddw_m2r (*(dataptr + 12), mm1); /* tmp01=x1+x6: Stage 1 */ - movq_r2r (mm2, mm3); /* copy first intermediate result */ + paddw_m2r (*(dataptr + 12), mm1); /* tmp01=x1+x6: Stage 1 */ + movq_r2r (mm2, mm3); /* copy first intermediate result */ - psubw_m2r (*(dataptr + 12), mm6); /* tmp06=x1-x6: Stage 1 */ - punpckldq_r2r (mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */ + psubw_m2r (*(dataptr + 12), mm6); /* tmp06=x1-x6: Stage 1 */ + punpckldq_r2r (mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */ - movq_r2m (mm7, tmp7); /* save tmp07 */ - movq_r2r (mm2, mm5); /* write result 3 */ + movq_r2m (mm7, tmp7); /* save tmp07 */ + movq_r2r (mm2, mm5); /* write result 3 */ - movq_r2m (mm6, tmp6); /* save tmp06 */ + movq_r2m (mm6, tmp6); /* save tmp06 */ - punpckhdq_r2r (mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */ + punpckhdq_r2r (mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */ - paddw_m2r (*(dataptr + 10), mm2); /* tmp02=x2+x5: stage 1 */ - movq_r2r (mm3, mm4); /* write result 4 */ + paddw_m2r (*(dataptr + 10), mm2); /* tmp02=x2+x5: stage 1 */ + movq_r2r (mm3, mm4); /* write result 4 */ /************************************************************************************************ End of Transpose 2 ************************************************************************************************/ - paddw_m2r (*(dataptr + 8), mm3); /* tmp03=x3+x4: stage 1 */ + paddw_m2r (*(dataptr + 8), mm3); /* tmp03=x3+x4: stage 1 */ movq_r2r (mm0, mm7); - psubw_m2r (*(dataptr + 8), mm4); /* tmp04=x3-x4: stage 1 */ + psubw_m2r (*(dataptr + 8), mm4); /* tmp04=x3-x4: stage 1 */ movq_r2r (mm1, mm6); - paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */ - psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */ + paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */ + psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */ - psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */ - paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */ + psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */ + paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */ - psubw_m2r (*(dataptr + 10), mm5); /* tmp05=x2-x5: stage 1 */ - paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ + psubw_m2r (*(dataptr + 10), mm5); /* tmp05=x2-x5: stage 1 */ + paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ /* stage 3 */ movq_m2r (tmp6, mm2); movq_r2r (mm0, mm3); - psllw_i2r (2, mm6); /* m8 * 2^2 */ + psllw_i2r (2, mm6); /* m8 * 2^2 */ paddw_r2r (mm1, mm0); - pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ + pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ psubw_r2r (mm1, mm3); movq_r2m (mm0, *dataptr); @@ -1033,197 +1033,197 @@ RTjpeg_dctY (__u8 * idata, __s16 * odata, int rskip) /* Odd part */ movq_r2m (mm3, *(dataptr + 8)); - paddw_r2r (mm5, mm4); /* tmp10 */ + paddw_r2r (mm5, mm4); /* tmp10 */ movq_m2r (tmp7, mm3); - paddw_r2r (mm6, mm0); /* tmp32 */ + paddw_r2r (mm6, mm0); /* tmp32 */ - paddw_r2r (mm2, mm5); /* tmp11 */ - psubw_r2r (mm6, mm7); /* tmp33 */ + paddw_r2r (mm2, mm5); /* tmp11 */ + psubw_r2r (mm6, mm7); /* tmp33 */ movq_r2m (mm0, *(dataptr + 4)); - paddw_r2r (mm3, mm2); /* tmp12 */ + paddw_r2r (mm3, mm2); /* tmp12 */ /* stage 4 */ movq_r2m (mm7, *(dataptr + 12)); - movq_r2r (mm4, mm1); /* copy of tmp10 */ + movq_r2r (mm4, mm1); /* copy of tmp10 */ - psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ - psllw_i2r (2, mm4); /* m8 * 2^2 */ + psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ + psllw_i2r (2, mm4); /* m8 * 2^2 */ movq_m2r (RTjpeg_C2mC6, mm0); psllw_i2r (2, mm1); - pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ + pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ psllw_i2r (2, mm2); - pmulhw_r2r (mm0, mm4); /* z5 */ + pmulhw_r2r (mm0, mm4); /* z5 */ /* stage 5 */ pmulhw_m2r (RTjpeg_C2pC6, mm2); psllw_i2r (2, mm5); - pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ - movq_r2r (mm3, mm0); /* copy tmp7 */ + pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ + movq_r2r (mm3, mm0); /* copy tmp7 */ movq_m2r (*(dataptr + 1), mm7); - paddw_r2r (mm1, mm4); /* z2 */ + paddw_r2r (mm1, mm4); /* z2 */ - paddw_r2r (mm1, mm2); /* z4 */ + paddw_r2r (mm1, mm2); /* z4 */ - paddw_r2r (mm5, mm0); /* z11 */ - psubw_r2r (mm5, mm3); /* z13 */ + paddw_r2r (mm5, mm0); /* z11 */ + psubw_r2r (mm5, mm3); /* z13 */ /* stage 6 */ - movq_r2r (mm3, mm5); /* copy z13 */ - psubw_r2r (mm4, mm3); /* y3=z13 - z2 */ + movq_r2r (mm3, mm5); /* copy z13 */ + psubw_r2r (mm4, mm3); /* y3=z13 - z2 */ - paddw_r2r (mm4, mm5); /* y5=z13 + z2 */ - movq_r2r (mm0, mm6); /* copy z11 */ + paddw_r2r (mm4, mm5); /* y5=z13 + z2 */ + movq_r2r (mm0, mm6); /* copy z11 */ - movq_r2m (mm3, *(dataptr + 6)); /*save y3 */ - psubw_r2r (mm2, mm0); /* y7=z11 - z4 */ + movq_r2m (mm3, *(dataptr + 6)); /*save y3 */ + psubw_r2r (mm2, mm0); /* y7=z11 - z4 */ - movq_r2m (mm5, *(dataptr + 10)); /*save y5 */ - paddw_r2r (mm2, mm6); /* y1=z11 + z4 */ + movq_r2m (mm5, *(dataptr + 10)); /*save y5 */ + paddw_r2r (mm2, mm6); /* y1=z11 + z4 */ - movq_r2m (mm0, *(dataptr + 14)); /*save y7 */ + movq_r2m (mm0, *(dataptr + 14)); /*save y7 */ - /************************************************ + /************************************************ * End of 1st 4 rows ************************************************/ - movq_m2r (*(dataptr + 3), mm1); /* load x1 : stage 1 */ - movq_r2r (mm7, mm0); /* copy x0 */ + movq_m2r (*(dataptr + 3), mm1); /* load x1 : stage 1 */ + movq_r2r (mm7, mm0); /* copy x0 */ - movq_r2m (mm6, *(dataptr + 2)); /*save y1 */ + movq_r2m (mm6, *(dataptr + 2)); /*save y1 */ - movq_m2r (*(dataptr + 5), mm2); /* load x2 : stage 1 */ - movq_r2r (mm1, mm6); /* copy x1 */ + movq_m2r (*(dataptr + 5), mm2); /* load x2 : stage 1 */ + movq_r2r (mm1, mm6); /* copy x1 */ - paddw_m2r (*(dataptr + 15), mm0); /* tmp00 = x0 + x7 */ + paddw_m2r (*(dataptr + 15), mm0); /* tmp00 = x0 + x7 */ - movq_m2r (*(dataptr + 7), mm3); /* load x3 : stage 1 */ - movq_r2r (mm2, mm5); /* copy x2 */ + movq_m2r (*(dataptr + 7), mm3); /* load x3 : stage 1 */ + movq_r2r (mm2, mm5); /* copy x2 */ - psubw_m2r (*(dataptr + 15), mm7); /* tmp07 = x0 - x7 */ - movq_r2r (mm3, mm4); /* copy x3 */ + psubw_m2r (*(dataptr + 15), mm7); /* tmp07 = x0 - x7 */ + movq_r2r (mm3, mm4); /* copy x3 */ - paddw_m2r (*(dataptr + 13), mm1); /* tmp01 = x1 + x6 */ + paddw_m2r (*(dataptr + 13), mm1); /* tmp01 = x1 + x6 */ - movq_r2m (mm7, tmp7); /* save tmp07 */ - movq_r2r (mm0, mm7); /* copy tmp00 */ + movq_r2m (mm7, tmp7); /* save tmp07 */ + movq_r2r (mm0, mm7); /* copy tmp00 */ - psubw_m2r (*(dataptr + 13), mm6); /* tmp06 = x1 - x6 */ + psubw_m2r (*(dataptr + 13), mm6); /* tmp06 = x1 - x6 */ /* stage 2, Even Part */ - paddw_m2r (*(dataptr + 9), mm3); /* tmp03 = x3 + x4 */ + paddw_m2r (*(dataptr + 9), mm3); /* tmp03 = x3 + x4 */ - movq_r2m (mm6, tmp6); /* save tmp07 */ - movq_r2r (mm1, mm6); /* copy tmp01 */ + movq_r2m (mm6, tmp6); /* save tmp07 */ + movq_r2r (mm1, mm6); /* copy tmp01 */ - paddw_m2r (*(dataptr + 11), mm2); /* tmp02 = x2 + x5 */ - paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03 */ + paddw_m2r (*(dataptr + 11), mm2); /* tmp02 = x2 + x5 */ + paddw_r2r (mm3, mm0); /* tmp10 = tmp00 + tmp03 */ - psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03 */ + psubw_r2r (mm3, mm7); /* tmp13 = tmp00 - tmp03 */ - psubw_m2r (*(dataptr + 9), mm4); /* tmp04 = x3 - x4 */ - psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02 */ + psubw_m2r (*(dataptr + 9), mm4); /* tmp04 = x3 - x4 */ + psubw_r2r (mm2, mm6); /* tmp12 = tmp01 - tmp02 */ - paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02 */ + paddw_r2r (mm2, mm1); /* tmp11 = tmp01 + tmp02 */ - psubw_m2r (*(dataptr + 11), mm5); /* tmp05 = x2 - x5 */ - paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ + psubw_m2r (*(dataptr + 11), mm5); /* tmp05 = x2 - x5 */ + paddw_r2r (mm7, mm6); /* tmp12 + tmp13 */ /* stage 3, Even and stage 4 & 5 even */ - movq_m2r (tmp6, mm2); /* load tmp6 */ - movq_r2r (mm0, mm3); /* copy tmp10 */ + movq_m2r (tmp6, mm2); /* load tmp6 */ + movq_r2r (mm0, mm3); /* copy tmp10 */ - psllw_i2r (2, mm6); /* shift z1 */ - paddw_r2r (mm1, mm0); /* y0=tmp10 + tmp11 */ + psllw_i2r (2, mm6); /* shift z1 */ + paddw_r2r (mm1, mm0); /* y0=tmp10 + tmp11 */ - pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ - psubw_r2r (mm1, mm3); /* y4=tmp10 - tmp11 */ + pmulhw_m2r (RTjpeg_C4, mm6); /* z1 */ + psubw_r2r (mm1, mm3); /* y4=tmp10 - tmp11 */ - movq_r2m (mm0, *(dataptr + 1)); /*save y0 */ - movq_r2r (mm7, mm0); /* copy tmp13 */ + movq_r2m (mm0, *(dataptr + 1)); /*save y0 */ + movq_r2r (mm7, mm0); /* copy tmp13 */ /* odd part */ - movq_r2m (mm3, *(dataptr + 9)); /*save y4 */ - paddw_r2r (mm5, mm4); /* tmp10 = tmp4 + tmp5 */ + movq_r2m (mm3, *(dataptr + 9)); /*save y4 */ + paddw_r2r (mm5, mm4); /* tmp10 = tmp4 + tmp5 */ - movq_m2r (tmp7, mm3); /* load tmp7 */ - paddw_r2r (mm6, mm0); /* tmp32 = tmp13 + z1 */ + movq_m2r (tmp7, mm3); /* load tmp7 */ + paddw_r2r (mm6, mm0); /* tmp32 = tmp13 + z1 */ - paddw_r2r (mm2, mm5); /* tmp11 = tmp5 + tmp6 */ - psubw_r2r (mm6, mm7); /* tmp33 = tmp13 - z1 */ + paddw_r2r (mm2, mm5); /* tmp11 = tmp5 + tmp6 */ + psubw_r2r (mm6, mm7); /* tmp33 = tmp13 - z1 */ - movq_r2m (mm0, *(dataptr + 5)); /*save y2 */ - paddw_r2r (mm3, mm2); /* tmp12 = tmp6 + tmp7 */ + movq_r2m (mm0, *(dataptr + 5)); /*save y2 */ + paddw_r2r (mm3, mm2); /* tmp12 = tmp6 + tmp7 */ /* stage 4 */ - movq_r2m (mm7, *(dataptr + 13)); /*save y6 */ - movq_r2r (mm4, mm1); /* copy tmp10 */ + movq_r2m (mm7, *(dataptr + 13)); /*save y6 */ + movq_r2r (mm4, mm1); /* copy tmp10 */ - psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ - psllw_i2r (2, mm4); /* shift tmp10 */ + psubw_r2r (mm2, mm1); /* tmp10 - tmp12 */ + psllw_i2r (2, mm4); /* shift tmp10 */ - movq_m2r (RTjpeg_C2mC6, mm0); /* load C2mC6 */ - psllw_i2r (2, mm1); /* shift (tmp10-tmp12) */ + movq_m2r (RTjpeg_C2mC6, mm0); /* load C2mC6 */ + psllw_i2r (2, mm1); /* shift (tmp10-tmp12) */ - pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ - psllw_i2r (2, mm5); /* prepare for multiply */ + pmulhw_m2r (RTjpeg_C6, mm1); /* z5 */ + psllw_i2r (2, mm5); /* prepare for multiply */ - pmulhw_r2r (mm0, mm4); /* multiply by converted real */ + pmulhw_r2r (mm0, mm4); /* multiply by converted real */ /* stage 5 */ - pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ - psllw_i2r (2, mm2); /* prepare for multiply */ + pmulhw_m2r (RTjpeg_C4, mm5); /* z3 */ + psllw_i2r (2, mm2); /* prepare for multiply */ - pmulhw_m2r (RTjpeg_C2pC6, mm2); /* multiply */ - movq_r2r (mm3, mm0); /* copy tmp7 */ + pmulhw_m2r (RTjpeg_C2pC6, mm2); /* multiply */ + movq_r2r (mm3, mm0); /* copy tmp7 */ - movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */ - paddw_r2r (mm1, mm4); /* z2 */ + movq_m2r (*(dataptr + 9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */ + paddw_r2r (mm1, mm4); /* z2 */ - paddw_r2r (mm5, mm0); /* z11 */ - psubw_r2r (mm5, mm3); /* z13 */ + paddw_r2r (mm5, mm0); /* z11 */ + psubw_r2r (mm5, mm3); /* z13 */ /* stage 6 */ - movq_r2r (mm3, mm5); /* copy z13 */ - paddw_r2r (mm1, mm2); /* z4 */ + movq_r2r (mm3, mm5); /* copy z13 */ + paddw_r2r (mm1, mm2); /* z4 */ - movq_r2r (mm0, mm6); /* copy z11 */ - psubw_r2r (mm4, mm5); /* y3 */ + movq_r2r (mm0, mm6); /* copy z11 */ + psubw_r2r (mm4, mm5); /* y3 */ - paddw_r2r (mm2, mm6); /* y1 */ - paddw_r2r (mm4, mm3); /* y5 */ + paddw_r2r (mm2, mm6); /* y1 */ + paddw_r2r (mm4, mm3); /* y5 */ - movq_r2m (mm5, *(dataptr + 7)); /*save y3 */ - psubw_r2r (mm2, mm0); /* yè=z11 - z4 */ + movq_r2m (mm5, *(dataptr + 7)); /*save y3 */ + psubw_r2r (mm2, mm0); /* yè=z11 - z4 */ - movq_r2m (mm3, *(dataptr + 11)); /*save y5 */ + movq_r2m (mm3, *(dataptr + 11)); /*save y5 */ - movq_r2m (mm6, *(dataptr + 3)); /*save y1 */ + movq_r2m (mm6, *(dataptr + 3)); /*save y1 */ - movq_r2m (mm0, *(dataptr + 15)); /*save y7 */ + movq_r2m (mm0, *(dataptr + 15)); /*save y7 */ #endif } -#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */ +#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */ +#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */ +#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */ +#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */ #define DESCALE(x) (__s16)( ((x)+4) >> 3) @@ -1266,119 +1266,119 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* Odd part */ - movq_m2r (*(idata + 10), mm1); /* load idata[DCTSIZE*5] */ + movq_m2r (*(idata + 10), mm1); /* load idata[DCTSIZE*5] */ - movq_m2r (*(idata + 6), mm0); /* load idata[DCTSIZE*3] */ + movq_m2r (*(idata + 6), mm0); /* load idata[DCTSIZE*3] */ - movq_m2r (*(idata + 2), mm3); /* load idata[DCTSIZE*1] */ + movq_m2r (*(idata + 2), mm3); /* load idata[DCTSIZE*1] */ - movq_r2r (mm1, mm2); /* copy tmp6 : phase 6 */ - */movq_m2r (*(idata + 14), mm4); /* load idata[DCTSIZE*7] */ + movq_r2r (mm1, mm2); /* copy tmp6 : phase 6 */ + */movq_m2r (*(idata + 14), mm4); /* load idata[DCTSIZE*7] */ - paddw_r2r (mm0, mm1); /* z13 = tmp6 + tmp5; */ + paddw_r2r (mm0, mm1); /* z13 = tmp6 + tmp5; */ - psubw_r2r (mm0, mm2); /* z10 = tmp6 - tmp5 */ + psubw_r2r (mm0, mm2); /* z10 = tmp6 - tmp5 */ - psllw_i2r (2, mm2); /* shift z10 */ - movq_r2r (mm2, mm0); /* copy z10 */ + psllw_i2r (2, mm2); /* shift z10 */ + movq_r2r (mm2, mm0); /* copy z10 */ - pmulhw_m2r (fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */ - movq_r2r (mm3, mm5); /* copy tmp4 */ + pmulhw_m2r (fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */ + movq_r2r (mm3, mm5); /* copy tmp4 */ - pmulhw_m2r (fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */ - paddw_r2r (mm4, mm3); /* z11 = tmp4 + tmp7; */ + pmulhw_m2r (fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */ + paddw_r2r (mm4, mm3); /* z11 = tmp4 + tmp7; */ - movq_r2r (mm3, mm6); /* copy z11 : phase 5 */ - psubw_r2r (mm4, mm5); /* z12 = tmp4 - tmp7; */ + movq_r2r (mm3, mm6); /* copy z11 : phase 5 */ + psubw_r2r (mm4, mm5); /* z12 = tmp4 - tmp7; */ - psubw_r2r (mm1, mm6); /* z11-z13 */ - psllw_i2r (2, mm5); /* shift z12 */ + psubw_r2r (mm1, mm6); /* z11-z13 */ + psllw_i2r (2, mm5); /* shift z12 */ - movq_m2r (*(idata + 12), mm4); /* load idata[DCTSIZE*6], even part */ - movq_r2r (mm5, mm7); /* copy z12 */ + movq_m2r (*(idata + 12), mm4); /* load idata[DCTSIZE*6], even part */ + movq_r2r (mm5, mm7); /* copy z12 */ - pmulhw_m2r (fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6): even part */ - paddw_r2r (mm1, mm3); /* tmp7 = z11 + z13; */ + pmulhw_m2r (fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6): even part */ + paddw_r2r (mm1, mm3); /* tmp7 = z11 + z13; */ /*ok */ /* Even part */ - pmulhw_m2r (fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */ + pmulhw_m2r (fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */ psllw_i2r (2, mm6); - movq_m2r (*(idata + 4), mm1); /* load idata[DCTSIZE*2] */ + movq_m2r (*(idata + 4), mm1); /* load idata[DCTSIZE*2] */ - paddw_r2r (mm5, mm0); /* tmp10 */ + paddw_r2r (mm5, mm0); /* tmp10 */ - paddw_r2r (mm7, mm2); /* tmp12 */ + paddw_r2r (mm7, mm2); /* tmp12 */ - pmulhw_m2r (fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */ - psubw_r2r (mm3, mm2); /* tmp6 = tmp12 - tmp7 */ + pmulhw_m2r (fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */ + psubw_r2r (mm3, mm2); /* tmp6 = tmp12 - tmp7 */ - movq_r2r (mm1, mm5); /* copy tmp1 */ - paddw_r2r (mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */ + movq_r2r (mm1, mm5); /* copy tmp1 */ + paddw_r2r (mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */ - psubw_r2r (mm4, mm5); /* tmp1-tmp3 */ - psubw_r2r (mm2, mm6); /* tmp5 = tmp11 - tmp6; */ + psubw_r2r (mm4, mm5); /* tmp1-tmp3 */ + psubw_r2r (mm2, mm6); /* tmp5 = tmp11 - tmp6; */ - movq_r2m (mm1, *(wsptr)); /* save tmp13 in workspace */ - psllw_i2r (2, mm5); /* shift tmp1-tmp3 */ + movq_r2m (mm1, *(wsptr)); /* save tmp13 in workspace */ + psllw_i2r (2, mm5); /* shift tmp1-tmp3 */ - movq_m2r (*(idata), mm7); /* load idata[DCTSIZE*0] */ + movq_m2r (*(idata), mm7); /* load idata[DCTSIZE*0] */ - pmulhw_m2r (fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */ - paddw_r2r (mm6, mm0); /* tmp4 = tmp10 + tmp5; */ + pmulhw_m2r (fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */ + paddw_r2r (mm6, mm0); /* tmp4 = tmp10 + tmp5; */ - movq_m2r (*(idata + 8), mm4); /* load idata[DCTSIZE*4] */ + movq_m2r (*(idata + 8), mm4); /* load idata[DCTSIZE*4] */ - psubw_r2r (mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */ + psubw_r2r (mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */ - movq_r2m (mm0, *(wsptr + 4)); /* save tmp4 in workspace */ - movq_r2r (mm7, mm1); /* copy tmp0 : phase 3 */ + movq_r2m (mm0, *(wsptr + 4)); /* save tmp4 in workspace */ + movq_r2r (mm7, mm1); /* copy tmp0 : phase 3 */ - movq_r2m (mm5, *(wsptr + 2)); /* save tmp12 in workspace */ - psubw_r2r (mm4, mm1); /* tmp11 = tmp0 - tmp2; */ + movq_r2m (mm5, *(wsptr + 2)); /* save tmp12 in workspace */ + psubw_r2r (mm4, mm1); /* tmp11 = tmp0 - tmp2; */ - paddw_r2r (mm4, mm7); /* tmp10 = tmp0 + tmp2; */ - movq_r2r (mm1, mm5); /* copy tmp11 */ + paddw_r2r (mm4, mm7); /* tmp10 = tmp0 + tmp2; */ + movq_r2r (mm1, mm5); /* copy tmp11 */ - paddw_m2r (*(wsptr + 2), mm1); /* tmp1 = tmp11 + tmp12; */ - movq_r2r (mm7, mm4); /* copy tmp10 : phase 2 */ + paddw_m2r (*(wsptr + 2), mm1); /* tmp1 = tmp11 + tmp12; */ + movq_r2r (mm7, mm4); /* copy tmp10 : phase 2 */ - paddw_m2r (*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */ + paddw_m2r (*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */ - psubw_m2r (*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */ - movq_r2r (mm7, mm0); /* copy tmp0 */ + psubw_m2r (*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */ + movq_r2r (mm7, mm0); /* copy tmp0 */ - psubw_m2r (*(wsptr + 2), mm5); /* tmp2 = tmp11 - tmp12; */ - paddw_r2r (mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */ + psubw_m2r (*(wsptr + 2), mm5); /* tmp2 = tmp11 - tmp12; */ + paddw_r2r (mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */ - psubw_r2r (mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */ + psubw_r2r (mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */ - movq_r2m (mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */ - movq_r2r (mm1, mm3); /* copy tmp1 */ + movq_r2m (mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */ + movq_r2r (mm1, mm3); /* copy tmp1 */ - movq_r2m (mm0, *(wsptr + 14)); /* wsptr[DCTSIZE*7] */ - paddw_r2r (mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */ + movq_r2m (mm0, *(wsptr + 14)); /* wsptr[DCTSIZE*7] */ + paddw_r2r (mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */ - psubw_r2r (mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */ + psubw_r2r (mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */ - movq_r2m (mm1, *(wsptr + 2)); /* wsptr[DCTSIZE*1] */ - movq_r2r (mm4, mm1); /* copy tmp3 */ + movq_r2m (mm1, *(wsptr + 2)); /* wsptr[DCTSIZE*1] */ + movq_r2r (mm4, mm1); /* copy tmp3 */ - movq_r2m (mm3, *(wsptr + 12)); /* wsptr[DCTSIZE*6] */ + movq_r2m (mm3, *(wsptr + 12)); /* wsptr[DCTSIZE*6] */ - paddw_m2r (*(wsptr + 4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */ + paddw_m2r (*(wsptr + 4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */ - psubw_m2r (*(wsptr + 4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */ + psubw_m2r (*(wsptr + 4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */ movq_r2m (mm4, *(wsptr + 8)); - movq_r2r (mm5, mm7); /* copy tmp2 */ + movq_r2r (mm5, mm7); /* copy tmp2 */ - paddw_r2r (mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */ + paddw_r2r (mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */ movq_r2m (mm1, *(wsptr + 6)); - psubw_r2r (mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */ + psubw_r2r (mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */ movq_r2m (mm5, *(wsptr + 4)); @@ -1394,117 +1394,117 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /*****************************************************************/ - movq_m2r (*(idata + 10), mm1); /* load idata[DCTSIZE*5] */ + movq_m2r (*(idata + 10), mm1); /* load idata[DCTSIZE*5] */ - movq_m2r (*(idata + 6), mm0); /* load idata[DCTSIZE*3] */ + movq_m2r (*(idata + 6), mm0); /* load idata[DCTSIZE*3] */ - movq_m2r (*(idata + 2), mm3); /* load idata[DCTSIZE*1] */ - movq_r2r (mm1, mm2); /* copy tmp6 : phase 6 */ - */movq_m2r (*(idata + 14), mm4); /* load idata[DCTSIZE*7] */ - paddw_r2r (mm0, mm1); /* z13 = tmp6 + tmp5; */ + movq_m2r (*(idata + 2), mm3); /* load idata[DCTSIZE*1] */ + movq_r2r (mm1, mm2); /* copy tmp6 : phase 6 */ + */movq_m2r (*(idata + 14), mm4); /* load idata[DCTSIZE*7] */ + paddw_r2r (mm0, mm1); /* z13 = tmp6 + tmp5; */ - psubw_r2r (mm0, mm2); /* z10 = tmp6 - tmp5 */ + psubw_r2r (mm0, mm2); /* z10 = tmp6 - tmp5 */ - psllw_i2r (2, mm2); /* shift z10 */ - movq_r2r (mm2, mm0); /* copy z10 */ + psllw_i2r (2, mm2); /* shift z10 */ + movq_r2r (mm2, mm0); /* copy z10 */ - pmulhw_m2r (fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */ - movq_r2r (mm3, mm5); /* copy tmp4 */ + pmulhw_m2r (fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */ + movq_r2r (mm3, mm5); /* copy tmp4 */ - pmulhw_m2r (fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */ - paddw_r2r (mm4, mm3); /* z11 = tmp4 + tmp7; */ + pmulhw_m2r (fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */ + paddw_r2r (mm4, mm3); /* z11 = tmp4 + tmp7; */ - movq_r2r (mm3, mm6); /* copy z11 : phase 5 */ - psubw_r2r (mm4, mm5); /* z12 = tmp4 - tmp7; */ + movq_r2r (mm3, mm6); /* copy z11 : phase 5 */ + psubw_r2r (mm4, mm5); /* z12 = tmp4 - tmp7; */ - psubw_r2r (mm1, mm6); /* z11-z13 */ - psllw_i2r (2, mm5); /* shift z12 */ + psubw_r2r (mm1, mm6); /* z11-z13 */ + psllw_i2r (2, mm5); /* shift z12 */ - movq_m2r (*(idata + 12), mm4); /* load idata[DCTSIZE*6], even part */ - movq_r2r (mm5, mm7); /* copy z12 */ + movq_m2r (*(idata + 12), mm4); /* load idata[DCTSIZE*6], even part */ + movq_r2r (mm5, mm7); /* copy z12 */ - pmulhw_m2r (fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6) even part */ - paddw_r2r (mm1, mm3); /* tmp7 = z11 + z13; */ + pmulhw_m2r (fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6) even part */ + paddw_r2r (mm1, mm3); /* tmp7 = z11 + z13; */ /*ok */ /* Even part */ - pmulhw_m2r (fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */ + pmulhw_m2r (fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */ psllw_i2r (2, mm6); - movq_m2r (*(idata + 4), mm1); /* load idata[DCTSIZE*2] */ + movq_m2r (*(idata + 4), mm1); /* load idata[DCTSIZE*2] */ - paddw_r2r (mm5, mm0); /* tmp10 */ + paddw_r2r (mm5, mm0); /* tmp10 */ - paddw_r2r (mm7, mm2); /* tmp12 */ + paddw_r2r (mm7, mm2); /* tmp12 */ - pmulhw_m2r (fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */ - psubw_r2r (mm3, mm2); /* tmp6 = tmp12 - tmp7 */ + pmulhw_m2r (fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */ + psubw_r2r (mm3, mm2); /* tmp6 = tmp12 - tmp7 */ - movq_r2r (mm1, mm5); /* copy tmp1 */ - paddw_r2r (mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */ + movq_r2r (mm1, mm5); /* copy tmp1 */ + paddw_r2r (mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */ - psubw_r2r (mm4, mm5); /* tmp1-tmp3 */ - psubw_r2r (mm2, mm6); /* tmp5 = tmp11 - tmp6; */ + psubw_r2r (mm4, mm5); /* tmp1-tmp3 */ + psubw_r2r (mm2, mm6); /* tmp5 = tmp11 - tmp6; */ - movq_r2m (mm1, *(wsptr)); /* save tmp13 in workspace */ - psllw_i2r (2, mm5); /* shift tmp1-tmp3 */ + movq_r2m (mm1, *(wsptr)); /* save tmp13 in workspace */ + psllw_i2r (2, mm5); /* shift tmp1-tmp3 */ - movq_m2r (*(idata), mm7); /* load idata[DCTSIZE*0] */ - paddw_r2r (mm6, mm0); /* tmp4 = tmp10 + tmp5; */ + movq_m2r (*(idata), mm7); /* load idata[DCTSIZE*0] */ + paddw_r2r (mm6, mm0); /* tmp4 = tmp10 + tmp5; */ - pmulhw_m2r (fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */ + pmulhw_m2r (fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */ - movq_m2r (*(idata + 8), mm4); /* load idata[DCTSIZE*4] */ + movq_m2r (*(idata + 8), mm4); /* load idata[DCTSIZE*4] */ - psubw_r2r (mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */ + psubw_r2r (mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */ - movq_r2m (mm0, *(wsptr + 4)); /* save tmp4 in workspace */ - movq_r2r (mm7, mm1); /* copy tmp0: phase 3 */ + movq_r2m (mm0, *(wsptr + 4)); /* save tmp4 in workspace */ + movq_r2r (mm7, mm1); /* copy tmp0: phase 3 */ - movq_r2m (mm5, *(wsptr + 2)); /* save tmp12 in workspace */ - psubw_r2r (mm4, mm1); /* tmp11 = tmp0 - tmp2; */ + movq_r2m (mm5, *(wsptr + 2)); /* save tmp12 in workspace */ + psubw_r2r (mm4, mm1); /* tmp11 = tmp0 - tmp2; */ - paddw_r2r (mm4, mm7); /* tmp10 = tmp0 + tmp2; */ - movq_r2r (mm1, mm5); /* copy tmp11 */ + paddw_r2r (mm4, mm7); /* tmp10 = tmp0 + tmp2; */ + movq_r2r (mm1, mm5); /* copy tmp11 */ - paddw_m2r (*(wsptr + 2), mm1); /* tmp1 = tmp11 + tmp12; */ - movq_r2r (mm7, mm4); /* copy tmp10: phase 2 */ + paddw_m2r (*(wsptr + 2), mm1); /* tmp1 = tmp11 + tmp12; */ + movq_r2r (mm7, mm4); /* copy tmp10: phase 2 */ - paddw_m2r (*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */ + paddw_m2r (*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */ - psubw_m2r (*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */ - movq_r2r (mm7, mm0); /* copy tmp0 */ + psubw_m2r (*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */ + movq_r2r (mm7, mm0); /* copy tmp0 */ - psubw_m2r (*(wsptr + 2), mm5); /* tmp2 = tmp11 - tmp12; */ - paddw_r2r (mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */ + psubw_m2r (*(wsptr + 2), mm5); /* tmp2 = tmp11 - tmp12; */ + paddw_r2r (mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */ - psubw_r2r (mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */ + psubw_r2r (mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */ - movq_r2m (mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */ - movq_r2r (mm1, mm3); /* copy tmp1 */ + movq_r2m (mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */ + movq_r2r (mm1, mm3); /* copy tmp1 */ - movq_r2m (mm0, *(wsptr + 14)); /* wsptr[DCTSIZE*7] */ - paddw_r2r (mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */ + movq_r2m (mm0, *(wsptr + 14)); /* wsptr[DCTSIZE*7] */ + paddw_r2r (mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */ - psubw_r2r (mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */ + psubw_r2r (mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */ - movq_r2m (mm1, *(wsptr + 2)); /* wsptr[DCTSIZE*1] */ - movq_r2r (mm4, mm1); /* copy tmp3 */ + movq_r2m (mm1, *(wsptr + 2)); /* wsptr[DCTSIZE*1] */ + movq_r2r (mm4, mm1); /* copy tmp3 */ - movq_r2m (mm3, *(wsptr + 12)); /* wsptr[DCTSIZE*6] */ + movq_r2m (mm3, *(wsptr + 12)); /* wsptr[DCTSIZE*6] */ - paddw_m2r (*(wsptr + 4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */ + paddw_m2r (*(wsptr + 4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */ - psubw_m2r (*(wsptr + 4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */ + psubw_m2r (*(wsptr + 4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */ movq_r2m (mm4, *(wsptr + 8)); - movq_r2r (mm5, mm7); /* copy tmp2 */ + movq_r2r (mm5, mm7); /* copy tmp2 */ - paddw_r2r (mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */ + paddw_r2r (mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */ movq_r2m (mm1, *(wsptr + 6)); - psubw_r2r (mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */ + psubw_r2r (mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */ movq_r2m (mm5, *(wsptr + 4)); @@ -1525,62 +1525,62 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */ /* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */ /* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */ - movq_m2r (*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */ + movq_m2r (*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */ - movq_m2r (*(wsptr + 1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */ + movq_m2r (*(wsptr + 1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */ movq_r2r (mm0, mm2); - movq_m2r (*(wsptr + 2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */ - paddw_r2r (mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */ + movq_m2r (*(wsptr + 2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */ + paddw_r2r (mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */ - movq_m2r (*(wsptr + 3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */ - psubw_r2r (mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */ + movq_m2r (*(wsptr + 3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */ + psubw_r2r (mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */ movq_r2r (mm0, mm6); movq_r2r (mm3, mm5); - paddw_r2r (mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */ + paddw_r2r (mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */ movq_r2r (mm2, mm1); - psubw_r2r (mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */ - punpcklwd_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */ + psubw_r2r (mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */ + punpcklwd_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */ - movq_m2r (*(wsptr + 7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */ - punpckhwd_r2r (mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */ + movq_m2r (*(wsptr + 7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */ + punpckhwd_r2r (mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */ - movq_m2r (*(wsptr + 4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */ - punpckldq_r2r (mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */ + movq_m2r (*(wsptr + 4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */ + punpckldq_r2r (mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */ - punpcklwd_r2r (mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */ + punpcklwd_r2r (mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */ movq_r2r (mm3, mm4); - movq_m2r (*(wsptr + 6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */ - punpckhwd_r2r (mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */ + movq_m2r (*(wsptr + 6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */ + punpckhwd_r2r (mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */ - movq_m2r (*(wsptr + 5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */ - punpckldq_r2r (mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */ + movq_m2r (*(wsptr + 5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */ + punpckldq_r2r (mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */ - paddw_r2r (mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */ + paddw_r2r (mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */ movq_r2r (mm6, mm2); - psubw_r2r (mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */ - paddw_r2r (mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */ + psubw_r2r (mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */ + paddw_r2r (mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */ movq_r2r (mm3, mm5); - punpcklwd_r2r (mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */ + punpcklwd_r2r (mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */ - psubw_r2r (mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */ - punpckhwd_r2r (mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */ + psubw_r2r (mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */ + punpckhwd_r2r (mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */ movq_r2r (mm4, mm7); - punpckldq_r2r (mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */ + punpckldq_r2r (mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */ - punpcklwd_r2r (mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */ + punpcklwd_r2r (mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */ - punpckhwd_r2r (mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */ + punpckhwd_r2r (mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */ - punpckldq_r2r (mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */ + punpckldq_r2r (mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */ movq_r2r (mm1, mm6); /*ok */ @@ -1590,24 +1590,24 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) movq_r2r (mm0, mm2); - punpckhdq_r2r (mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */ + punpckhdq_r2r (mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */ - punpckldq_r2r (mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */ + punpckldq_r2r (mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */ psllw_i2r (2, mm6); pmulhw_m2r (fix_141, mm6); - punpckldq_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */ + punpckldq_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */ - punpckhdq_r2r (mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */ + punpckhdq_r2r (mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */ movq_r2r (mm0, mm7); /* tmp0 = tmp10 + tmp13; */ /* tmp3 = tmp10 - tmp13; */ - paddw_r2r (mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */ - psubw_r2r (mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */ + paddw_r2r (mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */ + psubw_r2r (mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */ /* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */ - psubw_r2r (mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */ + psubw_r2r (mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */ /* tmp1 = tmp11 + tmp12; */ /* tmp2 = tmp11 - tmp12; */ movq_r2r (mm1, mm5); @@ -1620,115 +1620,115 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */ /* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */ /* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */ - movq_m2r (*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */ - paddw_r2r (mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */ + movq_m2r (*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */ + paddw_r2r (mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */ - movq_m2r (*(wsptr + 1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */ - psubw_r2r (mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */ + movq_m2r (*(wsptr + 1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */ + psubw_r2r (mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */ movq_r2r (mm3, mm6); - punpckldq_r2r (mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */ + punpckldq_r2r (mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */ - punpckhdq_r2r (mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */ + punpckhdq_r2r (mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */ movq_r2r (mm3, mm2); /*Save tmp0 and tmp1 in wsptr */ - movq_r2m (mm0, *(wsptr)); /* save tmp0 */ - paddw_r2r (mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */ + movq_r2m (mm0, *(wsptr)); /* save tmp0 */ + paddw_r2r (mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */ /*Continue with z10 --- z13 */ - movq_m2r (*(wsptr + 2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */ - psubw_r2r (mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */ + movq_m2r (*(wsptr + 2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */ + psubw_r2r (mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */ - movq_m2r (*(wsptr + 3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */ + movq_m2r (*(wsptr + 3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */ movq_r2r (mm6, mm4); - movq_r2m (mm1, *(wsptr + 1)); /* save tmp1 */ - punpckldq_r2r (mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */ + movq_r2m (mm1, *(wsptr + 1)); /* save tmp1 */ + punpckldq_r2r (mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */ - punpckhdq_r2r (mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */ + punpckhdq_r2r (mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */ movq_r2r (mm6, mm1); /*Save tmp2 and tmp3 in wsptr */ - paddw_r2r (mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */ + paddw_r2r (mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */ movq_r2r (mm2, mm4); /*Continue with z10 --- z13 */ - movq_r2m (mm5, *(wsptr + 2)); /* save tmp2 */ - punpcklwd_r2r (mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */ + movq_r2m (mm5, *(wsptr + 2)); /* save tmp2 */ + punpcklwd_r2r (mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */ - psubw_r2r (mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */ - punpckhwd_r2r (mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */ + psubw_r2r (mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */ + punpckhwd_r2r (mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */ movq_r2r (mm3, mm0); - punpcklwd_r2r (mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */ + punpcklwd_r2r (mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */ - movq_r2m (mm7, *(wsptr + 3)); /* save tmp3 */ - punpckhwd_r2r (mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */ + movq_r2m (mm7, *(wsptr + 3)); /* save tmp3 */ + punpckhwd_r2r (mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */ - movq_m2r (*(wsptr + 4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */ - punpckhdq_r2r (mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */ + movq_m2r (*(wsptr + 4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */ + punpckhdq_r2r (mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */ - movq_m2r (*(wsptr + 5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */ - punpckhdq_r2r (mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */ + movq_m2r (*(wsptr + 5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */ + punpckhdq_r2r (mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */ - movq_m2r (*(wsptr + 6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */ + movq_m2r (*(wsptr + 6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */ movq_r2r (mm6, mm4); - punpckldq_r2r (mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */ + punpckldq_r2r (mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */ movq_r2r (mm1, mm5); - punpckhdq_r2r (mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */ + punpckhdq_r2r (mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */ movq_r2r (mm6, mm2); - movq_m2r (*(wsptr + 7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */ - paddw_r2r (mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */ + movq_m2r (*(wsptr + 7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */ + paddw_r2r (mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */ - psubw_r2r (mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */ - punpckldq_r2r (mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */ + psubw_r2r (mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */ + punpckldq_r2r (mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */ - punpckhdq_r2r (mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */ + punpckhdq_r2r (mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */ movq_r2r (mm1, mm7); - paddw_r2r (mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */ - psubw_r2r (mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */ + paddw_r2r (mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */ + psubw_r2r (mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */ movq_r2r (mm6, mm5); - punpcklwd_r2r (mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */ + punpcklwd_r2r (mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */ - punpckhwd_r2r (mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */ + punpckhwd_r2r (mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */ movq_r2r (mm2, mm4); - punpcklwd_r2r (mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */ + punpcklwd_r2r (mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */ - punpckhwd_r2r (mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */ + punpckhwd_r2r (mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */ - punpckhdq_r2r (mm6, mm4); /*/ wsptr[2,z10],[3,z10],[2,z11],[3,z11] */ + punpckhdq_r2r (mm6, mm4); /*/ wsptr[2,z10],[3,z10],[2,z11],[3,z11] */ - punpckhdq_r2r (mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */ + punpckhdq_r2r (mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */ movq_r2r (mm0, mm5); - punpckldq_r2r (mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */ + punpckldq_r2r (mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */ - punpckhdq_r2r (mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */ + punpckhdq_r2r (mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */ movq_r2r (mm3, mm4); - punpckhdq_r2r (mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */ + punpckhdq_r2r (mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */ movq_r2r (mm5, mm1); - punpckldq_r2r (mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */ + punpckldq_r2r (mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */ /* tmp7 = z11 + z13; : phase 5 */ /* tmp8 = z11 - z13; : phase 5 */ - psubw_r2r (mm4, mm1); /* tmp8 */ + psubw_r2r (mm4, mm1); /* tmp8 */ - paddw_r2r (mm4, mm5); /* tmp7 */ + paddw_r2r (mm4, mm5); /* tmp7 */ /* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */ psllw_i2r (2, mm1); psllw_i2r (2, mm0); - pmulhw_m2r (fix_141, mm1); /* tmp21 */ + pmulhw_m2r (fix_141, mm1); /* tmp21 */ /* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) 2*(c2-c6) */ /* + MULTIPLY(z10, - FIX_1_847759065); : 2*c2 */ psllw_i2r (2, mm3); @@ -1737,36 +1737,36 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) pmulhw_m2r (fix_n184, mm7); movq_r2r (mm3, mm6); - movq_m2r (*(wsptr), mm2); /* tmp0,final1 */ + movq_m2r (*(wsptr), mm2); /* tmp0,final1 */ pmulhw_m2r (fix_108n184, mm6); /* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) : -2*(c2+c6) */ /* + MULTIPLY(z12, FIX_1_847759065); 2*c2 */ - movq_r2r (mm2, mm4); /* final1 */ + movq_r2r (mm2, mm4); /* final1 */ pmulhw_m2r (fix_184n261, mm0); - paddw_r2r (mm5, mm2); /* tmp0+tmp7,final1 */ + paddw_r2r (mm5, mm2); /* tmp0+tmp7,final1 */ pmulhw_m2r (fix_184, mm3); - psubw_r2r (mm5, mm4); /* tmp0-tmp7,final1 */ + psubw_r2r (mm5, mm4); /* tmp0-tmp7,final1 */ /* tmp6 = tmp22 - tmp7; phase 2 */ - psraw_i2r (3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */ + psraw_i2r (3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */ - paddw_r2r (mm6, mm7); /* tmp20 */ - psraw_i2r (3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */ + paddw_r2r (mm6, mm7); /* tmp20 */ + psraw_i2r (3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */ - paddw_r2r (mm0, mm3); /* tmp22 */ + paddw_r2r (mm0, mm3); /* tmp22 */ /* tmp5 = tmp21 - tmp6; */ - psubw_r2r (mm5, mm3); /* tmp6 */ + psubw_r2r (mm5, mm3); /* tmp6 */ /* tmp4 = tmp20 + tmp5; */ - movq_m2r (*(wsptr + 1), mm0); /* tmp1,final2 */ - psubw_r2r (mm3, mm1); /* tmp5 */ + movq_m2r (*(wsptr + 1), mm0); /* tmp1,final2 */ + psubw_r2r (mm3, mm1); /* tmp5 */ - movq_r2r (mm0, mm6); /* final2 */ - paddw_r2r (mm3, mm0); /* tmp1+tmp6,final2 */ + movq_r2r (mm0, mm6); /* final2 */ + paddw_r2r (mm3, mm0); /* tmp1+tmp6,final2 */ /* Final output stage: scale down by a factor of 8 and range-limit */ @@ -1781,30 +1781,30 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* & RANGE_MASK]; */ /* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */ /* & RANGE_MASK]; final2 */ - psubw_r2r (mm3, mm6); /* tmp1-tmp6,final2 */ - psraw_i2r (3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */ + psubw_r2r (mm3, mm6); /* tmp1-tmp6,final2 */ + psraw_i2r (3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */ - psraw_i2r (3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */ + psraw_i2r (3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */ - packuswb_r2r (mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */ + packuswb_r2r (mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */ - movq_m2r (*(wsptr + 2), mm5); /* tmp2,final3 */ - packuswb_r2r (mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */ + movq_m2r (*(wsptr + 2), mm5); /* tmp2,final3 */ + packuswb_r2r (mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */ /* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */ /* & RANGE_MASK]; */ /* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */ /* & RANGE_MASK]; final3 */ - paddw_r2r (mm1, mm7); /* tmp4 */ + paddw_r2r (mm1, mm7); /* tmp4 */ movq_r2r (mm5, mm3); - paddw_r2r (mm1, mm5); /* tmp2+tmp5 */ - psubw_r2r (mm1, mm3); /* tmp2-tmp5 */ + paddw_r2r (mm1, mm5); /* tmp2+tmp5 */ + psubw_r2r (mm1, mm3); /* tmp2-tmp5 */ - psraw_i2r (3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */ + psraw_i2r (3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */ - movq_m2r (*(wsptr + 3), mm4); /* tmp3,final4 */ - psraw_i2r (3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */ + movq_m2r (*(wsptr + 3), mm4); /* tmp3,final4 */ + psraw_i2r (3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */ @@ -1813,45 +1813,45 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */ /* & RANGE_MASK]; final4 */ movq_r2r (mm4, mm6); - paddw_r2r (mm7, mm4); /* tmp3+tmp4 */ + paddw_r2r (mm7, mm4); /* tmp3+tmp4 */ - psubw_r2r (mm7, mm6); /* tmp3-tmp4 */ - psraw_i2r (3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */ + psubw_r2r (mm7, mm6); /* tmp3-tmp4 */ + psraw_i2r (3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */ /* mov ecx, [dataptr] */ - psraw_i2r (3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */ + psraw_i2r (3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */ - packuswb_r2r (mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */ + packuswb_r2r (mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */ - packuswb_r2r (mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */ + packuswb_r2r (mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */ movq_r2r (mm2, mm4); movq_r2r (mm5, mm7); - punpcklbw_r2r (mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */ + punpcklbw_r2r (mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */ - punpckhbw_r2r (mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */ + punpckhbw_r2r (mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */ movq_r2r (mm2, mm1); - punpcklbw_r2r (mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */ + punpcklbw_r2r (mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */ /* add dataptr, 4 */ - punpckhbw_r2r (mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */ + punpckhbw_r2r (mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */ - punpcklwd_r2r (mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */ + punpcklwd_r2r (mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */ /* add ecx, output_col */ movq_r2r (mm7, mm6); - punpckhwd_r2r (mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */ + punpckhwd_r2r (mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */ movq_r2r (mm2, mm0); - punpcklwd_r2r (mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */ + punpcklwd_r2r (mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */ /* mov idata, [dataptr] */ - punpckldq_r2r (mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */ + punpckldq_r2r (mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */ /* add dataptr, 4 */ @@ -1859,17 +1859,17 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* add idata, output_col */ - punpckhwd_r2r (mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */ + punpckhwd_r2r (mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */ movq_r2m (mm2, *(dataptr)); - punpckhdq_r2r (mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */ + punpckhdq_r2r (mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */ dataptr += rskip; movq_r2m (mm0, *(dataptr)); - punpckldq_r2r (mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */ - punpckhdq_r2r (mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */ + punpckldq_r2r (mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */ + punpckhdq_r2r (mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */ dataptr += rskip; movq_r2m (mm1, *(dataptr)); @@ -1887,61 +1887,61 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */ /* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */ /* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */ - movq_m2r (*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */ + movq_m2r (*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */ - movq_m2r (*(wsptr + 1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */ + movq_m2r (*(wsptr + 1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */ movq_r2r (mm0, mm2); - movq_m2r (*(wsptr + 2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */ - paddw_r2r (mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */ + movq_m2r (*(wsptr + 2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */ + paddw_r2r (mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */ - movq_m2r (*(wsptr + 3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */ - psubw_r2r (mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */ + movq_m2r (*(wsptr + 3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */ + psubw_r2r (mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */ movq_r2r (mm0, mm6); movq_r2r (mm3, mm5); - paddw_r2r (mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */ + paddw_r2r (mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */ movq_r2r (mm2, mm1); - psubw_r2r (mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */ - punpcklwd_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */ + psubw_r2r (mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */ + punpcklwd_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */ - movq_m2r (*(wsptr + 7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */ - punpckhwd_r2r (mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */ + movq_m2r (*(wsptr + 7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */ + punpckhwd_r2r (mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */ - movq_m2r (*(wsptr + 4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */ - punpckldq_r2r (mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */ + movq_m2r (*(wsptr + 4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */ + punpckldq_r2r (mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */ - punpcklwd_r2r (mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */ + punpcklwd_r2r (mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */ movq_r2r (mm3, mm4); - movq_m2r (*(wsptr + 6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */ - punpckhwd_r2r (mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */ + movq_m2r (*(wsptr + 6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */ + punpckhwd_r2r (mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */ - movq_m2r (*(wsptr + 5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */ - punpckldq_r2r (mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */ + movq_m2r (*(wsptr + 5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */ + punpckldq_r2r (mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */ - paddw_r2r (mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */ + paddw_r2r (mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */ movq_r2r (mm6, mm2); - psubw_r2r (mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */ - paddw_r2r (mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */ + psubw_r2r (mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */ + paddw_r2r (mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */ movq_r2r (mm3, mm5); - punpcklwd_r2r (mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */ + punpcklwd_r2r (mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */ - psubw_r2r (mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */ - punpckhwd_r2r (mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */ + psubw_r2r (mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */ + punpckhwd_r2r (mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */ movq_r2r (mm4, mm7); - punpckldq_r2r (mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */ + punpckldq_r2r (mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */ - punpcklwd_r2r (mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */ + punpcklwd_r2r (mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */ - punpckhwd_r2r (mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */ + punpckhwd_r2r (mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */ - punpckldq_r2r (mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */ + punpckldq_r2r (mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */ movq_r2r (mm1, mm6); /*OK */ @@ -1950,24 +1950,24 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */ movq_r2r (mm0, mm2); - punpckhdq_r2r (mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */ + punpckhdq_r2r (mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */ - punpckldq_r2r (mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */ + punpckldq_r2r (mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */ psllw_i2r (2, mm6); pmulhw_m2r (fix_141, mm6); - punpckldq_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */ + punpckldq_r2r (mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */ - punpckhdq_r2r (mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */ + punpckhdq_r2r (mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */ movq_r2r (mm0, mm7); /* tmp0 = tmp10 + tmp13; */ /* tmp3 = tmp10 - tmp13; */ - paddw_r2r (mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */ - psubw_r2r (mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */ + paddw_r2r (mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */ + psubw_r2r (mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */ /* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */ - psubw_r2r (mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */ + psubw_r2r (mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */ /* tmp1 = tmp11 + tmp12; */ /* tmp2 = tmp11 - tmp12; */ movq_r2r (mm1, mm5); @@ -1981,115 +1981,115 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */ /* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */ /* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */ - movq_m2r (*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */ - paddw_r2r (mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */ + movq_m2r (*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */ + paddw_r2r (mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */ - movq_m2r (*(wsptr + 1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */ - psubw_r2r (mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */ + movq_m2r (*(wsptr + 1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */ + psubw_r2r (mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */ movq_r2r (mm3, mm6); - punpckldq_r2r (mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */ + punpckldq_r2r (mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */ - punpckhdq_r2r (mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */ + punpckhdq_r2r (mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */ movq_r2r (mm3, mm2); /*Save tmp0 and tmp1 in wsptr */ - movq_r2m (mm0, *(wsptr)); /* save tmp0 */ - paddw_r2r (mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */ + movq_r2m (mm0, *(wsptr)); /* save tmp0 */ + paddw_r2r (mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */ /*Continue with z10 --- z13 */ - movq_m2r (*(wsptr + 2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */ - psubw_r2r (mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */ + movq_m2r (*(wsptr + 2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */ + psubw_r2r (mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */ - movq_m2r (*(wsptr + 3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */ + movq_m2r (*(wsptr + 3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */ movq_r2r (mm6, mm4); - movq_r2m (mm1, *(wsptr + 1)); /* save tmp1 */ - punpckldq_r2r (mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */ + movq_r2m (mm1, *(wsptr + 1)); /* save tmp1 */ + punpckldq_r2r (mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */ - punpckhdq_r2r (mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */ + punpckhdq_r2r (mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */ movq_r2r (mm6, mm1); /*Save tmp2 and tmp3 in wsptr */ - paddw_r2r (mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */ + paddw_r2r (mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */ movq_r2r (mm2, mm4); /*Continue with z10 --- z13 */ - movq_r2m (mm5, *(wsptr + 2)); /* save tmp2 */ - punpcklwd_r2r (mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */ + movq_r2m (mm5, *(wsptr + 2)); /* save tmp2 */ + punpcklwd_r2r (mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */ - psubw_r2r (mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */ - punpckhwd_r2r (mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */ + psubw_r2r (mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */ + punpckhwd_r2r (mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */ movq_r2r (mm3, mm0); - punpcklwd_r2r (mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */ + punpcklwd_r2r (mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */ - movq_r2m (mm7, *(wsptr + 3)); /* save tmp3 */ - punpckhwd_r2r (mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */ + movq_r2m (mm7, *(wsptr + 3)); /* save tmp3 */ + punpckhwd_r2r (mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */ - movq_m2r (*(wsptr + 4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */ - punpckhdq_r2r (mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */ + movq_m2r (*(wsptr + 4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */ + punpckhdq_r2r (mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */ - movq_m2r (*(wsptr + 5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */ - punpckhdq_r2r (mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */ + movq_m2r (*(wsptr + 5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */ + punpckhdq_r2r (mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */ - movq_m2r (*(wsptr + 6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */ + movq_m2r (*(wsptr + 6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */ movq_r2r (mm6, mm4); - punpckldq_r2r (mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */ + punpckldq_r2r (mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */ movq_r2r (mm1, mm5); - punpckhdq_r2r (mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */ + punpckhdq_r2r (mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */ movq_r2r (mm6, mm2); - movq_m2r (*(wsptr + 7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */ - paddw_r2r (mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */ + movq_m2r (*(wsptr + 7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */ + paddw_r2r (mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */ - psubw_r2r (mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */ - punpckldq_r2r (mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */ + psubw_r2r (mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */ + punpckldq_r2r (mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */ - punpckhdq_r2r (mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */ + punpckhdq_r2r (mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */ movq_r2r (mm1, mm7); - paddw_r2r (mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */ - psubw_r2r (mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */ + paddw_r2r (mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */ + psubw_r2r (mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */ movq_r2r (mm6, mm5); - punpcklwd_r2r (mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */ + punpcklwd_r2r (mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */ - punpckhwd_r2r (mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */ + punpckhwd_r2r (mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */ movq_r2r (mm2, mm4); - punpcklwd_r2r (mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */ + punpcklwd_r2r (mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */ - punpckhwd_r2r (mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */ + punpckhwd_r2r (mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */ - punpckhdq_r2r (mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */ + punpckhdq_r2r (mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */ - punpckhdq_r2r (mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */ + punpckhdq_r2r (mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */ movq_r2r (mm0, mm5); - punpckldq_r2r (mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */ + punpckldq_r2r (mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */ - punpckhdq_r2r (mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */ + punpckhdq_r2r (mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */ movq_r2r (mm3, mm4); - punpckhdq_r2r (mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */ + punpckhdq_r2r (mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */ movq_r2r (mm5, mm1); - punpckldq_r2r (mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */ + punpckldq_r2r (mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */ /* tmp7 = z11 + z13; : phase 5 */ /* tmp8 = z11 - z13; : phase 5 */ - psubw_r2r (mm4, mm1); /* tmp8 */ + psubw_r2r (mm4, mm1); /* tmp8 */ - paddw_r2r (mm4, mm5); /* tmp7 */ + paddw_r2r (mm4, mm5); /* tmp7 */ /* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */ psllw_i2r (2, mm1); psllw_i2r (2, mm0); - pmulhw_m2r (fix_141, mm1); /* tmp21 */ + pmulhw_m2r (fix_141, mm1); /* tmp21 */ /* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) : 2*(c2-c6) */ /* + MULTIPLY(z10, - FIX_1_847759065); : 2*c2 */ psllw_i2r (2, mm3); @@ -2098,36 +2098,36 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) pmulhw_m2r (fix_n184, mm7); movq_r2r (mm3, mm6); - movq_m2r (*(wsptr), mm2); /* tmp0,final1 */ + movq_m2r (*(wsptr), mm2); /* tmp0,final1 */ pmulhw_m2r (fix_108n184, mm6); /* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) : -2*(c2+c6) */ /* + MULTIPLY(z12, FIX_1_847759065); : 2*c2 */ - movq_r2r (mm2, mm4); /* final1 */ + movq_r2r (mm2, mm4); /* final1 */ pmulhw_m2r (fix_184n261, mm0); - paddw_r2r (mm5, mm2); /* tmp0+tmp7,final1 */ + paddw_r2r (mm5, mm2); /* tmp0+tmp7,final1 */ pmulhw_m2r (fix_184, mm3); - psubw_r2r (mm5, mm4); /* tmp0-tmp7,final1 */ + psubw_r2r (mm5, mm4); /* tmp0-tmp7,final1 */ /* tmp6 = tmp22 - tmp7; phase 2 */ - psraw_i2r (3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */ + psraw_i2r (3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */ - paddw_r2r (mm6, mm7); /* tmp20 */ - psraw_i2r (3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */ + paddw_r2r (mm6, mm7); /* tmp20 */ + psraw_i2r (3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */ - paddw_r2r (mm0, mm3); /* tmp22 */ + paddw_r2r (mm0, mm3); /* tmp22 */ /* tmp5 = tmp21 - tmp6; */ - psubw_r2r (mm5, mm3); /* tmp6 */ + psubw_r2r (mm5, mm3); /* tmp6 */ /* tmp4 = tmp20 + tmp5; */ - movq_m2r (*(wsptr + 1), mm0); /* tmp1,final2 */ - psubw_r2r (mm3, mm1); /* tmp5 */ + movq_m2r (*(wsptr + 1), mm0); /* tmp1,final2 */ + psubw_r2r (mm3, mm1); /* tmp5 */ - movq_r2r (mm0, mm6); /* final2 */ - paddw_r2r (mm3, mm0); /* tmp1+tmp6,final2 */ + movq_r2r (mm0, mm6); /* final2 */ + paddw_r2r (mm3, mm0); /* tmp1+tmp6,final2 */ /* Final output stage: scale down by a factor of 8 and range-limit */ @@ -2141,30 +2141,30 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* & RANGE_MASK]; */ /* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */ /* & RANGE_MASK]; final2 */ - psubw_r2r (mm3, mm6); /* tmp1-tmp6,final2 */ - psraw_i2r (3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */ + psubw_r2r (mm3, mm6); /* tmp1-tmp6,final2 */ + psraw_i2r (3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */ - psraw_i2r (3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */ + psraw_i2r (3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */ - packuswb_r2r (mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */ + packuswb_r2r (mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */ - movq_m2r (*(wsptr + 2), mm5); /* tmp2,final3 */ - packuswb_r2r (mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */ + movq_m2r (*(wsptr + 2), mm5); /* tmp2,final3 */ + packuswb_r2r (mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */ /* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */ /* & RANGE_MASK]; */ /* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */ /* & RANGE_MASK]; final3 */ - paddw_r2r (mm1, mm7); /* tmp4 */ + paddw_r2r (mm1, mm7); /* tmp4 */ movq_r2r (mm5, mm3); - paddw_r2r (mm1, mm5); /* tmp2+tmp5 */ - psubw_r2r (mm1, mm3); /* tmp2-tmp5 */ + paddw_r2r (mm1, mm5); /* tmp2+tmp5 */ + psubw_r2r (mm1, mm3); /* tmp2-tmp5 */ - psraw_i2r (3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */ + psraw_i2r (3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */ - movq_m2r (*(wsptr + 3), mm4); /* tmp3,final4 */ - psraw_i2r (3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */ + movq_m2r (*(wsptr + 3), mm4); /* tmp3,final4 */ + psraw_i2r (3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */ @@ -2173,12 +2173,12 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) /* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */ /* & RANGE_MASK]; final4 */ movq_r2r (mm4, mm6); - paddw_r2r (mm7, mm4); /* tmp3+tmp4 */ + paddw_r2r (mm7, mm4); /* tmp3+tmp4 */ - psubw_r2r (mm7, mm6); /* tmp3-tmp4 */ - psraw_i2r (3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */ + psubw_r2r (mm7, mm6); /* tmp3-tmp4 */ + psraw_i2r (3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */ - psraw_i2r (3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */ + psraw_i2r (3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */ /* movq_r2m(mm4, *dummy); @@ -2188,46 +2188,46 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) */ - packuswb_r2r (mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */ + packuswb_r2r (mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */ - packuswb_r2r (mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */ + packuswb_r2r (mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */ movq_r2r (mm2, mm4); movq_r2r (mm5, mm7); - punpcklbw_r2r (mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */ + punpcklbw_r2r (mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */ - punpckhbw_r2r (mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */ + punpckhbw_r2r (mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */ movq_r2r (mm2, mm1); - punpcklbw_r2r (mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */ + punpcklbw_r2r (mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */ - punpckhbw_r2r (mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */ + punpckhbw_r2r (mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */ - punpcklwd_r2r (mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */ + punpcklwd_r2r (mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */ movq_r2r (mm7, mm6); - punpckhwd_r2r (mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */ + punpckhwd_r2r (mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */ movq_r2r (mm2, mm0); - punpcklwd_r2r (mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */ + punpcklwd_r2r (mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */ - punpckldq_r2r (mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */ + punpckldq_r2r (mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */ movq_r2r (mm1, mm3); - punpckhwd_r2r (mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */ + punpckhwd_r2r (mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */ dataptr += rskip; movq_r2m (mm2, *(dataptr)); - punpckhdq_r2r (mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */ + punpckhdq_r2r (mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */ dataptr += rskip; movq_r2m (mm0, *(dataptr)); - punpckldq_r2r (mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */ + punpckldq_r2r (mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */ - punpckhdq_r2r (mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */ + punpckhdq_r2r (mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */ dataptr += rskip; movq_r2m (mm1, *(dataptr)); @@ -2251,7 +2251,7 @@ RTjpeg_idct (__u8 * odata, __s16 * data, int rskip) for (ctr = 8; ctr > 0; ctr--) { if ((inptr[8] | inptr[16] | inptr[24] | - inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) { + inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) { dcval = inptr[0]; wsptr[0] = dcval; wsptr[8] = dcval; @@ -2388,7 +2388,7 @@ RTjpeg_init_data (void) dptr = (unsigned long) &(RTjpeg_alldata[0]); dptr += 32; dptr = dptr >> 5; - dptr = dptr << 5; /* cache align data */ + dptr = dptr << 5; /* cache align data */ RTjpeg_block = (__s16 *) dptr; dptr += sizeof (__s16) * 64; @@ -2418,15 +2418,15 @@ RTjpeg_init_Q (__u8 Q) int i; __u64 qual; - qual = (__u64) Q << (32 - 7); /* 32 bit FP, 255=2, 0=0 */ + qual = (__u64) Q << (32 - 7); /* 32 bit FP, 255=2, 0=0 */ for (i = 0; i < 64; i++) { RTjpeg_lqt[i] = - (__s32) ((qual / ((__u64) RTjpeg_lum_quant_tbl[i] << 16)) >> 3); + (__s32) ((qual / ((__u64) RTjpeg_lum_quant_tbl[i] << 16)) >> 3); if (RTjpeg_lqt[i] == 0) RTjpeg_lqt[i] = 1; RTjpeg_cqt[i] = - (__s32) ((qual / ((__u64) RTjpeg_chrom_quant_tbl[i] << 16)) >> 3); + (__s32) ((qual / ((__u64) RTjpeg_chrom_quant_tbl[i] << 16)) >> 3); if (RTjpeg_cqt[i] == 0) RTjpeg_cqt[i] = 1; RTjpeg_liqt[i] = (1 << 16) / (RTjpeg_lqt[i] << 3); @@ -2476,15 +2476,15 @@ RTjpeg_init_compress (__u32 * buf, int width, int height, __u8 Q) RTjpeg_Cwidth = RTjpeg_width >> 4; RTjpeg_Csize = (width >> 1) * height; - qual = (__u64) Q << (32 - 7); /* 32 bit FP, 255=2, 0=0 */ + qual = (__u64) Q << (32 - 7); /* 32 bit FP, 255=2, 0=0 */ for (i = 0; i < 64; i++) { RTjpeg_lqt[i] = - (__s32) ((qual / ((__u64) RTjpeg_lum_quant_tbl[i] << 16)) >> 3); + (__s32) ((qual / ((__u64) RTjpeg_lum_quant_tbl[i] << 16)) >> 3); if (RTjpeg_lqt[i] == 0) RTjpeg_lqt[i] = 1; RTjpeg_cqt[i] = - (__s32) ((qual / ((__u64) RTjpeg_chrom_quant_tbl[i] << 16)) >> 3); + (__s32) ((qual / ((__u64) RTjpeg_chrom_quant_tbl[i] << 16)) >> 3); if (RTjpeg_cqt[i] == 0) RTjpeg_cqt[i] = 1; RTjpeg_liqt[i] = (1 << 16) / (RTjpeg_lqt[i] << 3); @@ -2678,28 +2678,28 @@ RTjpeg_decompressYUV422 (__s8 * sp, __u8 * bp) for (i = RTjpeg_height; i; i -= 8) { for (k = 0, j = 0; j < RTjpeg_width; j += 16, k += 8) { if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp + j + 8, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp + j + 8, RTjpeg_block, RTjpeg_width); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct (bp2 + k, RTjpeg_block, RTjpeg_width >> 1); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); + RTjpeg_idct (bp2 + k, RTjpeg_block, RTjpeg_width >> 1); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct (bp3 + k, RTjpeg_block, RTjpeg_width >> 1); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); + RTjpeg_idct (bp3 + k, RTjpeg_block, RTjpeg_width >> 1); } } bp += RTjpeg_width << 3; @@ -2727,40 +2727,40 @@ RTjpeg_decompressYUV420 (__s8 * sp, __u8 * bp) for (i = RTjpeg_height >> 1; i; i -= 8) { for (k = 0, j = 0; j < RTjpeg_width; j += 16, k += 8) { if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp + j + 8, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp + j + 8, RTjpeg_block, RTjpeg_width); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp1 + j, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp1 + j, RTjpeg_block, RTjpeg_width); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp1 + j + 8, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp1 + j + 8, RTjpeg_block, RTjpeg_width); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct (bp2 + k, RTjpeg_block, RTjpeg_width >> 1); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); + RTjpeg_idct (bp2 + k, RTjpeg_block, RTjpeg_width >> 1); } if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct (bp3 + k, RTjpeg_block, RTjpeg_width >> 1); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); + RTjpeg_idct (bp3 + k, RTjpeg_block, RTjpeg_width >> 1); } } bp += RTjpeg_width << 4; @@ -2786,10 +2786,10 @@ RTjpeg_decompress8 (__s8 * sp, __u8 * bp) for (i = 0; i < RTjpeg_height; i += 8) { for (j = 0; j < RTjpeg_width; j += 8) if (*sp == -1) - sp++; + sp++; else { - sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); - RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width); + sp += RTjpeg_s2b (RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt); + RTjpeg_idct (bp + j, RTjpeg_block, RTjpeg_width); } bp += RTjpeg_width << 3; } @@ -2861,7 +2861,7 @@ RTjpeg_bcomp (__s16 * old, mmx_t * mask) if (result.q) { if (!RTjpeg_mtest) for (i = 0; i < 16; i++) - ((__u64 *) old)[i] = ((__u64 *) RTjpeg_block)[i]; + ((__u64 *) old)[i] = ((__u64 *) RTjpeg_block)[i]; return 0; } /* printf("."); */ @@ -2877,8 +2877,8 @@ RTjpeg_bcomp (__s16 * old, __u16 * mask) for (i = 0; i < 64; i++) if (abs (old[i] - RTjpeg_block[i]) > *mask) { if (!RTjpeg_mtest) - for (i = 0; i < 16; i++) - ((__u64 *) old)[i] = ((__u64 *) RTjpeg_block)[i]; + for (i = 0; i < 16; i++) + ((__u64 *) old)[i] = ((__u64 *) RTjpeg_block)[i]; return 0; } return 1; @@ -2904,10 +2904,10 @@ RTjpeg_mcompress (__s8 * sp, unsigned char *bp, __u16 lmask, __u16 cmask) emms (); RTjpeg_lmask = (mmx_t) (((__u64) lmask << 48) | ((__u64) lmask << 32) | ((__u64) lmask << - 16) | lmask); + 16) | lmask); RTjpeg_cmask = (mmx_t) (((__u64) cmask << 48) | ((__u64) cmask << 32) | ((__u64) cmask << - 16) | cmask); + 16) | cmask); #else RTjpeg_lmask = lmask; RTjpeg_cmask = cmask; @@ -2925,33 +2925,33 @@ RTjpeg_mcompress (__s8 * sp, unsigned char *bp, __u16 lmask, __u16 cmask) RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_Ywidth); RTjpeg_quant (RTjpeg_block, RTjpeg_lqt); if (RTjpeg_bcomp (block, &RTjpeg_lmask)) { - *((__u8 *) sp++) = 255; + *((__u8 *) sp++) = 255; } else - sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8); + sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8); block += 64; RTjpeg_dctY (bp + j + 8, RTjpeg_block, RTjpeg_Ywidth); RTjpeg_quant (RTjpeg_block, RTjpeg_lqt); if (RTjpeg_bcomp (block, &RTjpeg_lmask)) { - *((__u8 *) sp++) = 255; + *((__u8 *) sp++) = 255; } else - sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8); + sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8); block += 64; RTjpeg_dctY (bp2 + k, RTjpeg_block, RTjpeg_Cwidth); RTjpeg_quant (RTjpeg_block, RTjpeg_cqt); if (RTjpeg_bcomp (block, &RTjpeg_cmask)) { - *((__u8 *) sp++) = 255; + *((__u8 *) sp++) = 255; } else - sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8); + sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8); block += 64; RTjpeg_dctY (bp3 + k, RTjpeg_block, RTjpeg_Cwidth); RTjpeg_quant (RTjpeg_block, RTjpeg_cqt); if (RTjpeg_bcomp (block, &RTjpeg_cmask)) { - *((__u8 *) sp++) = 255; + *((__u8 *) sp++) = 255; } else - sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8); + sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_cb8); block += 64; } @@ -2977,7 +2977,7 @@ RTjpeg_mcompress8 (__s8 * sp, unsigned char *bp, __u16 lmask) emms (); RTjpeg_lmask = (mmx_t) (((__u64) lmask << 48) | ((__u64) lmask << 32) | ((__u64) lmask << - 16) | lmask); + 16) | lmask); #else RTjpeg_lmask = lmask; #endif @@ -2991,10 +2991,10 @@ RTjpeg_mcompress8 (__s8 * sp, unsigned char *bp, __u16 lmask) RTjpeg_dctY (bp + j, RTjpeg_block, RTjpeg_width); RTjpeg_quant (RTjpeg_block, RTjpeg_lqt); if (RTjpeg_bcomp (block, &RTjpeg_lmask)) { - *((__u8 *) sp++) = 255; + *((__u8 *) sp++) = 255; /* printf("* %d ", sp[-1]); */ } else - sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8); + sp += RTjpeg_b2s (RTjpeg_block, sp, RTjpeg_lb8); block += 64; } bp += RTjpeg_width << 3; |