summaryrefslogtreecommitdiffstats
path: root/gst/rtjpeg/RTjpeg.c
diff options
context:
space:
mode:
Diffstat (limited to 'gst/rtjpeg/RTjpeg.c')
-rw-r--r--gst/rtjpeg/RTjpeg.c1600
1 files changed, 800 insertions, 800 deletions
diff --git a/gst/rtjpeg/RTjpeg.c b/gst/rtjpeg/RTjpeg.c
index 0f795e79..5ed91634 100644
--- a/gst/rtjpeg/RTjpeg.c
+++ b/gst/rtjpeg/RTjpeg.c
@@ -603,71 +603,71 @@ void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
movq_m2r(*(dataptr+4), mm7); /* m23:m22|m21:m20 - third line */
movq_r2r(mm0, mm2);
- punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines
+ punpcklwd_m2r(*(dataptr+2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
movq_r2r(mm7, mm4);
- punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines
+ punpcklwd_m2r(*(dataptr+6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */
movq_r2r(mm0, mm1);
- movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line
- punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1
+ movq_m2r(*(dataptr+2), mm6); /* m13:m12|m11:m10 - second line */
+ punpckldq_r2r(mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */
- movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line
- punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2
+ movq_m2r(*(dataptr+6), mm5); /* m33:m32|m31:m30 - fourth line */
+ punpckhdq_r2r(mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2r(mm0, mm7); // write result 1
- punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines
+ movq_r2r(mm0, mm7); /* write result 1 */
+ punpckhwd_r2r(mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */
- psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */
- movq_r2r(mm1, mm6); // write result 2
+ psubw_m2r(*(dataptr+14), mm7); /* tmp07=x0-x7: Stage 1 */
+ movq_r2r(mm1, mm6); /* write result 2 */
- paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */
- punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines
+ paddw_m2r(*(dataptr+14), mm0); /* tmp00=x0+x7: Stage 1 */
+ punpckhwd_r2r(mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */
- paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */
- movq_r2r(mm2, mm3); // copy first intermediate result
+ paddw_m2r(*(dataptr+12), mm1); /* tmp01=x1+x6: Stage 1 */
+ movq_r2r(mm2, mm3); /* copy first intermediate result */
- psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */
- punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3
+ psubw_m2r(*(dataptr+12), mm6); /* tmp06=x1-x6: Stage 1 */
+ punpckldq_r2r(mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */
movq_r2m(mm7, tmp7);
- movq_r2r(mm2, mm5); // write result 3
+ movq_r2r(mm2, mm5); /* write result 3 */
movq_r2m(mm6, tmp6);
- punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4
+ punpckhdq_r2r(mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */
- paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+5 /* Stage 1 */
- movq_r2r(mm3, mm4); // write result 4
+ paddw_m2r(*(dataptr+10), mm2); /* tmp02=x2+x5: Stage 1 */
+ movq_r2r(mm3, mm4); /* write result 4 */
/************************************************************************************************
End of Transpose
************************************************************************************************/
- paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/
+ paddw_m2r(*(dataptr+8), mm3); /* tmp03=x3+x4: stage 1 */
movq_r2r(mm0, mm7);
- psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/
+ psubw_m2r(*(dataptr+8), mm4); /* tmp04=x3-x4: stage 1 */
movq_r2r(mm1, mm6);
- paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */
- psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */
+ paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */
+ psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */
- psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */
- paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */
+ psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */
+ paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */
- psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/
- paddw_r2r(mm7, mm6); // tmp12 + tmp13
+ psubw_m2r(*(dataptr+10), mm5); /* tmp05=x2-x5: stage 1 */
+ paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
/* stage 3 */
movq_m2r(tmp6, mm2);
movq_r2r(mm0, mm3);
- psllw_i2r(2, mm6); // m8 * 2^2
+ psllw_i2r(2, mm6); /* m8 * 2^2 */
paddw_r2r(mm1, mm0);
- pmulhw_m2r(RTjpeg_C4, mm6); // z1
+ pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
psubw_r2r(mm1, mm3);
movq_r2m(mm0, *dataptr);
@@ -675,349 +675,349 @@ void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
/* Odd part */
movq_r2m(mm3, *(dataptr+8));
- paddw_r2r(mm5, mm4); // tmp10
+ paddw_r2r(mm5, mm4); /* tmp10 */
movq_m2r(tmp7, mm3);
- paddw_r2r(mm6, mm0); // tmp32
+ paddw_r2r(mm6, mm0); /* tmp32 */
- paddw_r2r(mm2, mm5); // tmp11
- psubw_r2r(mm6, mm7); // tmp33
+ paddw_r2r(mm2, mm5); /* tmp11 */
+ psubw_r2r(mm6, mm7); /* tmp33 */
movq_r2m(mm0, *(dataptr+4));
- paddw_r2r(mm3, mm2); // tmp12
+ paddw_r2r(mm3, mm2); /* tmp12 */
/* stage 4 */
movq_r2m(mm7, *(dataptr+12));
- movq_r2r(mm4, mm1); // copy of tmp10
+ movq_r2r(mm4, mm1); /* copy of tmp10 */
- psubw_r2r(mm2, mm1); // tmp10 - tmp12
- psllw_i2r(2, mm4); // m8 * 2^2
+ psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r(2, mm4); /* m8 * 2^2 */
movq_m2r(RTjpeg_C2mC6, mm0);
psllw_i2r(2, mm1);
- pmulhw_m2r(RTjpeg_C6, mm1); // z5
+ pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
psllw_i2r(2, mm2);
- pmulhw_r2r(mm0, mm4); // z5
+ pmulhw_r2r(mm0, mm4); /* z5 */
/* stage 5 */
pmulhw_m2r(RTjpeg_C2pC6, mm2);
psllw_i2r(2, mm5);
- pmulhw_m2r(RTjpeg_C4, mm5); // z3
- movq_r2r(mm3, mm0); // copy tmp7
+ pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
+ movq_r2r(mm3, mm0); /* copy tmp7 */
movq_m2r(*(dataptr+1), mm7);
- paddw_r2r(mm1, mm4); // z2
+ paddw_r2r(mm1, mm4); /* z2 */
- paddw_r2r(mm1, mm2); // z4
+ paddw_r2r(mm1, mm2); /* z4 */
- paddw_r2r(mm5, mm0); // z11
- psubw_r2r(mm5, mm3); // z13
+ paddw_r2r(mm5, mm0); /* z11 */
+ psubw_r2r(mm5, mm3); /* z13 */
/* stage 6 */
- movq_r2r(mm3, mm5); // copy z13
- psubw_r2r(mm4, mm3); // y3=z13 - z2
+ movq_r2r(mm3, mm5); /* copy z13 */
+ psubw_r2r(mm4, mm3); /* y3=z13 - z2 */
- paddw_r2r(mm4, mm5); // y5=z13 + z2
- movq_r2r(mm0, mm6); // copy z11
+ paddw_r2r(mm4, mm5); /* y5=z13 + z2 */
+ movq_r2r(mm0, mm6); /* copy z11 */
- movq_r2m(mm3, *(dataptr+6)); //save y3
- psubw_r2r(mm2, mm0); // y7=z11 - z4
+ movq_r2m(mm3, *(dataptr+6)); /*save y3 */
+ psubw_r2r(mm2, mm0); /* y7=z11 - z4 */
- movq_r2m(mm5, *(dataptr+10)); //save y5
- paddw_r2r(mm2, mm6); // y1=z11 + z4
+ movq_r2m(mm5, *(dataptr+10)); /*save y5 */
+ paddw_r2r(mm2, mm6); /* y1=z11 + z4 */
- movq_r2m(mm0, *(dataptr+14)); //save y7
+ movq_r2m(mm0, *(dataptr+14)); /*save y7 */
/************************************************
* End of 1st 4 rows
************************************************/
- movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */
- movq_r2r(mm7, mm0); // copy x0
+ movq_m2r(*(dataptr+3), mm1); /* load x1: stage 1 */
+ movq_r2r(mm7, mm0); /* copy x0 */
- movq_r2m(mm6, *(dataptr+2)); //save y1
+ movq_r2m(mm6, *(dataptr+2)); /*save y1 */
- movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */
- movq_r2r(mm1, mm6); // copy x1
+ movq_m2r(*(dataptr+5), mm2); /* load x2: stage 1 */
+ movq_r2r(mm1, mm6); /* copy x1 */
- paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7
+ paddw_m2r(*(dataptr+15), mm0); /* tmp00 = x0 + x7 */
- movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */
- movq_r2r(mm2, mm5); // copy x2
+ movq_m2r(*(dataptr+7), mm3); /* load x3 : stage 1 */
+ movq_r2r(mm2, mm5); /* copy x2 */
- psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7
- movq_r2r(mm3, mm4); // copy x3
+ psubw_m2r(*(dataptr+15), mm7); /* tmp07 = x0 - x7 */
+ movq_r2r(mm3, mm4); /* copy x3 */
- paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6
+ paddw_m2r(*(dataptr+13), mm1); /* tmp01 = x1 + x6 */
- movq_r2m(mm7, tmp7); // save tmp07
- movq_r2r(mm0, mm7); // copy tmp00
+ movq_r2m(mm7, tmp7); /* save tmp07 */
+ movq_r2r(mm0, mm7); /* copy tmp00 */
- psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6
+ psubw_m2r(*(dataptr+13), mm6); /* tmp06 = x1 - x6 */
/* stage 2, Even Part */
- paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4
+ paddw_m2r(*(dataptr+9), mm3); /* tmp03 = x3 + x4 */
- movq_r2m(mm6, tmp6); // save tmp07
- movq_r2r(mm1, mm6); // copy tmp01
+ movq_r2m(mm6, tmp6); /* save tmp06 */
+ movq_r2r(mm1, mm6); /* copy tmp01 */
- paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5
- paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03
+ paddw_m2r(*(dataptr+11), mm2); /* tmp02 = x2 + x5 */
+ paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03 */
- psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03
+ psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03 */
- psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4
- psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02
+ psubw_m2r(*(dataptr+9), mm4); /* tmp04 = x3 - x4 */
+ psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02 */
- paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02
+ paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02 */
- psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5
- paddw_r2r(mm7, mm6); // tmp12 + tmp13
+ psubw_m2r(*(dataptr+11), mm5); /* tmp05 = x2 - x5 */
+ paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
/* stage 3, Even and stage 4 & 5 even */
- movq_m2r(tmp6, mm2); // load tmp6
- movq_r2r(mm0, mm3); // copy tmp10
+ movq_m2r(tmp6, mm2); /* load tmp6 */
+ movq_r2r(mm0, mm3); /* copy tmp10 */
- psllw_i2r(2, mm6); // shift z1
- paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11
+ psllw_i2r(2, mm6); /* shift z1 */
+ paddw_r2r(mm1, mm0); /* y0=tmp10 + tmp11 */
- pmulhw_m2r(RTjpeg_C4, mm6); // z1
- psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11
+ pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
+ psubw_r2r(mm1, mm3); /* y4=tmp10 - tmp11 */
- movq_r2m(mm0, *(dataptr+1)); //save y0
- movq_r2r(mm7, mm0); // copy tmp13
+ movq_r2m(mm0, *(dataptr+1)); /*save y0 */
+ movq_r2r(mm7, mm0); /* copy tmp13 */
/* odd part */
- movq_r2m(mm3, *(dataptr+9)); //save y4
- paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5
+ movq_r2m(mm3, *(dataptr+9)); /*save y4 */
+ paddw_r2r(mm5, mm4); /* tmp10 = tmp4 + tmp5 */
- movq_m2r(tmp7, mm3); // load tmp7
- paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1
+ movq_m2r(tmp7, mm3); /* load tmp7 */
+ paddw_r2r(mm6, mm0); /* tmp32 = tmp13 + z1 */
- paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6
- psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1
+ paddw_r2r(mm2, mm5); /* tmp11 = tmp5 + tmp6 */
+ psubw_r2r(mm6, mm7); /* tmp33 = tmp13 - z1 */
- movq_r2m(mm0, *(dataptr+5)); //save y2
- paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7
+ movq_r2m(mm0, *(dataptr+5)); /*save y2 */
+ paddw_r2r(mm3, mm2); /* tmp12 = tmp6 + tmp7 */
/* stage 4 */
- movq_r2m(mm7, *(dataptr+13)); //save y6
- movq_r2r(mm4, mm1); // copy tmp10
+ movq_r2m(mm7, *(dataptr+13)); /*save y6 */
+ movq_r2r(mm4, mm1); /* copy tmp10 */
- psubw_r2r(mm2, mm1); // tmp10 - tmp12
- psllw_i2r(2, mm4); // shift tmp10
+ psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r(2, mm4); /* shift tmp10 */
- movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6
- psllw_i2r(2, mm1); // shift (tmp10-tmp12)
+ movq_m2r(RTjpeg_C2mC6, mm0); /* load C2mC6 */
+ psllw_i2r(2, mm1); /* shift (tmp10-tmp12) */
- pmulhw_m2r(RTjpeg_C6, mm1); // z5
- psllw_i2r(2, mm5); // prepare for multiply
+ pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
+ psllw_i2r(2, mm5); /* prepare for multiply */
- pmulhw_r2r(mm0, mm4); // multiply by converted real
+ pmulhw_r2r(mm0, mm4); /* multiply by converted real */
/* stage 5 */
- pmulhw_m2r(RTjpeg_C4, mm5); // z3
- psllw_i2r(2, mm2); // prepare for multiply
+ pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
+ psllw_i2r(2, mm2); /* prepare for multiply */
- pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply
- movq_r2r(mm3, mm0); // copy tmp7
+ pmulhw_m2r(RTjpeg_C2pC6, mm2); /* multiply */
+ movq_r2r(mm3, mm0); /* copy tmp7 */
- movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7
- paddw_r2r(mm1, mm4); // z2
+ movq_m2r(*(dataptr+9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
+ paddw_r2r(mm1, mm4); /* z2 */
- paddw_r2r(mm5, mm0); // z11
- psubw_r2r(mm5, mm3); // z13
+ paddw_r2r(mm5, mm0); /* z11 */
+ psubw_r2r(mm5, mm3); /* z13 */
/* stage 6 */
- movq_r2r(mm3, mm5); // copy z13
- paddw_r2r(mm1, mm2); // z4
+ movq_r2r(mm3, mm5); /* copy z13 */
+ paddw_r2r(mm1, mm2); /* z4 */
- movq_r2r(mm0, mm6); // copy z11
- psubw_r2r(mm4, mm5); // y3
+ movq_r2r(mm0, mm6); /* copy z11 */
+ psubw_r2r(mm4, mm5); /* y3 */
- paddw_r2r(mm2, mm6); // y1
- paddw_r2r(mm4, mm3); // y5
+ paddw_r2r(mm2, mm6); /* y1 */
+ paddw_r2r(mm4, mm3); /* y5 */
- movq_r2m(mm5, *(dataptr+7)); //save y3
+ movq_r2m(mm5, *(dataptr+7)); /*save y3 */
- movq_r2m(mm6, *(dataptr+3)); //save y1
- psubw_r2r(mm2, mm0); // y7
+ movq_r2m(mm6, *(dataptr+3)); /*save y1 */
+ psubw_r2r(mm2, mm0); /* y7 */
/************************************************************************************************
Start of Transpose
************************************************************************************************/
- movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2
- movq_r2r(mm7, mm5); // copy first line
+ movq_m2r(*(dataptr+13), mm6); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */
+ movq_r2r(mm7, mm5); /* copy first line */
- punpcklwd_r2r(mm3, mm7); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm6, mm2); // copy third line
+ punpcklwd_r2r(mm3, mm7); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r(mm6, mm2); /* copy third line */
- punpcklwd_r2r(mm0, mm6); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm7, mm1); // copy first intermediate result
+ punpcklwd_r2r(mm0, mm6); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r(mm7, mm1); /* copy first intermediate result */
- punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1
+ punpckldq_r2r(mm6, mm7); /* m30:m20|m10:m00 - interleave to produce result 1 */
- punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2
+ punpckhdq_r2r(mm6, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2m(mm7, *(dataptr+9)); // write result 1
- punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines
+ movq_r2m(mm7, *(dataptr+9)); /* write result 1 */
+ punpckhwd_r2r(mm3, mm5); /* m13:m03|m12:m02 - interleave first and second lines */
- movq_r2m(mm1, *(dataptr+11)); // write result 2
- punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines
+ movq_r2m(mm1, *(dataptr+11)); /* write result 2 */
+ punpckhwd_r2r(mm0, mm2); /* m33:m23|m32:m22 - interleave third and fourth lines */
- movq_r2r(mm5, mm1); // copy first intermediate result
- punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3
+ movq_r2r(mm5, mm1); /* copy first intermediate result */
+ punpckldq_r2r(mm2, mm5); /* m32:m22|m12:m02 - interleave to produce result 3 */
- movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4
- punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4
+ movq_m2r(*(dataptr+1), mm0); /* m03:m02|m01:m00 - first line, 4x4 */
+ punpckhdq_r2r(mm2, mm1); /* m33:m23|m13:m03 - interleave to produce result 4 */
- movq_r2m(mm5, *(dataptr+13)); // write result 3
+ movq_r2m(mm5, *(dataptr+13)); /* write result 3 */
/****** last 4x4 done */
- movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4
+ movq_r2m(mm1, *(dataptr+15)); /* write result 4, last 4x4 */
- movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line
- movq_r2r(mm0, mm6); // copy first line
+ movq_m2r(*(dataptr+5), mm2); /* m23:m22|m21:m20 - third line */
+ movq_r2r(mm0, mm6); /* copy first line */
- punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm2, mm7); // copy third line
+ punpcklwd_m2r(*(dataptr+3), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r(mm2, mm7); /* copy third line */
- punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm0, mm4); // copy first intermediate result
+ punpcklwd_m2r(*(dataptr+7), mm2); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r(mm0, mm4); /* copy first intermediate result */
- movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line
- punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result
+ movq_m2r(*(dataptr+8), mm1); /* n03:n02|n01:n00 - first line */
+ punpckldq_r2r(mm2, mm0); /* m30:m20|m10:m00 - interleave to produce first result */
- movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line
- punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result
+ movq_m2r(*(dataptr+12), mm3); /* n23:n22|n21:n20 - third line */
+ punpckhdq_r2r(mm2, mm4); /* m31:m21|m11:m01 - interleave to produce second result */
- punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines
- movq_r2r(mm1, mm2); // copy first line
+ punpckhwd_m2r(*(dataptr+3), mm6); /* m13:m03|m12:m02 - interleave first and second lines */
+ movq_r2r(mm1, mm2); /* copy first line */
- punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines
- movq_r2r(mm6, mm5); // copy first intermediate result
+ punpckhwd_m2r(*(dataptr+7), mm7); /* m33:m23|m32:m22 - interleave third and fourth lines */
+ movq_r2r(mm6, mm5); /* copy first intermediate result */
- movq_r2m(mm0, *(dataptr+8)); // write result 1
- punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result
+ movq_r2m(mm0, *(dataptr+8)); /* write result 1 */
+ punpckhdq_r2r(mm7, mm5); /* m33:m23|m13:m03 - produce third result */
- punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines
- movq_r2r(mm3, mm0); // copy third line
+ punpcklwd_m2r(*(dataptr+10), mm1); /* n11:n01|n10:n00 - interleave first and second lines */
+ movq_r2r(mm3, mm0); /* copy third line */
- punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines
+ punpckhwd_m2r(*(dataptr+10), mm2); /* n13:n03|n12:n02 - interleave first and second lines */
- movq_r2m(mm4, *(dataptr+10)); // write result 2 out
- punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result
+ movq_r2m(mm4, *(dataptr+10)); /* write result 2 out */
+ punpckldq_r2r(mm7, mm6); /* m32:m22|m12:m02 - produce fourth result */
- punpcklwd_m2r(*(dataptr+14), mm3); // n33:n23|n32:n22 - interleave third and fourth lines
- movq_r2r(mm1, mm4); // copy second intermediate result
+ punpcklwd_m2r(*(dataptr+14), mm3); /* n31:n21|n30:n20 - interleave third and fourth lines */
+ movq_r2r(mm1, mm4); /* copy second intermediate result */
- movq_r2m(mm6, *(dataptr+12)); // write result 3 out
- punpckldq_r2r(mm3, mm1); //
+ movq_r2m(mm6, *(dataptr+12)); /* write result 3 out */
+ punpckldq_r2r(mm3, mm1); /* */
- punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines
- movq_r2r(mm2, mm6); // copy second intermediate result
+ punpckhwd_m2r(*(dataptr+14), mm0); /* n33:n23|n32:n22 - interleave third and fourth lines */
+ movq_r2r(mm2, mm6); /* copy second intermediate result */
- movq_r2m(mm5, *(dataptr+14)); // write result 4 out
- punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result
+ movq_r2m(mm5, *(dataptr+14)); /* write result 4 out */
+ punpckhdq_r2r(mm3, mm4); /* n31:n21|n11:n01- produce second result */
- movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block)
- punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result
+ movq_r2m(mm1, *(dataptr+1)); /* write result 5 out - (first result for other 4 x 4 block) */
+ punpckldq_r2r(mm0, mm2); /* n32:n22|n12:n02- produce third result */
- movq_r2m(mm4, *(dataptr+3)); // write result 6 out
- punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result
+ movq_r2m(mm4, *(dataptr+3)); /* write result 6 out */
+ punpckhdq_r2r(mm0, mm6); /* n33:n23|n13:n03 - produce fourth result */
- movq_r2m(mm2, *(dataptr+5)); // write result 7 out
+ movq_r2m(mm2, *(dataptr+5)); /* write result 7 out */
- movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4
+ movq_m2r(*dataptr, mm0); /* m03:m02|m01:m00 - first line, first 4x4 */
- movq_r2m(mm6, *(dataptr+7)); // write result 8 out
+ movq_r2m(mm6, *(dataptr+7)); /* write result 8 out */
-// Do first 4x4 quadrant, which is used in the beginning of the DCT:
+/* Do first 4x4 quadrant, which is used in the beginning of the DCT: */
- movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line
- movq_r2r(mm0, mm2); // copy first line
+ movq_m2r(*(dataptr+4), mm7); /* m23:m22|m21:m20 - third line */
+ movq_r2r(mm0, mm2); /* copy first line */
- punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm7, mm4); // copy third line
+ punpcklwd_m2r(*(dataptr+2), mm0); /* m11:m01|m10:m00 - interleave first and second lines */
+ movq_r2r(mm7, mm4); /* copy third line */
- punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm0, mm1); // copy first intermediate result
+ punpcklwd_m2r(*(dataptr+6), mm7); /* m31:m21|m30:m20 - interleave third and fourth lines */
+ movq_r2r(mm0, mm1); /* copy first intermediate result */
- movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line
- punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1
+ movq_m2r(*(dataptr+2), mm6); /* m13:m12|m11:m10 - second line */
+ punpckldq_r2r(mm7, mm0); /* m30:m20|m10:m00 - interleave to produce result 1 */
- movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line
- punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2
+ movq_m2r(*(dataptr+6), mm5); /* m33:m32|m31:m30 - fourth line */
+ punpckhdq_r2r(mm7, mm1); /* m31:m21|m11:m01 - interleave to produce result 2 */
- movq_r2r(mm0, mm7); // write result 1
- punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines
+ movq_r2r(mm0, mm7); /* write result 1 */
+ punpckhwd_r2r(mm6, mm2); /* m13:m03|m12:m02 - interleave first and second lines */
- psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */
- movq_r2r(mm1, mm6); // write result 2
+ psubw_m2r(*(dataptr+14), mm7); /* tmp07=x0-x7: Stage 1 */
+ movq_r2r(mm1, mm6); /* write result 2 */
- paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */
- punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines
+ paddw_m2r(*(dataptr+14), mm0); /* tmp00=x0+x7: Stage 1 */
+ punpckhwd_r2r(mm5, mm4); /* m33:m23|m32:m22 - interleave third and fourth lines */
- paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */
- movq_r2r(mm2, mm3); // copy first intermediate result
+ paddw_m2r(*(dataptr+12), mm1); /* tmp01=x1+x6: Stage 1 */
+ movq_r2r(mm2, mm3); /* copy first intermediate result */
- psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */
- punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3
+ psubw_m2r(*(dataptr+12), mm6); /* tmp06=x1-x6: Stage 1 */
+ punpckldq_r2r(mm4, mm2); /* m32:m22|m12:m02 - interleave to produce result 3 */
- movq_r2m(mm7, tmp7); // save tmp07
- movq_r2r(mm2, mm5); // write result 3
+ movq_r2m(mm7, tmp7); /* save tmp07 */
+ movq_r2r(mm2, mm5); /* write result 3 */
- movq_r2m(mm6, tmp6); // save tmp06
+ movq_r2m(mm6, tmp6); /* save tmp06 */
- punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4
+ punpckhdq_r2r(mm4, mm3); /* m33:m23|m13:m03 - interleave to produce result 4 */
- paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* stage 1 */
- movq_r2r(mm3, mm4); // write result 4
+ paddw_m2r(*(dataptr+10), mm2); /* tmp02=x2+x5: stage 1 */
+ movq_r2r(mm3, mm4); /* write result 4 */
/************************************************************************************************
End of Transpose 2
************************************************************************************************/
- paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/
+ paddw_m2r(*(dataptr+8), mm3); /* tmp03=x3+x4: stage 1 */
movq_r2r(mm0, mm7);
- psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/
+ psubw_m2r(*(dataptr+8), mm4); /* tmp04=x3-x4: stage 1 */
movq_r2r(mm1, mm6);
- paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */
- psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */
+ paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03: even 2 */
+ psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03: even 2 */
- psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */
- paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */
+ psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02: even 2 */
+ paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02: even 2 */
- psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/
- paddw_r2r(mm7, mm6); // tmp12 + tmp13
+ psubw_m2r(*(dataptr+10), mm5); /* tmp05=x2-x5: stage 1 */
+ paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
/* stage 3 */
movq_m2r(tmp6, mm2);
movq_r2r(mm0, mm3);
- psllw_i2r(2, mm6); // m8 * 2^2
+ psllw_i2r(2, mm6); /* m8 * 2^2 */
paddw_r2r(mm1, mm0);
- pmulhw_m2r(RTjpeg_C4, mm6); // z1
+ pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
psubw_r2r(mm1, mm3);
movq_r2m(mm0, *dataptr);
@@ -1025,188 +1025,188 @@ void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
/* Odd part */
movq_r2m(mm3, *(dataptr+8));
- paddw_r2r(mm5, mm4); // tmp10
+ paddw_r2r(mm5, mm4); /* tmp10 */
movq_m2r(tmp7, mm3);
- paddw_r2r(mm6, mm0); // tmp32
+ paddw_r2r(mm6, mm0); /* tmp32 */
- paddw_r2r(mm2, mm5); // tmp11
- psubw_r2r(mm6, mm7); // tmp33
+ paddw_r2r(mm2, mm5); /* tmp11 */
+ psubw_r2r(mm6, mm7); /* tmp33 */
movq_r2m(mm0, *(dataptr+4));
- paddw_r2r(mm3, mm2); // tmp12
+ paddw_r2r(mm3, mm2); /* tmp12 */
/* stage 4 */
movq_r2m(mm7, *(dataptr+12));
- movq_r2r(mm4, mm1); // copy of tmp10
+ movq_r2r(mm4, mm1); /* copy of tmp10 */
- psubw_r2r(mm2, mm1); // tmp10 - tmp12
- psllw_i2r(2, mm4); // m8 * 2^2
+ psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r(2, mm4); /* m8 * 2^2 */
movq_m2r(RTjpeg_C2mC6, mm0);
psllw_i2r(2, mm1);
- pmulhw_m2r(RTjpeg_C6, mm1); // z5
+ pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
psllw_i2r(2, mm2);
- pmulhw_r2r(mm0, mm4); // z5
+ pmulhw_r2r(mm0, mm4); /* z5 */
/* stage 5 */
pmulhw_m2r(RTjpeg_C2pC6, mm2);
psllw_i2r(2, mm5);
- pmulhw_m2r(RTjpeg_C4, mm5); // z3
- movq_r2r(mm3, mm0); // copy tmp7
+ pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
+ movq_r2r(mm3, mm0); /* copy tmp7 */
movq_m2r(*(dataptr+1), mm7);
- paddw_r2r(mm1, mm4); // z2
+ paddw_r2r(mm1, mm4); /* z2 */
- paddw_r2r(mm1, mm2); // z4
+ paddw_r2r(mm1, mm2); /* z4 */
- paddw_r2r(mm5, mm0); // z11
- psubw_r2r(mm5, mm3); // z13
+ paddw_r2r(mm5, mm0); /* z11 */
+ psubw_r2r(mm5, mm3); /* z13 */
/* stage 6 */
- movq_r2r(mm3, mm5); // copy z13
- psubw_r2r(mm4, mm3); // y3=z13 - z2
+ movq_r2r(mm3, mm5); /* copy z13 */
+ psubw_r2r(mm4, mm3); /* y3=z13 - z2 */
- paddw_r2r(mm4, mm5); // y5=z13 + z2
- movq_r2r(mm0, mm6); // copy z11
+ paddw_r2r(mm4, mm5); /* y5=z13 + z2 */
+ movq_r2r(mm0, mm6); /* copy z11 */
- movq_r2m(mm3, *(dataptr+6)); //save y3
- psubw_r2r(mm2, mm0); // y7=z11 - z4
+ movq_r2m(mm3, *(dataptr+6)); /*save y3 */
+ psubw_r2r(mm2, mm0); /* y7=z11 - z4 */
- movq_r2m(mm5, *(dataptr+10)); //save y5
- paddw_r2r(mm2, mm6); // y1=z11 + z4
+ movq_r2m(mm5, *(dataptr+10)); /*save y5 */
+ paddw_r2r(mm2, mm6); /* y1=z11 + z4 */
- movq_r2m(mm0, *(dataptr+14)); //save y7
+ movq_r2m(mm0, *(dataptr+14)); /*save y7 */
/************************************************
* End of 1st 4 rows
************************************************/
- movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */
- movq_r2r(mm7, mm0); // copy x0
+ movq_m2r(*(dataptr+3), mm1); /* load x1 : stage 1 */
+ movq_r2r(mm7, mm0); /* copy x0 */
- movq_r2m(mm6, *(dataptr+2)); //save y1
+ movq_r2m(mm6, *(dataptr+2)); /*save y1 */
- movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */
- movq_r2r(mm1, mm6); // copy x1
+ movq_m2r(*(dataptr+5), mm2); /* load x2 : stage 1 */
+ movq_r2r(mm1, mm6); /* copy x1 */
- paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7
+ paddw_m2r(*(dataptr+15), mm0); /* tmp00 = x0 + x7 */
- movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */
- movq_r2r(mm2, mm5); // copy x2
+ movq_m2r(*(dataptr+7), mm3); /* load x3 : stage 1 */
+ movq_r2r(mm2, mm5); /* copy x2 */
- psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7
- movq_r2r(mm3, mm4); // copy x3
+ psubw_m2r(*(dataptr+15), mm7); /* tmp07 = x0 - x7 */
+ movq_r2r(mm3, mm4); /* copy x3 */
- paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6
+ paddw_m2r(*(dataptr+13), mm1); /* tmp01 = x1 + x6 */
- movq_r2m(mm7, tmp7); // save tmp07
- movq_r2r(mm0, mm7); // copy tmp00
+ movq_r2m(mm7, tmp7); /* save tmp07 */
+ movq_r2r(mm0, mm7); /* copy tmp00 */
- psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6
+ psubw_m2r(*(dataptr+13), mm6); /* tmp06 = x1 - x6 */
/* stage 2, Even Part */
- paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4
+ paddw_m2r(*(dataptr+9), mm3); /* tmp03 = x3 + x4 */
- movq_r2m(mm6, tmp6); // save tmp07
- movq_r2r(mm1, mm6); // copy tmp01
+ movq_r2m(mm6, tmp6); /* save tmp06 */
+ movq_r2r(mm1, mm6); /* copy tmp01 */
- paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5
- paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03
+ paddw_m2r(*(dataptr+11), mm2); /* tmp02 = x2 + x5 */
+ paddw_r2r(mm3, mm0); /* tmp10 = tmp00 + tmp03 */
- psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03
+ psubw_r2r(mm3, mm7); /* tmp13 = tmp00 - tmp03 */
- psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4
- psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02
+ psubw_m2r(*(dataptr+9), mm4); /* tmp04 = x3 - x4 */
+ psubw_r2r(mm2, mm6); /* tmp12 = tmp01 - tmp02 */
- paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02
+ paddw_r2r(mm2, mm1); /* tmp11 = tmp01 + tmp02 */
- psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5
- paddw_r2r(mm7, mm6); // tmp12 + tmp13
+ psubw_m2r(*(dataptr+11), mm5); /* tmp05 = x2 - x5 */
+ paddw_r2r(mm7, mm6); /* tmp12 + tmp13 */
/* stage 3, Even and stage 4 & 5 even */
- movq_m2r(tmp6, mm2); // load tmp6
- movq_r2r(mm0, mm3); // copy tmp10
+ movq_m2r(tmp6, mm2); /* load tmp6 */
+ movq_r2r(mm0, mm3); /* copy tmp10 */
- psllw_i2r(2, mm6); // shift z1
- paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11
+ psllw_i2r(2, mm6); /* shift z1 */
+ paddw_r2r(mm1, mm0); /* y0=tmp10 + tmp11 */
- pmulhw_m2r(RTjpeg_C4, mm6); // z1
- psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11
+ pmulhw_m2r(RTjpeg_C4, mm6); /* z1 */
+ psubw_r2r(mm1, mm3); /* y4=tmp10 - tmp11 */
- movq_r2m(mm0, *(dataptr+1)); //save y0
- movq_r2r(mm7, mm0); // copy tmp13
+ movq_r2m(mm0, *(dataptr+1)); /*save y0 */
+ movq_r2r(mm7, mm0); /* copy tmp13 */
/* odd part */
- movq_r2m(mm3, *(dataptr+9)); //save y4
- paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5
+ movq_r2m(mm3, *(dataptr+9)); /*save y4 */
+ paddw_r2r(mm5, mm4); /* tmp10 = tmp4 + tmp5 */
- movq_m2r(tmp7, mm3); // load tmp7
- paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1
+ movq_m2r(tmp7, mm3); /* load tmp7 */
+ paddw_r2r(mm6, mm0); /* tmp32 = tmp13 + z1 */
- paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6
- psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1
+ paddw_r2r(mm2, mm5); /* tmp11 = tmp5 + tmp6 */
+ psubw_r2r(mm6, mm7); /* tmp33 = tmp13 - z1 */
- movq_r2m(mm0, *(dataptr+5)); //save y2
- paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7
+ movq_r2m(mm0, *(dataptr+5)); /*save y2 */
+ paddw_r2r(mm3, mm2); /* tmp12 = tmp6 + tmp7 */
/* stage 4 */
- movq_r2m(mm7, *(dataptr+13)); //save y6
- movq_r2r(mm4, mm1); // copy tmp10
+ movq_r2m(mm7, *(dataptr+13)); /*save y6 */
+ movq_r2r(mm4, mm1); /* copy tmp10 */
- psubw_r2r(mm2, mm1); // tmp10 - tmp12
- psllw_i2r(2, mm4); // shift tmp10
+ psubw_r2r(mm2, mm1); /* tmp10 - tmp12 */
+ psllw_i2r(2, mm4); /* shift tmp10 */
- movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6
- psllw_i2r(2, mm1); // shift (tmp10-tmp12)
+ movq_m2r(RTjpeg_C2mC6, mm0); /* load C2mC6 */
+ psllw_i2r(2, mm1); /* shift (tmp10-tmp12) */
- pmulhw_m2r(RTjpeg_C6, mm1); // z5
- psllw_i2r(2, mm5); // prepare for multiply
+ pmulhw_m2r(RTjpeg_C6, mm1); /* z5 */
+ psllw_i2r(2, mm5); /* prepare for multiply */
- pmulhw_r2r(mm0, mm4); // multiply by converted real
+ pmulhw_r2r(mm0, mm4); /* multiply by converted real */
/* stage 5 */
- pmulhw_m2r(RTjpeg_C4, mm5); // z3
- psllw_i2r(2, mm2); // prepare for multiply
+ pmulhw_m2r(RTjpeg_C4, mm5); /* z3 */
+ psllw_i2r(2, mm2); /* prepare for multiply */
- pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply
- movq_r2r(mm3, mm0); // copy tmp7
+ pmulhw_m2r(RTjpeg_C2pC6, mm2); /* multiply */
+ movq_r2r(mm3, mm0); /* copy tmp7 */
- movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7
- paddw_r2r(mm1, mm4); // z2
+ movq_m2r(*(dataptr+9), mm7); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
+ paddw_r2r(mm1, mm4); /* z2 */
- paddw_r2r(mm5, mm0); // z11
- psubw_r2r(mm5, mm3); // z13
+ paddw_r2r(mm5, mm0); /* z11 */
+ psubw_r2r(mm5, mm3); /* z13 */
/* stage 6 */
- movq_r2r(mm3, mm5); // copy z13
- paddw_r2r(mm1, mm2); // z4
+ movq_r2r(mm3, mm5); /* copy z13 */
+ paddw_r2r(mm1, mm2); /* z4 */
- movq_r2r(mm0, mm6); // copy z11
- psubw_r2r(mm4, mm5); // y3
+ movq_r2r(mm0, mm6); /* copy z11 */
+ psubw_r2r(mm4, mm5); /* y3 */
- paddw_r2r(mm2, mm6); // y1
- paddw_r2r(mm4, mm3); // y5
+ paddw_r2r(mm2, mm6); /* y1 */
+ paddw_r2r(mm4, mm3); /* y5 */
- movq_r2m(mm5, *(dataptr+7)); //save y3
- psubw_r2r(mm2, mm0); // yè=z11 - z4
+ movq_r2m(mm5, *(dataptr+7)); /*save y3 */
+ psubw_r2r(mm2, mm0); /* y7=z11 - z4 */
- movq_r2m(mm3, *(dataptr+11)); //save y5
+ movq_r2m(mm3, *(dataptr+11)); /*save y5 */
- movq_r2m(mm6, *(dataptr+3)); //save y1
+ movq_r2m(mm6, *(dataptr+3)); /*save y1 */
- movq_r2m(mm0, *(dataptr+15)); //save y7
+ movq_r2m(mm0, *(dataptr+15)); /*save y7 */
#endif
@@ -1257,126 +1257,126 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/* Odd part */
- movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5]
+ movq_m2r(*(idata+10), mm1); /* load idata[DCTSIZE*5] */
- movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3]
+ movq_m2r(*(idata+6), mm0); /* load idata[DCTSIZE*3] */
- movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1]
+ movq_m2r(*(idata+2), mm3); /* load idata[DCTSIZE*1] */
- movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */
+ movq_r2r(mm1, mm2); /* copy tmp6 : phase 6 */
- movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7]
+ movq_m2r(*(idata+14), mm4); /* load idata[DCTSIZE*7] */
- paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5;
+ paddw_r2r(mm0, mm1); /* z13 = tmp6 + tmp5; */
- psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5
+ psubw_r2r(mm0, mm2); /* z10 = tmp6 - tmp5 */
- psllw_i2r(2, mm2); // shift z10
- movq_r2r(mm2, mm0); // copy z10
+ psllw_i2r(2, mm2); /* shift z10 */
+ movq_r2r(mm2, mm0); /* copy z10 */
- pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm3, mm5); // copy tmp4
+ pmulhw_m2r(fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */
+ movq_r2r(mm3, mm5); /* copy tmp4 */
- pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
- paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7;
+ pmulhw_m2r(fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */
+ paddw_r2r(mm4, mm3); /* z11 = tmp4 + tmp7; */
- movq_r2r(mm3, mm6); // copy z11 /* phase 5 */
- psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7;
+ movq_r2r(mm3, mm6); /* copy z11 : phase 5 */
+ psubw_r2r(mm4, mm5); /* z12 = tmp4 - tmp7; */
- psubw_r2r(mm1, mm6); // z11-z13
- psllw_i2r(2, mm5); // shift z12
+ psubw_r2r(mm1, mm6); /* z11-z13 */
+ psllw_i2r(2, mm5); /* shift z12 */
- movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part
- movq_r2r(mm5, mm7); // copy z12
+ movq_m2r(*(idata+12), mm4); /* load idata[DCTSIZE*6], even part */
+ movq_r2r(mm5, mm7); /* copy z12 */
- pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
- paddw_r2r(mm1, mm3); // tmp7 = z11 + z13;
+ pmulhw_m2r(fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6): even part */
+ paddw_r2r(mm1, mm3); /* tmp7 = z11 + z13; */
- //ok
+ /*ok */
/* Even part */
- pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
+ pmulhw_m2r(fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */
psllw_i2r(2, mm6);
- movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2]
+ movq_m2r(*(idata+4), mm1); /* load idata[DCTSIZE*2] */
- paddw_r2r(mm5, mm0); // tmp10
+ paddw_r2r(mm5, mm0); /* tmp10 */
- paddw_r2r(mm7, mm2); // tmp12
+ paddw_r2r(mm7, mm2); /* tmp12 */
- pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
- psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7
+ pmulhw_m2r(fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
+ psubw_r2r(mm3, mm2); /* tmp6 = tmp12 - tmp7 */
- movq_r2r(mm1, mm5); // copy tmp1
- paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */
+ movq_r2r(mm1, mm5); /* copy tmp1 */
+ paddw_r2r(mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */
- psubw_r2r(mm4, mm5); // tmp1-tmp3
- psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6;
+ psubw_r2r(mm4, mm5); /* tmp1-tmp3 */
+ psubw_r2r(mm2, mm6); /* tmp5 = tmp11 - tmp6; */
- movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace
- psllw_i2r(2, mm5); // shift tmp1-tmp3
+ movq_r2m(mm1, *(wsptr)); /* save tmp13 in workspace */
+ psllw_i2r(2, mm5); /* shift tmp1-tmp3 */
- movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0]
+ movq_m2r(*(idata), mm7); /* load idata[DCTSIZE*0] */
- pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
- paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5;
+ pmulhw_m2r(fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
+ paddw_r2r(mm6, mm0); /* tmp4 = tmp10 + tmp5; */
- movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4]
+ movq_m2r(*(idata+8), mm4); /* load idata[DCTSIZE*4] */
- psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+ psubw_r2r(mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
- movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace
- movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */
+ movq_r2m(mm0, *(wsptr+4)); /* save tmp4 in workspace */
+ movq_r2r(mm7, mm1); /* copy tmp0 : phase 3 */
- movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace
- psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2;
+ movq_r2m(mm5, *(wsptr+2)); /* save tmp12 in workspace */
+ psubw_r2r(mm4, mm1); /* tmp11 = tmp0 - tmp2; */
- paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2;
- movq_r2r(mm1, mm5); // copy tmp11
+ paddw_r2r(mm4, mm7); /* tmp10 = tmp0 + tmp2; */
+ movq_r2r(mm1, mm5); /* copy tmp11 */
- paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12;
- movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */
+ paddw_m2r(*(wsptr+2), mm1); /* tmp1 = tmp11 + tmp12; */
+ movq_r2r(mm7, mm4); /* copy tmp10 : phase 2 */
- paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13;
+ paddw_m2r(*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */
- psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13;
- movq_r2r(mm7, mm0); // copy tmp0
+ psubw_m2r(*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */
+ movq_r2r(mm7, mm0); /* copy tmp0 */
- psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12;
- paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+ psubw_m2r(*(wsptr+2), mm5); /* tmp2 = tmp11 - tmp12; */
+ paddw_r2r(mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
- psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+ psubw_r2r(mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
- movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0]
- movq_r2r(mm1, mm3); // copy tmp1
+ movq_r2m(mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */
+ movq_r2r(mm1, mm3); /* copy tmp1 */
- movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7]
- paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+ movq_r2m(mm0, *(wsptr+14)); /* wsptr[DCTSIZE*7] */
+ paddw_r2r(mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
- psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+ psubw_r2r(mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
- movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1]
- movq_r2r(mm4, mm1); // copy tmp3
+ movq_r2m(mm1, *(wsptr+2)); /* wsptr[DCTSIZE*1] */
+ movq_r2r(mm4, mm1); /* copy tmp3 */
- movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6]
+ movq_r2m(mm3, *(wsptr+12)); /* wsptr[DCTSIZE*6] */
- paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+ paddw_m2r(*(wsptr+4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
- psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+ psubw_m2r(*(wsptr+4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
movq_r2m(mm4, *(wsptr+8));
- movq_r2r(mm5, mm7); // copy tmp2
+ movq_r2r(mm5, mm7); /* copy tmp2 */
- paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
+ paddw_r2r(mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
movq_r2m(mm1, *(wsptr+6));
- psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+ psubw_r2r(mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
movq_r2m(mm5, *(wsptr+4));
movq_r2m(mm7, *(wsptr+10));
- //ok
+ /*ok */
/*****************************************************************/
@@ -1386,118 +1386,118 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/*****************************************************************/
- movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5]
+ movq_m2r(*(idata+10), mm1); /* load idata[DCTSIZE*5] */
- movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3]
+ movq_m2r(*(idata+6), mm0); /* load idata[DCTSIZE*3] */
- movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1]
- movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */
+ movq_m2r(*(idata+2), mm3); /* load idata[DCTSIZE*1] */
+ movq_r2r(mm1, mm2); /* copy tmp6 : phase 6 */
- movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7]
- paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5;
+ movq_m2r(*(idata+14), mm4); /* load idata[DCTSIZE*7] */
+ paddw_r2r(mm0, mm1); /* z13 = tmp6 + tmp5; */
- psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5
+ psubw_r2r(mm0, mm2); /* z10 = tmp6 - tmp5 */
- psllw_i2r(2, mm2); // shift z10
- movq_r2r(mm2, mm0); // copy z10
+ psllw_i2r(2, mm2); /* shift z10 */
+ movq_r2r(mm2, mm0); /* copy z10 */
- pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm3, mm5); // copy tmp4
+ pmulhw_m2r(fix_184n261, mm2); /* MULTIPLY( z12, FIX_1_847759065); : 2*c2 */
+ movq_r2r(mm3, mm5); /* copy tmp4 */
- pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
- paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7;
+ pmulhw_m2r(fix_n184, mm0); /* MULTIPLY(z10, -FIX_1_847759065); : 2*c2 */
+ paddw_r2r(mm4, mm3); /* z11 = tmp4 + tmp7; */
- movq_r2r(mm3, mm6); // copy z11 /* phase 5 */
- psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7;
+ movq_r2r(mm3, mm6); /* copy z11 : phase 5 */
+ psubw_r2r(mm4, mm5); /* z12 = tmp4 - tmp7; */
- psubw_r2r(mm1, mm6); // z11-z13
- psllw_i2r(2, mm5); // shift z12
+ psubw_r2r(mm1, mm6); /* z11-z13 */
+ psllw_i2r(2, mm5); /* shift z12 */
- movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part
- movq_r2r(mm5, mm7); // copy z12
+ movq_m2r(*(idata+12), mm4); /* load idata[DCTSIZE*6], even part */
+ movq_r2r(mm5, mm7); /* copy z12 */
- pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
- paddw_r2r(mm1, mm3); // tmp7 = z11 + z13;
+ pmulhw_m2r(fix_108n184, mm5); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6) even part */
+ paddw_r2r(mm1, mm3); /* tmp7 = z11 + z13; */
- //ok
+ /*ok */
/* Even part */
- pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
+ pmulhw_m2r(fix_184, mm7); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */
psllw_i2r(2, mm6);
- movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2]
+ movq_m2r(*(idata+4), mm1); /* load idata[DCTSIZE*2] */
- paddw_r2r(mm5, mm0); // tmp10
+ paddw_r2r(mm5, mm0); /* tmp10 */
- paddw_r2r(mm7, mm2); // tmp12
+ paddw_r2r(mm7, mm2); /* tmp12 */
- pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
- psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7
+ pmulhw_m2r(fix_141, mm6); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
+ psubw_r2r(mm3, mm2); /* tmp6 = tmp12 - tmp7 */
- movq_r2r(mm1, mm5); // copy tmp1
- paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */
+ movq_r2r(mm1, mm5); /* copy tmp1 */
+ paddw_r2r(mm4, mm1); /* tmp13= tmp1 + tmp3; phases 5-3 */
- psubw_r2r(mm4, mm5); // tmp1-tmp3
- psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6;
+ psubw_r2r(mm4, mm5); /* tmp1-tmp3 */
+ psubw_r2r(mm2, mm6); /* tmp5 = tmp11 - tmp6; */
- movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace
- psllw_i2r(2, mm5); // shift tmp1-tmp3
+ movq_r2m(mm1, *(wsptr)); /* save tmp13 in workspace */
+ psllw_i2r(2, mm5); /* shift tmp1-tmp3 */
- movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0]
- paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5;
+ movq_m2r(*(idata), mm7); /* load idata[DCTSIZE*0] */
+ paddw_r2r(mm6, mm0); /* tmp4 = tmp10 + tmp5; */
- pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
+ pmulhw_m2r(fix_141, mm5); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
- movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4]
+ movq_m2r(*(idata+8), mm4); /* load idata[DCTSIZE*4] */
- psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+ psubw_r2r(mm1, mm5); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
- movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace
- movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */
+ movq_r2m(mm0, *(wsptr+4)); /* save tmp4 in workspace */
+ movq_r2r(mm7, mm1); /* copy tmp0: phase 3 */
- movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace
- psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2;
+ movq_r2m(mm5, *(wsptr+2)); /* save tmp12 in workspace */
+ psubw_r2r(mm4, mm1); /* tmp11 = tmp0 - tmp2; */
- paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2;
- movq_r2r(mm1, mm5); // copy tmp11
+ paddw_r2r(mm4, mm7); /* tmp10 = tmp0 + tmp2; */
+ movq_r2r(mm1, mm5); /* copy tmp11 */
- paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12;
- movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */
+ paddw_m2r(*(wsptr+2), mm1); /* tmp1 = tmp11 + tmp12; */
+ movq_r2r(mm7, mm4); /* copy tmp10: phase 2 */
- paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13;
+ paddw_m2r(*(wsptr), mm7); /* tmp0 = tmp10 + tmp13; */
- psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13;
- movq_r2r(mm7, mm0); // copy tmp0
+ psubw_m2r(*(wsptr), mm4); /* tmp3 = tmp10 - tmp13; */
+ movq_r2r(mm7, mm0); /* copy tmp0 */
- psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12;
- paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+ psubw_m2r(*(wsptr+2), mm5); /* tmp2 = tmp11 - tmp12; */
+ paddw_r2r(mm3, mm7); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
- psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+ psubw_r2r(mm3, mm0); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
- movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0]
- movq_r2r(mm1, mm3); // copy tmp1
+ movq_r2m(mm7, *(wsptr)); /* wsptr[DCTSIZE*0] */
+ movq_r2r(mm1, mm3); /* copy tmp1 */
- movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7]
- paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+ movq_r2m(mm0, *(wsptr+14)); /* wsptr[DCTSIZE*7] */
+ paddw_r2r(mm2, mm1); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
- psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+ psubw_r2r(mm2, mm3); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
- movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1]
- movq_r2r(mm4, mm1); // copy tmp3
+ movq_r2m(mm1, *(wsptr+2)); /* wsptr[DCTSIZE*1] */
+ movq_r2r(mm4, mm1); /* copy tmp3 */
- movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6]
+ movq_r2m(mm3, *(wsptr+12)); /* wsptr[DCTSIZE*6] */
- paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+ paddw_m2r(*(wsptr+4), mm4); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
- psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+ psubw_m2r(*(wsptr+4), mm1); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
movq_r2m(mm4, *(wsptr+8));
- movq_r2r(mm5, mm7); // copy tmp2
+ movq_r2r(mm5, mm7); /* copy tmp2 */
- paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
+ paddw_r2r(mm6, mm5); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
movq_r2m(mm1, *(wsptr+6));
- psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+ psubw_r2r(mm6, mm7); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
movq_r2m(mm5, *(wsptr+4));
@@ -1514,355 +1514,355 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
wsptr--;
-// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
- movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3]
+/* tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); */
+/* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */
+/* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */
+/* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */
+ movq_m2r(*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */
- movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7]
+ movq_m2r(*(wsptr+1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */
movq_r2r(mm0, mm2);
- movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3]
- paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
+ movq_m2r(*(wsptr+2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ paddw_r2r(mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
- movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7]
- psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
+ movq_m2r(*(wsptr+3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */
+ psubw_r2r(mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
movq_r2r(mm0, mm6);
movq_r2r(mm3, mm5);
- paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
+ paddw_r2r(mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
movq_r2r(mm2, mm1);
- psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
- punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
+ psubw_r2r(mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
+ punpcklwd_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
- movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7]
- punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
+ movq_m2r(*(wsptr+7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ punpckhwd_r2r(mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
- movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
+ movq_m2r(*(wsptr+4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckldq_r2r(mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
- punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
+ punpcklwd_r2r(mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
movq_r2r(mm3, mm4);
- movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3]
- punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
+ movq_m2r(*(wsptr+6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */
+ punpckhwd_r2r(mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
- movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+ movq_m2r(*(wsptr+5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckldq_r2r(mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
- paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
+ paddw_r2r(mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
movq_r2r(mm6, mm2);
- psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
- paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
+ psubw_r2r(mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
+ paddw_r2r(mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
movq_r2r(mm3, mm5);
- punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
+ punpcklwd_r2r(mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
- psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
- punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
+ psubw_r2r(mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
+ punpckhwd_r2r(mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
movq_r2r(mm4, mm7);
- punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
+ punpckldq_r2r(mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
- punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
+ punpcklwd_r2r(mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
- punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
+ punpckhwd_r2r(mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
- punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
+ punpckldq_r2r(mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
movq_r2r(mm1, mm6);
- //ok
+ /*ok */
-// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+/* mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
+/* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
movq_r2r(mm0, mm2);
- punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
+ punpckhdq_r2r(mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
- punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
+ punpckldq_r2r(mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
psllw_i2r(2, mm6);
pmulhw_m2r(fix_141, mm6);
- punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
+ punpckldq_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
- punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
+ punpckhdq_r2r(mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
movq_r2r(mm0, mm7);
-// tmp0 = tmp10 + tmp13;
-// tmp3 = tmp10 - tmp13;
- paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
- psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
+/* tmp0 = tmp10 + tmp13; */
+/* tmp3 = tmp10 - tmp13; */
+ paddw_r2r(mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
+ psubw_r2r(mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
-// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
- psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
-// tmp1 = tmp11 + tmp12;
-// tmp2 = tmp11 - tmp12;
+/* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */
+ psubw_r2r(mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
+/* tmp1 = tmp11 + tmp12; */
+/* tmp2 = tmp11 - tmp12; */
movq_r2r(mm1, mm5);
- //OK
+ /*OK */
/* Odd part */
-// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
- movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3]
- paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
+/* z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; */
+/* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */
+/* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */
+/* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */
+ movq_m2r(*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+ paddw_r2r(mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
- movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7]
- psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
+ movq_m2r(*(wsptr+1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */
+ psubw_r2r(mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
movq_r2r(mm3, mm6);
- punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5]
+ punpckldq_r2r(mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */
- punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3]
+ punpckhdq_r2r(mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */
movq_r2r(mm3, mm2);
-//Save tmp0 and tmp1 in wsptr
- movq_r2m(mm0, *(wsptr)); // save tmp0
- paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13]
+/*Save tmp0 and tmp1 in wsptr */
+ movq_r2m(mm0, *(wsptr)); /* save tmp0 */
+ paddw_r2r(mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
-//Continue with z10 --- z13
- movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3]
- psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10]
+/*Continue with z10 --- z13 */
+ movq_m2r(*(wsptr+2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ psubw_r2r(mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
- movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7]
+ movq_m2r(*(wsptr+3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */
movq_r2r(mm6, mm4);
- movq_r2m(mm1, *(wsptr+1)); // save tmp1
- punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5]
+ movq_r2m(mm1, *(wsptr+1)); /* save tmp1 */
+ punpckldq_r2r(mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */
- punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3]
+ punpckhdq_r2r(mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */
movq_r2r(mm6, mm1);
-//Save tmp2 and tmp3 in wsptr
- paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13]
+/*Save tmp2 and tmp3 in wsptr */
+ paddw_r2r(mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
movq_r2r(mm2, mm4);
-//Continue with z10 --- z13
- movq_r2m(mm5, *(wsptr+2)); // save tmp2
- punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11]
+/*Continue with z10 --- z13 */
+ movq_r2m(mm5, *(wsptr+2)); /* save tmp2 */
+ punpcklwd_r2r(mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
- psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10]
- punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13]
+ psubw_r2r(mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
+ punpckhwd_r2r(mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
movq_r2r(mm3, mm0);
- punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12]
+ punpcklwd_r2r(mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
- movq_r2m(mm7, *(wsptr+3)); // save tmp3
- punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10]
+ movq_r2m(mm7, *(wsptr+3)); /* save tmp3 */
+ punpckhwd_r2r(mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
- movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11]
+ movq_m2r(*(wsptr+4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckhdq_r2r(mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
- movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13]
+ movq_m2r(*(wsptr+5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckhdq_r2r(mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
- movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3]
+ movq_m2r(*(wsptr+6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */
movq_r2r(mm6, mm4);
- punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5]
+ punpckldq_r2r(mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */
movq_r2r(mm1, mm5);
- punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3]
+ punpckhdq_r2r(mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */
movq_r2r(mm6, mm2);
- movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7]
- paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13]
+ movq_m2r(*(wsptr+7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ paddw_r2r(mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
- psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10]
- punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5]
+ psubw_r2r(mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
+ punpckldq_r2r(mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */
- punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3]
+ punpckhdq_r2r(mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */
movq_r2r(mm1, mm7);
- paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13]
- psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10]
+ paddw_r2r(mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
+ psubw_r2r(mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
movq_r2r(mm6, mm5);
- punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11]
+ punpcklwd_r2r(mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
- punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13]
+ punpckhwd_r2r(mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
movq_r2r(mm2, mm4);
- punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12]
+ punpcklwd_r2r(mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
- punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10]
+ punpckhwd_r2r(mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
- punpckhdq_r2r(mm6, mm4); /// wsptr[2,z10],[3,z10],[2,z11],[3,z11]
+ punpckhdq_r2r(mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
- punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13]
+ punpckhdq_r2r(mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
movq_r2r(mm0, mm5);
- punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10]
+ punpckldq_r2r(mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
- punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11]
+ punpckhdq_r2r(mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
movq_r2r(mm3, mm4);
- punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13]
+ punpckhdq_r2r(mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
movq_r2r(mm5, mm1);
- punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12]
-// tmp7 = z11 + z13; /* phase 5 */
-// tmp8 = z11 - z13; /* phase 5 */
- psubw_r2r(mm4, mm1); // tmp8
+ punpckldq_r2r(mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
+/* tmp7 = z11 + z13; : phase 5 */
+/* tmp8 = z11 - z13; : phase 5 */
+ psubw_r2r(mm4, mm1); /* tmp8 */
- paddw_r2r(mm4, mm5); // tmp7
-// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
+ paddw_r2r(mm4, mm5); /* tmp7 */
+/* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */
psllw_i2r(2, mm1);
psllw_i2r(2, mm0);
- pmulhw_m2r(fix_141, mm1); // tmp21
-// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */
-// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
+ pmulhw_m2r(fix_141, mm1); /* tmp21 */
+/* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) 2*(c2-c6) */
+/* + MULTIPLY(z10, - FIX_1_847759065); : 2*c2 */
psllw_i2r(2, mm3);
movq_r2r(mm0, mm7);
pmulhw_m2r(fix_n184, mm7);
movq_r2r(mm3, mm6);
- movq_m2r(*(wsptr), mm2); // tmp0,final1
+ movq_m2r(*(wsptr), mm2); /* tmp0,final1 */
pmulhw_m2r(fix_108n184, mm6);
-// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
-// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm2, mm4); // final1
+/* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) : -2*(c2+c6) */
+/* + MULTIPLY(z12, FIX_1_847759065); 2*c2 */
+ movq_r2r(mm2, mm4); /* final1 */
pmulhw_m2r(fix_184n261, mm0);
- paddw_r2r(mm5, mm2); // tmp0+tmp7,final1
+ paddw_r2r(mm5, mm2); /* tmp0+tmp7,final1 */
pmulhw_m2r(fix_184, mm3);
- psubw_r2r(mm5, mm4); // tmp0-tmp7,final1
+ psubw_r2r(mm5, mm4); /* tmp0-tmp7,final1 */
-// tmp6 = tmp22 - tmp7; /* phase 2 */
- psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1
+/* tmp6 = tmp22 - tmp7; phase 2 */
+ psraw_i2r(3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
- paddw_r2r(mm6, mm7); // tmp20
- psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1
+ paddw_r2r(mm6, mm7); /* tmp20 */
+ psraw_i2r(3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
- paddw_r2r(mm0, mm3); // tmp22
+ paddw_r2r(mm0, mm3); /* tmp22 */
-// tmp5 = tmp21 - tmp6;
- psubw_r2r(mm5, mm3); // tmp6
+/* tmp5 = tmp21 - tmp6; */
+ psubw_r2r(mm5, mm3); /* tmp6 */
-// tmp4 = tmp20 + tmp5;
- movq_m2r(*(wsptr+1), mm0); // tmp1,final2
- psubw_r2r(mm3, mm1); // tmp5
+/* tmp4 = tmp20 + tmp5; */
+ movq_m2r(*(wsptr+1), mm0); /* tmp1,final2 */
+ psubw_r2r(mm3, mm1); /* tmp5 */
- movq_r2r(mm0, mm6); // final2
- paddw_r2r(mm3, mm0); // tmp1+tmp6,final2
+ movq_r2r(mm0, mm6); /* final2 */
+ paddw_r2r(mm3, mm0); /* tmp1+tmp6,final2 */
/* Final output stage: scale down by a factor of 8 and range-limit */
-// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-// & RANGE_MASK]; final1
+/* outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) */
+/* & RANGE_MASK]; final1 */
-// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-// & RANGE_MASK]; final2
- psubw_r2r(mm3, mm6); // tmp1-tmp6,final2
- psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1]
+/* outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */
+/* & RANGE_MASK]; final2 */
+ psubw_r2r(mm3, mm6); /* tmp1-tmp6,final2 */
+ psraw_i2r(3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */
- psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6]
+ psraw_i2r(3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */
- packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
+ packuswb_r2r(mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
- movq_m2r(*(wsptr+2), mm5); // tmp2,final3
- packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
-
-// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-// & RANGE_MASK]; final3
- paddw_r2r(mm1, mm7); // tmp4
+ movq_m2r(*(wsptr+2), mm5); /* tmp2,final3 */
+ packuswb_r2r(mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
+
+/* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */
+/* & RANGE_MASK]; final3 */
+ paddw_r2r(mm1, mm7); /* tmp4 */
movq_r2r(mm5, mm3);
- paddw_r2r(mm1, mm5); // tmp2+tmp5
- psubw_r2r(mm1, mm3); // tmp2-tmp5
+ paddw_r2r(mm1, mm5); /* tmp2+tmp5 */
+ psubw_r2r(mm1, mm3); /* tmp2-tmp5 */
- psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2]
+ psraw_i2r(3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */
- movq_m2r(*(wsptr+3), mm4); // tmp3,final4
- psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5]
+ movq_m2r(*(wsptr+3), mm4); /* tmp3,final4 */
+ psraw_i2r(3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */
-// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-// & RANGE_MASK]; final4
+/* outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */
+/* & RANGE_MASK]; final4 */
movq_r2r(mm4, mm6);
- paddw_r2r(mm7, mm4); // tmp3+tmp4
+ paddw_r2r(mm7, mm4); /* tmp3+tmp4 */
- psubw_r2r(mm7, mm6); // tmp3-tmp4
- psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4]
+ psubw_r2r(mm7, mm6); /* tmp3-tmp4 */
+ psraw_i2r(3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */
- // mov ecx, [dataptr]
+ /* mov ecx, [dataptr] */
- psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3]
+ psraw_i2r(3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */
- packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
+ packuswb_r2r(mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
- packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
+ packuswb_r2r(mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
movq_r2r(mm2, mm4);
movq_r2r(mm5, mm7);
- punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
+ punpcklbw_r2r(mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
- punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
+ punpckhbw_r2r(mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
movq_r2r(mm2, mm1);
- punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
+ punpcklbw_r2r(mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
- // add dataptr, 4
+ /* add dataptr, 4 */
- punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
+ punpckhbw_r2r(mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
- punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
+ punpcklwd_r2r(mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
- // add ecx, output_col
+ /* add ecx, output_col */
movq_r2r(mm7, mm6);
- punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
+ punpckhwd_r2r(mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
movq_r2r(mm2, mm0);
- punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
+ punpcklwd_r2r(mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
- // mov idata, [dataptr]
+ /* mov idata, [dataptr] */
- punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
+ punpckldq_r2r(mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
- // add dataptr, 4
+ /* add dataptr, 4 */
movq_r2r(mm1, mm3);
- // add idata, output_col
+ /* add idata, output_col */
- punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
+ punpckhwd_r2r(mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
movq_r2m(mm2, *(dataptr));
- punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
+ punpckhdq_r2r(mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
dataptr += rskip;
movq_r2m(mm0, *(dataptr));
- punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
- punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
+ punpckldq_r2r(mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
+ punpckhdq_r2r(mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
dataptr += rskip;
movq_r2m(mm1, *(dataptr));
@@ -1876,302 +1876,302 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
/*******************************************************************/
-// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
- movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3]
+/* tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); */
+/* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */
+/* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */
+/* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */
+ movq_m2r(*(wsptr), mm0); /* wsptr[0,0],[0,1],[0,2],[0,3] */
- movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7]
+ movq_m2r(*(wsptr+1), mm1); /* wsptr[0,4],[0,5],[0,6],[0,7] */
movq_r2r(mm0, mm2);
- movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3]
- paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
+ movq_m2r(*(wsptr+2), mm3); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ paddw_r2r(mm1, mm0); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
- movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7]
- psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
+ movq_m2r(*(wsptr+3), mm4); /* wsptr[1,4],[1,5],[1,6],[1,7] */
+ psubw_r2r(mm1, mm2); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
movq_r2r(mm0, mm6);
movq_r2r(mm3, mm5);
- paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
+ paddw_r2r(mm4, mm3); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
movq_r2r(mm2, mm1);
- psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
- punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
+ psubw_r2r(mm4, mm5); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
+ punpcklwd_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
- movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7]
- punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
+ movq_m2r(*(wsptr+7), mm7); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ punpckhwd_r2r(mm3, mm6); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
- movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
+ movq_m2r(*(wsptr+4), mm3); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckldq_r2r(mm6, mm0); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
- punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
+ punpcklwd_r2r(mm5, mm1); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
movq_r2r(mm3, mm4);
- movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3]
- punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
+ movq_m2r(*(wsptr+6), mm6); /* wsptr[3,0],[3,1],[3,2],[3,3] */
+ punpckhwd_r2r(mm5, mm2); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
- movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+ movq_m2r(*(wsptr+5), mm5); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckldq_r2r(mm2, mm1); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
- paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
+ paddw_r2r(mm5, mm3); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
movq_r2r(mm6, mm2);
- psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
- paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
+ psubw_r2r(mm5, mm4); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
+ paddw_r2r(mm7, mm6); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
movq_r2r(mm3, mm5);
- punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
+ punpcklwd_r2r(mm6, mm3); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
- psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
- punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
+ psubw_r2r(mm7, mm2); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
+ punpckhwd_r2r(mm6, mm5); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
movq_r2r(mm4, mm7);
- punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
+ punpckldq_r2r(mm5, mm3); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
- punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
+ punpcklwd_r2r(mm2, mm4); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
- punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
+ punpckhwd_r2r(mm2, mm7); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
- punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
+ punpckldq_r2r(mm7, mm4); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
movq_r2r(mm1, mm6);
- //OK
+ /*OK */
-// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+/* mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
+/* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
movq_r2r(mm0, mm2);
- punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
+ punpckhdq_r2r(mm4, mm6); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
- punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
+ punpckldq_r2r(mm4, mm1); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
psllw_i2r(2, mm6);
pmulhw_m2r(fix_141, mm6);
- punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
+ punpckldq_r2r(mm3, mm0); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
- punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
+ punpckhdq_r2r(mm3, mm2); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
movq_r2r(mm0, mm7);
-// tmp0 = tmp10 + tmp13;
-// tmp3 = tmp10 - tmp13;
- paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
- psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
+/* tmp0 = tmp10 + tmp13; */
+/* tmp3 = tmp10 - tmp13; */
+ paddw_r2r(mm2, mm0); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
+ psubw_r2r(mm2, mm7); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
-// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
- psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
-// tmp1 = tmp11 + tmp12;
-// tmp2 = tmp11 - tmp12;
+/* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */
+ psubw_r2r(mm2, mm6); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
+/* tmp1 = tmp11 + tmp12; */
+/* tmp2 = tmp11 - tmp12; */
movq_r2r(mm1, mm5);
- //OK
+ /*OK */
/* Odd part */
-// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
- movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3]
- paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
+/* z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; */
+/* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */
+/* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */
+/* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */
+ movq_m2r(*(wsptr), mm3); /* wsptr[0,0],[0,1],[0,2],[0,3] */
+ paddw_r2r(mm6, mm1); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
- movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7]
- psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
+ movq_m2r(*(wsptr+1), mm4); /* wsptr[0,4],[0,5],[0,6],[0,7] */
+ psubw_r2r(mm6, mm5); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
movq_r2r(mm3, mm6);
- punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5]
+ punpckldq_r2r(mm4, mm3); /* wsptr[0,0],[0,1],[0,4],[0,5] */
- punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3]
+ punpckhdq_r2r(mm6, mm4); /* wsptr[0,6],[0,7],[0,2],[0,3] */
movq_r2r(mm3, mm2);
-//Save tmp0 and tmp1 in wsptr
- movq_r2m(mm0, *(wsptr)); // save tmp0
- paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13]
+/*Save tmp0 and tmp1 in wsptr */
+ movq_r2m(mm0, *(wsptr)); /* save tmp0 */
+ paddw_r2r(mm4, mm2); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
-//Continue with z10 --- z13
- movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3]
- psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10]
+/*Continue with z10 --- z13 */
+ movq_m2r(*(wsptr+2), mm6); /* wsptr[1,0],[1,1],[1,2],[1,3] */
+ psubw_r2r(mm4, mm3); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
- movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7]
+ movq_m2r(*(wsptr+3), mm0); /* wsptr[1,4],[1,5],[1,6],[1,7] */
movq_r2r(mm6, mm4);
- movq_r2m(mm1, *(wsptr+1)); // save tmp1
- punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5]
+ movq_r2m(mm1, *(wsptr+1)); /* save tmp1 */
+ punpckldq_r2r(mm0, mm6); /* wsptr[1,0],[1,1],[1,4],[1,5] */
- punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3]
+ punpckhdq_r2r(mm4, mm0); /* wsptr[1,6],[1,7],[1,2],[1,3] */
movq_r2r(mm6, mm1);
-//Save tmp2 and tmp3 in wsptr
- paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13]
+/*Save tmp2 and tmp3 in wsptr */
+ paddw_r2r(mm0, mm6); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
movq_r2r(mm2, mm4);
-//Continue with z10 --- z13
- movq_r2m(mm5, *(wsptr+2)); // save tmp2
- punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11]
+/*Continue with z10 --- z13 */
+ movq_r2m(mm5, *(wsptr+2)); /* save tmp2 */
+ punpcklwd_r2r(mm6, mm2); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
- psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10]
- punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13]
+ psubw_r2r(mm0, mm1); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
+ punpckhwd_r2r(mm6, mm4); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
movq_r2r(mm3, mm0);
- punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12]
+ punpcklwd_r2r(mm1, mm3); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
- movq_r2m(mm7, *(wsptr+3)); // save tmp3
- punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10]
+ movq_r2m(mm7, *(wsptr+3)); /* save tmp3 */
+ punpckhwd_r2r(mm1, mm0); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
- movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11]
+ movq_m2r(*(wsptr+4), mm6); /* wsptr[2,0],[2,1],[2,2],[2,3] */
+ punpckhdq_r2r(mm2, mm0); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
- movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13]
+ movq_m2r(*(wsptr+5), mm7); /* wsptr[2,4],[2,5],[2,6],[2,7] */
+ punpckhdq_r2r(mm4, mm3); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
- movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3]
+ movq_m2r(*(wsptr+6), mm1); /* wsptr[3,0],[3,1],[3,2],[3,3] */
movq_r2r(mm6, mm4);
- punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5]
+ punpckldq_r2r(mm7, mm6); /* wsptr[2,0],[2,1],[2,4],[2,5] */
movq_r2r(mm1, mm5);
- punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3]
+ punpckhdq_r2r(mm4, mm7); /* wsptr[2,6],[2,7],[2,2],[2,3] */
movq_r2r(mm6, mm2);
- movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7]
- paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13]
+ movq_m2r(*(wsptr+7), mm4); /* wsptr[3,4],[3,5],[3,6],[3,7] */
+ paddw_r2r(mm7, mm6); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
- psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10]
- punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5]
+ psubw_r2r(mm7, mm2); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
+ punpckldq_r2r(mm4, mm1); /* wsptr[3,0],[3,1],[3,4],[3,5] */
- punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3]
+ punpckhdq_r2r(mm5, mm4); /* wsptr[3,6],[3,7],[3,2],[3,3] */
movq_r2r(mm1, mm7);
- paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13]
- psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10]
+ paddw_r2r(mm4, mm1); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
+ psubw_r2r(mm4, mm7); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
movq_r2r(mm6, mm5);
- punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11]
+ punpcklwd_r2r(mm1, mm6); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
- punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13]
+ punpckhwd_r2r(mm1, mm5); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
movq_r2r(mm2, mm4);
- punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12]
+ punpcklwd_r2r(mm7, mm2); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
- punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10]
+ punpckhwd_r2r(mm7, mm4); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
- punpckhdq_r2r(mm6, mm4); // wsptr[2,z10],[3,z10],[2,z11],[3,z11]
+ punpckhdq_r2r(mm6, mm4); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
- punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13]
+ punpckhdq_r2r(mm5, mm2); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
movq_r2r(mm0, mm5);
- punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10]
+ punpckldq_r2r(mm4, mm0); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
- punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11]
+ punpckhdq_r2r(mm4, mm5); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
movq_r2r(mm3, mm4);
- punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13]
+ punpckhdq_r2r(mm2, mm4); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
movq_r2r(mm5, mm1);
- punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12]
-// tmp7 = z11 + z13; /* phase 5 */
-// tmp8 = z11 - z13; /* phase 5 */
- psubw_r2r(mm4, mm1); // tmp8
+ punpckldq_r2r(mm2, mm3); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
+/* tmp7 = z11 + z13; (phase 5) */
+/* tmp8 = z11 - z13; (phase 5) */
+ psubw_r2r(mm4, mm1); /* tmp8 */
- paddw_r2r(mm4, mm5); // tmp7
-// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
+ paddw_r2r(mm4, mm5); /* tmp7 */
+/* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); (2*c4) */
psllw_i2r(2, mm1);
psllw_i2r(2, mm0);
- pmulhw_m2r(fix_141, mm1); // tmp21
-// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */
-// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
+ pmulhw_m2r(fix_141, mm1); /* tmp21 */
+/* tmp20 = MULTIPLY(z12, (FIX_1_082392200 - FIX_1_847759065)) (2*(c2-c6)) */
+/* + MULTIPLY(z10, - FIX_1_847759065); (2*c2) */
psllw_i2r(2, mm3);
movq_r2r(mm0, mm7);
pmulhw_m2r(fix_n184, mm7);
movq_r2r(mm3, mm6);
- movq_m2r(*(wsptr), mm2); // tmp0,final1
+ movq_m2r(*(wsptr), mm2); /* tmp0,final1 */
pmulhw_m2r(fix_108n184, mm6);
-// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
-// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm2, mm4); // final1
+/* tmp22 = MULTIPLY(z10, (FIX_1_847759065 - FIX_2_613125930)) (-2*(c2+c6)) */
+/* + MULTIPLY(z12, FIX_1_847759065); (2*c2) */
+ movq_r2r(mm2, mm4); /* final1 */
pmulhw_m2r(fix_184n261, mm0);
- paddw_r2r(mm5, mm2); // tmp0+tmp7,final1
+ paddw_r2r(mm5, mm2); /* tmp0+tmp7,final1 */
pmulhw_m2r(fix_184, mm3);
- psubw_r2r(mm5, mm4); // tmp0-tmp7,final1
+ psubw_r2r(mm5, mm4); /* tmp0-tmp7,final1 */
-// tmp6 = tmp22 - tmp7; /* phase 2 */
- psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1
+/* tmp6 = tmp22 - tmp7; (phase 2) */
+ psraw_i2r(3, mm2); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
- paddw_r2r(mm6, mm7); // tmp20
- psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1
+ paddw_r2r(mm6, mm7); /* tmp20 */
+ psraw_i2r(3, mm4); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
- paddw_r2r(mm0, mm3); // tmp22
+ paddw_r2r(mm0, mm3); /* tmp22 */
-// tmp5 = tmp21 - tmp6;
- psubw_r2r(mm5, mm3); // tmp6
+/* tmp5 = tmp21 - tmp6; */
+ psubw_r2r(mm5, mm3); /* tmp6 */
-// tmp4 = tmp20 + tmp5;
- movq_m2r(*(wsptr+1), mm0); // tmp1,final2
- psubw_r2r(mm3, mm1); // tmp5
+/* tmp4 = tmp20 + tmp5; */
+ movq_m2r(*(wsptr+1), mm0); /* tmp1,final2 */
+ psubw_r2r(mm3, mm1); /* tmp5 */
- movq_r2r(mm0, mm6); // final2
- paddw_r2r(mm3, mm0); // tmp1+tmp6,final2
+ movq_r2r(mm0, mm6); /* final2 */
+ paddw_r2r(mm3, mm0); /* tmp1+tmp6,final2 */
/* Final output stage: scale down by a factor of 8 and range-limit */
-// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-// & RANGE_MASK]; final1
+/* outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) */
+/* & RANGE_MASK]; final1 */
-// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-// & RANGE_MASK]; final2
- psubw_r2r(mm3, mm6); // tmp1-tmp6,final2
- psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1]
+/* outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */
+/* & RANGE_MASK]; final2 */
+ psubw_r2r(mm3, mm6); /* tmp1-tmp6,final2 */
+ psraw_i2r(3, mm0); /* outptr[0,1],[1,1],[2,1],[3,1] */
- psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6]
+ psraw_i2r(3, mm6); /* outptr[0,6],[1,6],[2,6],[3,6] */
- packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
+ packuswb_r2r(mm4, mm0); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
- movq_m2r(*(wsptr+2), mm5); // tmp2,final3
- packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
-
-// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-// & RANGE_MASK]; final3
- paddw_r2r(mm1, mm7); // tmp4
+ movq_m2r(*(wsptr+2), mm5); /* tmp2,final3 */
+ packuswb_r2r(mm6, mm2); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
+
+/* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */
+/* & RANGE_MASK]; final3 */
+ paddw_r2r(mm1, mm7); /* tmp4 */
movq_r2r(mm5, mm3);
- paddw_r2r(mm1, mm5); // tmp2+tmp5
- psubw_r2r(mm1, mm3); // tmp2-tmp5
+ paddw_r2r(mm1, mm5); /* tmp2+tmp5 */
+ psubw_r2r(mm1, mm3); /* tmp2-tmp5 */
- psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2]
+ psraw_i2r(3, mm5); /* outptr[0,2],[1,2],[2,2],[3,2] */
- movq_m2r(*(wsptr+3), mm4); // tmp3,final4
- psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5]
+ movq_m2r(*(wsptr+3), mm4); /* tmp3,final4 */
+ psraw_i2r(3, mm3); /* outptr[0,5],[1,5],[2,5],[3,5] */
-// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-// & RANGE_MASK]; final4
+/* outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) */
+/* & RANGE_MASK]; */
+/* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */
+/* & RANGE_MASK]; final4 */
movq_r2r(mm4, mm6);
- paddw_r2r(mm7, mm4); // tmp3+tmp4
+ paddw_r2r(mm7, mm4); /* tmp3+tmp4 */
- psubw_r2r(mm7, mm6); // tmp3-tmp4
- psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4]
+ psubw_r2r(mm7, mm6); /* tmp3-tmp4 */
+ psraw_i2r(3, mm4); /* outptr[0,4],[1,4],[2,4],[3,4] */
- psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3]
+ psraw_i2r(3, mm6); /* outptr[0,3],[1,3],[2,3],[3,3] */
/*
movq_r2m(mm4, *dummy);
@@ -2181,46 +2181,46 @@ static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
*/
- packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
+ packuswb_r2r(mm4, mm5); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
- packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
+ packuswb_r2r(mm3, mm6); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
movq_r2r(mm2, mm4);
movq_r2r(mm5, mm7);
- punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
+ punpcklbw_r2r(mm0, mm2); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
- punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
+ punpckhbw_r2r(mm0, mm4); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
movq_r2r(mm2, mm1);
- punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
+ punpcklbw_r2r(mm6, mm5); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
- punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
+ punpckhbw_r2r(mm6, mm7); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
- punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
+ punpcklwd_r2r(mm5, mm2); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
movq_r2r(mm7, mm6);
- punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
+ punpckhwd_r2r(mm5, mm1); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
movq_r2r(mm2, mm0);
- punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
+ punpcklwd_r2r(mm4, mm6); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
- punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
+ punpckldq_r2r(mm6, mm2); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
movq_r2r(mm1, mm3);
- punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
+ punpckhwd_r2r(mm4, mm7); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
dataptr += rskip;
movq_r2m(mm2, *(dataptr));
- punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
+ punpckhdq_r2r(mm6, mm0); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
dataptr += rskip;
movq_r2m(mm0, *(dataptr));
- punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
+ punpckldq_r2r(mm7, mm1); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
- punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
+ punpckhdq_r2r(mm7, mm3); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
dataptr += rskip;
movq_r2m(mm1, *(dataptr));
@@ -2520,7 +2520,7 @@ void RTjpeg_init_decompress(__u32 *buf, int width, int height)
RTjpeg_idct_init();
-// RTjpeg_color_init();
+/* RTjpeg_color_init(); */
}
int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
@@ -2851,7 +2851,7 @@ int RTjpeg_bcomp(__s16 *old, mmx_t *mask)
for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
return 0;
}
-// printf(".");
+/* printf("."); */
return 1;
}
@@ -2945,7 +2945,7 @@ int RTjpeg_mcompress(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
bp2+=RTjpeg_width<<2;
bp3+=RTjpeg_width<<2;
}
- //printf ("%d\n", block - RTjpeg_old);
+ /*printf ("%d\n", block - RTjpeg_old); */
#ifdef HAVE_LIBMMX
emms();
#endif
@@ -2978,7 +2978,7 @@ int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask)
if(RTjpeg_bcomp(block, &RTjpeg_lmask))
{
*((__u8 *)sp++)=255;
-// printf("* %d ", sp[-1]);
+/* printf("* %d ", sp[-1]); */
} else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
block+=64;
}