From 4359de26166000e317604b6b20283c79dd2a0521 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 19 Mar 2002 04:10:06 +0000 Subject: removal of //-style comments don't link plugins to core libs -- the versioning is done internally to the plugins with... Original commit message from CVS: * removal of //-style comments * don't link plugins to core libs -- the versioning is done internally to the plugins with the plugin_info struct, and symbol resolution is lazy, so we can always know if a plugin can be loaded by the plugin_info data. in theory. --- gst-libs/gst/audio/Makefile.am | 2 +- gst-libs/gst/audio/audio.c | 2 +- gst-libs/gst/idct/Makefile.am | 2 +- gst-libs/gst/idct/ieeetest.c | 2 +- gst-libs/gst/idct/mmx32idct.c | 958 +++++++++++++++++++------------------- gst-libs/gst/resample/Makefile.am | 2 +- gst-libs/gst/resample/dtos.c | 4 +- gst-libs/gst/resample/functable.c | 58 +-- gst-libs/gst/resample/resample.c | 40 +- gst-libs/gst/resample/test.c | 34 +- gst-libs/gst/riff/Makefile.am | 2 +- gst-libs/gst/riff/riff.c | 38 +- gst-libs/gst/riff/riffencode.c | 2 +- gst-libs/gst/riff/riffparse.c | 32 +- gst-libs/gst/riff/riffutil.c | 2 +- 15 files changed, 590 insertions(+), 590 deletions(-) (limited to 'gst-libs') diff --git a/gst-libs/gst/audio/Makefile.am b/gst-libs/gst/audio/Makefile.am index 082ace35..b6e7b47d 100644 --- a/gst-libs/gst/audio/Makefile.am +++ b/gst-libs/gst/audio/Makefile.am @@ -7,6 +7,6 @@ libgstaudio_la_SOURCES = audio.c libgstaudioincludedir = $(includedir)/gst/audio libgstaudioinclude_HEADERS = audio.h -libgstaudio_la_LIBADD = $(GST_LIBS) +libgstaudio_la_LIBADD = libgstaudio_la_CFLAGS = $(GST_CFLAGS) -finline-functions -ffast-math libgstaudio_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) diff --git a/gst-libs/gst/audio/audio.c b/gst-libs/gst/audio/audio.c index 31b9ed89..1a0bcf22 100644 --- a/gst-libs/gst/audio/audio.c +++ b/gst-libs/gst/audio/audio.c @@ -131,7 +131,7 @@ gst_audio_highest_sample_value (GstPad* pad) GstCaps *caps = NULL; caps = GST_PAD_CAPS (pad); - // FIXME : Please change this to a better warning method ! + /* FIXME : Please change this to a better warning method ! */ if (caps == NULL) printf ("WARNING: gstaudio: could not get caps of pad !\n"); width = gst_caps_get_int (caps, "width"); diff --git a/gst-libs/gst/idct/Makefile.am b/gst-libs/gst/idct/Makefile.am index 8fea3121..8b9d86ba 100644 --- a/gst-libs/gst/idct/Makefile.am +++ b/gst-libs/gst/idct/Makefile.am @@ -27,6 +27,6 @@ ieeetest_LDFLAGS = $(GST_LIBS) noinst_HEADERS = dct.h -libgstidct_la_LIBADD = $(GST_LIBS) +libgstidct_la_LIBADD = libgstidct_la_CFLAGS = $(GST_CFLAGS) -finline-functions -ffast-math libgstidct_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) diff --git a/gst-libs/gst/idct/ieeetest.c b/gst-libs/gst/idct/ieeetest.c index 23d64eb5..5925a6fd 100644 --- a/gst-libs/gst/idct/ieeetest.c +++ b/gst-libs/gst/idct/ieeetest.c @@ -117,7 +117,7 @@ main(int argc, char **argv) gst_trace_read_tsc(&tscstart); gst_idct_convert(idct, testout); gst_trace_read_tsc(&tscstop); - //printf("time %llu, %llu %lld\n", tscstart, tscstop, tscstop-tscstart); + /*printf("time %llu, %llu %lld\n", tscstart, tscstop, tscstop-tscstart); */ if (tscstop - tscstart < tscmin) tscmin = tscstop-tscstart; if (tscstop - tscstart > tscmax) tscmax = tscstop-tscstart; diff --git a/gst-libs/gst/idct/mmx32idct.c b/gst-libs/gst/idct/mmx32idct.c index 78bf45bf..0090d8a7 100644 --- a/gst-libs/gst/idct/mmx32idct.c +++ b/gst-libs/gst/idct/mmx32idct.c @@ -22,148 +22,148 @@ */ -// MMX32 iDCT algorithm (IEEE-1180 compliant) :: idct_mmx32() -// -// MPEG2AVI -// -------- -// v0.16B33 initial release -// -// This was one of the harder pieces of work to code. -// Intel's app-note focuses on the numerical issues of the algorithm, but -// assumes the programmer is familiar with IDCT mathematics, leaving the -// form of the complete function up to the programmer's imagination. -// -// ALGORITHM OVERVIEW -// ------------------ -// I played around with the code for quite a few hours. I came up -// with *A* working IDCT algorithm, however I'm not sure whether my routine -// is "the correct one." But rest assured, my code passes all six IEEE -// accuracy tests with plenty of margin. -// -// My IDCT algorithm consists of 4 steps: -// -// 1) IDCT-row transformation (using the IDCT-row function) on all 8 rows -// This yields an intermediate 8x8 matrix. -// -// 2) intermediate matrix transpose (mandatory) -// -// 3) IDCT-row transformation (2nd time) on all 8 rows of the intermediate -// matrix. The output is the final-result, in transposed form. -// -// 4) post-transformation matrix transpose -// (not necessary if the input-data is already transposed, this could -// be done during the MPEG "zig-zag" scan, but since my algorithm -// requires at least one transpose operation, why not re-use the -// transpose-code.) -// -// Although the (1st) and (3rd) steps use the SAME row-transform operation, -// the (3rd) step uses different shift&round constants (explained later.) -// -// Also note that the intermediate transpose (2) would not be neccessary, -// if the subsequent operation were a iDCT-column transformation. Since -// we only have the iDCT-row transform, we transpose the intermediate -// matrix and use the iDCT-row transform a 2nd time. -// -// I had to change some constants/variables for my method to work : -// -// As given by Intel, the #defines for SHIFT_INV_COL and RND_INV_COL are -// wrong. Not surprising since I'm not using a true column-transform -// operation, but the row-transform operation (as mentioned earlier.) -// round_inv_col[], which is given as "4 short" values, should have the -// same dimensions as round_inv_row[]. The corrected variables are -// shown. -// -// Intel's code defines a different table for each each row operation. -// The tables given are 0/4, 1/7, 2/6, and 5/3. My code only uses row#0. -// Using the other rows messes up the overall transform. -// -// IMPLEMENTATION DETAILs -// ---------------------- -// -// I divided the algorithm's work into two subroutines, -// 1) idct_mmx32_rows() - transforms 8 rows, then transpose -// 2) idct_mmx32_cols() - transforms 8 rows, then transpose -// yields final result ("drop-in" direct replacement for INT32 IDCT) -// -// The 2nd function is a clone of the 1st, with changes made only to the -// shift&rounding instructions. -// -// In the 1st function (rows), the shift & round instructions use -// SHIFT_INV_ROW & round_inv_row[] (renamed to r_inv_row[]) -// -// In the 2nd function (cols)-> r_inv_col[], and -// SHIFT_INV_COL & round_inv_col[] (renamed to r_inv_col[]) -// -// Each function contains an integrated transpose-operator, which comes -// AFTER the primary transformation operation. In the future, I'll optimize -// the code to do more of the transpose-work "in-place". Right now, I've -// left the code as two subroutines and a main calling function, so other -// people can read the code more easily. -// -// liaor@umcc.ais.org http://members.tripod.com/~liaor -// - - -//;============================================================================= -//; -//; AP-922 http://developer.intel.com/vtune/cbts/strmsimd -//; These examples contain code fragments for first stage iDCT 8x8 -//; (for rows) and first stage DCT 8x8 (for columns) -//; -//;============================================================================= +/* MMX32 iDCT algorithm (IEEE-1180 compliant) :: idct_mmx32() */ +/* */ +/* MPEG2AVI */ +/* -------- */ +/* v0.16B33 initial release */ +/* */ +/* This was one of the harder pieces of work to code. */ +/* Intel's app-note focuses on the numerical issues of the algorithm, but */ +/* assumes the programmer is familiar with IDCT mathematics, leaving the */ +/* form of the complete function up to the programmer's imagination. */ +/* */ +/* ALGORITHM OVERVIEW */ +/* ------------------ */ +/* I played around with the code for quite a few hours. I came up */ +/* with *A* working IDCT algorithm, however I'm not sure whether my routine */ +/* is "the correct one." But rest assured, my code passes all six IEEE */ +/* accuracy tests with plenty of margin. */ +/* */ +/* My IDCT algorithm consists of 4 steps: */ +/* */ +/* 1) IDCT-row transformation (using the IDCT-row function) on all 8 rows */ +/* This yields an intermediate 8x8 matrix. */ +/* */ +/* 2) intermediate matrix transpose (mandatory) */ +/* */ +/* 3) IDCT-row transformation (2nd time) on all 8 rows of the intermediate */ +/* matrix. The output is the final-result, in transposed form. */ +/* */ +/* 4) post-transformation matrix transpose */ +/* (not necessary if the input-data is already transposed, this could */ +/* be done during the MPEG "zig-zag" scan, but since my algorithm */ +/* requires at least one transpose operation, why not re-use the */ +/* transpose-code.) */ +/* */ +/* Although the (1st) and (3rd) steps use the SAME row-transform operation, */ +/* the (3rd) step uses different shift&round constants (explained later.) */ +/* */ +/* Also note that the intermediate transpose (2) would not be neccessary, */ +/* if the subsequent operation were a iDCT-column transformation. Since */ +/* we only have the iDCT-row transform, we transpose the intermediate */ +/* matrix and use the iDCT-row transform a 2nd time. */ +/* */ +/* I had to change some constants/variables for my method to work : */ +/* */ +/* As given by Intel, the #defines for SHIFT_INV_COL and RND_INV_COL are */ +/* wrong. Not surprising since I'm not using a true column-transform */ +/* operation, but the row-transform operation (as mentioned earlier.) */ +/* round_inv_col[], which is given as "4 short" values, should have the */ +/* same dimensions as round_inv_row[]. The corrected variables are */ +/* shown. */ +/* */ +/* Intel's code defines a different table for each each row operation. */ +/* The tables given are 0/4, 1/7, 2/6, and 5/3. My code only uses row#0. */ +/* Using the other rows messes up the overall transform. */ +/* */ +/* IMPLEMENTATION DETAILs */ +/* ---------------------- */ +/* */ +/* I divided the algorithm's work into two subroutines, */ +/* 1) idct_mmx32_rows() - transforms 8 rows, then transpose */ +/* 2) idct_mmx32_cols() - transforms 8 rows, then transpose */ +/* yields final result ("drop-in" direct replacement for INT32 IDCT) */ +/* */ +/* The 2nd function is a clone of the 1st, with changes made only to the */ +/* shift&rounding instructions. */ +/* */ +/* In the 1st function (rows), the shift & round instructions use */ +/* SHIFT_INV_ROW & round_inv_row[] (renamed to r_inv_row[]) */ +/* */ +/* In the 2nd function (cols)-> r_inv_col[], and */ +/* SHIFT_INV_COL & round_inv_col[] (renamed to r_inv_col[]) */ +/* */ +/* Each function contains an integrated transpose-operator, which comes */ +/* AFTER the primary transformation operation. In the future, I'll optimize */ +/* the code to do more of the transpose-work "in-place". Right now, I've */ +/* left the code as two subroutines and a main calling function, so other */ +/* people can read the code more easily. */ +/* */ +/* liaor@umcc.ais.org http://members.tripod.com/~liaor */ +/* */ + + +/*;============================================================================= */ +/*; */ +/*; AP-922 http://developer.intel.com/vtune/cbts/strmsimd */ +/*; These examples contain code fragments for first stage iDCT 8x8 */ +/*; (for rows) and first stage DCT 8x8 (for columns) */ +/*; */ +/*;============================================================================= */ /* mword typedef qword qword ptr equ mword ptr */ #include -#define BITS_INV_ACC 4 //; 4 or 5 for IEEE - // 5 yields higher accuracy, but lessens dynamic range on the input matrix +#define BITS_INV_ACC 4 /*; 4 or 5 for IEEE */ + /* 5 yields higher accuracy, but lessens dynamic range on the input matrix */ #define SHIFT_INV_ROW (16 - BITS_INV_ACC) -#define SHIFT_INV_COL (1 + BITS_INV_ACC +14 ) // changed from Intel's val) -//#define SHIFT_INV_COL (1 + BITS_INV_ACC ) +#define SHIFT_INV_COL (1 + BITS_INV_ACC +14 ) /* changed from Intel's val) */ +/*#define SHIFT_INV_COL (1 + BITS_INV_ACC ) */ #define RND_INV_ROW (1 << (SHIFT_INV_ROW-1)) #define RND_INV_COL (1 << (SHIFT_INV_COL-1)) -#define RND_INV_CORR (RND_INV_COL - 1) //; correction -1.0 and round -//#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) //; 1 << (SHIFT_INV_ROW-1) -//#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) //; 1 << (SHIFT_INV_COL-1) +#define RND_INV_CORR (RND_INV_COL - 1) /*; correction -1.0 and round */ +/*#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) //; 1 << (SHIFT_INV_ROW-1) */ +/*#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) //; 1 << (SHIFT_INV_COL-1) */ -//.data -//Align 16 +/*.data */ +/*Align 16 */ const static long r_inv_row[2] = { RND_INV_ROW, RND_INV_ROW}; const static long r_inv_col[2] = {RND_INV_COL, RND_INV_COL}; const static long r_inv_corr[2] = {RND_INV_CORR, RND_INV_CORR }; -//const static short r_inv_col[4] = -// {RND_INV_COL, RND_INV_COL, RND_INV_COL, RND_INV_COL}; -//const static short r_inv_corr[4] = -// {RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR}; +/*const static short r_inv_col[4] = */ +/* {RND_INV_COL, RND_INV_COL, RND_INV_COL, RND_INV_COL}; */ +/*const static short r_inv_corr[4] = */ +/* {RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR}; */ /* constants for the forward DCT -//#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy -//#define SHIFT_FRW_COL BITS_FRW_ACC -//#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) -//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) +/*#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy */ +/*#define SHIFT_FRW_COL BITS_FRW_ACC */ +/*#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) */ +/*#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) */ const static __int64 one_corr = 0x0001000100010001; const static long r_frw_row[2] = {RND_FRW_ROW, RND_FRW_ROW }; -//const static short tg_1_16[4] = {13036, 13036, 13036, 13036 }; //tg * (2<<16) + 0.5 -//const static short tg_2_16[4] = {27146, 27146, 27146, 27146 }; //tg * (2<<16) + 0.5 -//const static short tg_3_16[4] = {-21746, -21746, -21746, -21746 }; //tg * (2<<16) + 0.5 -//const static short cos_4_16[4] = {-19195, -19195, -19195, -19195 }; //cos * (2<<16) + 0.5 -//const static short ocos_4_16[4] = {23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5 +/*const static short tg_1_16[4] = {13036, 13036, 13036, 13036 }; //tg * (2<<16) + 0.5 */ +/*const static short tg_2_16[4] = {27146, 27146, 27146, 27146 }; //tg * (2<<16) + 0.5 */ +/*const static short tg_3_16[4] = {-21746, -21746, -21746, -21746 }; //tg * (2<<16) + 0.5 */ +/*const static short cos_4_16[4] = {-19195, -19195, -19195, -19195 }; //cos * (2<<16) + 0.5 */ +/*const static short ocos_4_16[4] = {23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5 */ -//concatenated table, for forward DCT transformation +/*concatenated table, for forward DCT transformation */ const static short tg_all_16[] = { - 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 - 27146, 27146, 27146, 27146, //tg * (2<<16) + 0.5 - -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 - -19195, -19195, -19195, -19195, //cos * (2<<16) + 0.5 - 23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5 + 13036, 13036, 13036, 13036, /* tg * (2<<16) + 0.5 */ + 27146, 27146, 27146, 27146, /*tg * (2<<16) + 0.5 */ + -21746, -21746, -21746, -21746, /* tg * (2<<16) + 0.5 */ + -19195, -19195, -19195, -19195, /*cos * (2<<16) + 0.5 */ + 23170, 23170, 23170, 23170 }; /*cos * (2<<15) + 0.5 */ #define tg_1_16 (tg_all_16 + 0) #define tg_2_16 (tg_all_16 + 8) @@ -231,553 +231,553 @@ const static short tg_all_16[] = { IF _MMX ; MMX code ;============================================================================= -//; Table for rows 0,4 - constants are multiplied by cos_4_16 +/*; Table for rows 0,4 - constants are multiplied by cos_4_16 */ const short tab_i_04[] = { - 16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00 - 21407, 8867, 8867, -21407, // w07 w05 w03 w01 - 16384, -16384, 16384, 16384, //; w14 w12 w10 w08 - -8867, 21407, -21407, -8867, //; w15 w13 w11 w09 - 22725, 12873, 19266, -22725, //; w22 w20 w18 w16 - 19266, 4520, -4520, -12873, //; w23 w21 w19 w17 - 12873, 4520, 4520, 19266, //; w30 w28 w26 w24 - -22725, 19266, -12873, -22725 };//w31 w29 w27 w25 - -//; Table for rows 1,7 - constants are multiplied by cos_1_16 + 16384, 16384, 16384, -16384, /* ; movq-> w06 w04 w02 w00 */ + 21407, 8867, 8867, -21407, /* w07 w05 w03 w01 */ + 16384, -16384, 16384, 16384, /*; w14 w12 w10 w08 */ + -8867, 21407, -21407, -8867, /*; w15 w13 w11 w09 */ + 22725, 12873, 19266, -22725, /*; w22 w20 w18 w16 */ + 19266, 4520, -4520, -12873, /*; w23 w21 w19 w17 */ + 12873, 4520, 4520, 19266, /*; w30 w28 w26 w24 */ + -22725, 19266, -12873, -22725 };/*w31 w29 w27 w25 */ + +/*; Table for rows 1,7 - constants are multiplied by cos_1_16 */ const short tab_i_17[] = { - 22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00 - 29692, 12299, 12299, -29692, // ; w07 w05 w03 w01 - 22725, -22725, 22725, 22725, //; w14 w12 w10 w08 - -12299, 29692, -29692, -12299, //; w15 w13 w11 w09 - 31521, 17855, 26722, -31521, //; w22 w20 w18 w16 - 26722, 6270, -6270, -17855, //; w23 w21 w19 w17 - 17855, 6270, 6270, 26722, //; w30 w28 w26 w24 - -31521, 26722, -17855, -31521}; // w31 w29 w27 w25 - -//; Table for rows 2,6 - constants are multiplied by cos_2_16 + 22725, 22725, 22725, -22725, /* ; movq-> w06 w04 w02 w00 */ + 29692, 12299, 12299, -29692, /* ; w07 w05 w03 w01 */ + 22725, -22725, 22725, 22725, /*; w14 w12 w10 w08 */ + -12299, 29692, -29692, -12299, /*; w15 w13 w11 w09 */ + 31521, 17855, 26722, -31521, /*; w22 w20 w18 w16 */ + 26722, 6270, -6270, -17855, /*; w23 w21 w19 w17 */ + 17855, 6270, 6270, 26722, /*; w30 w28 w26 w24 */ + -31521, 26722, -17855, -31521}; /* w31 w29 w27 w25 */ + +/*; Table for rows 2,6 - constants are multiplied by cos_2_16 */ const short tab_i_26[] = { - 21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00 - 27969, 11585, 11585, -27969, // ; w07 w05 w03 w01 - 21407, -21407, 21407, 21407, // ; w14 w12 w10 w08 - -11585, 27969, -27969, -11585, // ;w15 w13 w11 w09 - 29692, 16819, 25172, -29692, // ;w22 w20 w18 w16 - 25172, 5906, -5906, -16819, // ;w23 w21 w19 w17 - 16819, 5906, 5906, 25172, // ;w30 w28 w26 w24 - -29692, 25172, -16819, -29692}; // ;w31 w29 w27 w25 + 21407, 21407, 21407, -21407, /* ; movq-> w06 w04 w02 w00 */ + 27969, 11585, 11585, -27969, /* ; w07 w05 w03 w01 */ + 21407, -21407, 21407, 21407, /* ; w14 w12 w10 w08 */ + -11585, 27969, -27969, -11585, /* ;w15 w13 w11 w09 */ + 29692, 16819, 25172, -29692, /* ;w22 w20 w18 w16 */ + 25172, 5906, -5906, -16819, /* ;w23 w21 w19 w17 */ + 16819, 5906, 5906, 25172, /* ;w30 w28 w26 w24 */ + -29692, 25172, -16819, -29692}; /* ;w31 w29 w27 w25 */ -//; Table for rows 3,5 - constants are multiplied by cos_3_16 +/*; Table for rows 3,5 - constants are multiplied by cos_3_16 */ const short tab_i_35[] = { - 19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00 - 25172, 10426, 10426, -25172, //; w07 w05 w03 w01 - 19266, -19266, 19266, 19266, //; w14 w12 w10 w08 - -10426, 25172, -25172, -10426, //; w15 w13 w11 w09 - 26722, 15137, 22654, -26722, //; w22 w20 w18 w16 - 22654, 5315, -5315, -15137, //; w23 w21 w19 w17 - 15137, 5315, 5315, 22654, //; w30 w28 w26 w24 - -26722, 22654, -15137, -26722}; //; w31 w29 w27 w25 + 19266, 19266, 19266, -19266, /*; movq-> w06 w04 w02 w00 */ + 25172, 10426, 10426, -25172, /*; w07 w05 w03 w01 */ + 19266, -19266, 19266, 19266, /*; w14 w12 w10 w08 */ + -10426, 25172, -25172, -10426, /*; w15 w13 w11 w09 */ + 26722, 15137, 22654, -26722, /*; w22 w20 w18 w16 */ + 22654, 5315, -5315, -15137, /*; w23 w21 w19 w17 */ + 15137, 5315, 5315, 22654, /*; w30 w28 w26 w24 */ + -26722, 22654, -15137, -26722}; /*; w31 w29 w27 w25 */ */ -// CONCATENATED TABLE, rows 0,1,2,3,4,5,6,7 (in order ) -// -// In our implementation, however, we only use row0 ! -// +/* CONCATENATED TABLE, rows 0,1,2,3,4,5,6,7 (in order ) */ +/* */ +/* In our implementation, however, we only use row0 ! */ +/* */ static const short tab_i_01234567[] = { - //row0, this row is required - 16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00 - 21407, 8867, 8867, -21407, // w07 w05 w03 w01 - 16384, -16384, 16384, 16384, //; w14 w12 w10 w08 - -8867, 21407, -21407, -8867, //; w15 w13 w11 w09 - 22725, 12873, 19266, -22725, //; w22 w20 w18 w16 - 19266, 4520, -4520, -12873, //; w23 w21 w19 w17 - 12873, 4520, 4520, 19266, //; w30 w28 w26 w24 - -22725, 19266, -12873, -22725, //w31 w29 w27 w25 - - // the rest of these rows (1-7), aren't used ! - - //row1 - 22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00 - 29692, 12299, 12299, -29692, // ; w07 w05 w03 w01 - 22725, -22725, 22725, 22725, //; w14 w12 w10 w08 - -12299, 29692, -29692, -12299, //; w15 w13 w11 w09 - 31521, 17855, 26722, -31521, //; w22 w20 w18 w16 - 26722, 6270, -6270, -17855, //; w23 w21 w19 w17 - 17855, 6270, 6270, 26722, //; w30 w28 w26 w24 - -31521, 26722, -17855, -31521, // w31 w29 w27 w25 - - //row2 - 21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00 - 27969, 11585, 11585, -27969, // ; w07 w05 w03 w01 - 21407, -21407, 21407, 21407, // ; w14 w12 w10 w08 - -11585, 27969, -27969, -11585, // ;w15 w13 w11 w09 - 29692, 16819, 25172, -29692, // ;w22 w20 w18 w16 - 25172, 5906, -5906, -16819, // ;w23 w21 w19 w17 - 16819, 5906, 5906, 25172, // ;w30 w28 w26 w24 - -29692, 25172, -16819, -29692, // ;w31 w29 w27 w25 - - //row3 - 19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00 - 25172, 10426, 10426, -25172, //; w07 w05 w03 w01 - 19266, -19266, 19266, 19266, //; w14 w12 w10 w08 - -10426, 25172, -25172, -10426, //; w15 w13 w11 w09 - 26722, 15137, 22654, -26722, //; w22 w20 w18 w16 - 22654, 5315, -5315, -15137, //; w23 w21 w19 w17 - 15137, 5315, 5315, 22654, //; w30 w28 w26 w24 - -26722, 22654, -15137, -26722, //; w31 w29 w27 w25 - - //row4 - 16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00 - 21407, 8867, 8867, -21407, // w07 w05 w03 w01 - 16384, -16384, 16384, 16384, //; w14 w12 w10 w08 - -8867, 21407, -21407, -8867, //; w15 w13 w11 w09 - 22725, 12873, 19266, -22725, //; w22 w20 w18 w16 - 19266, 4520, -4520, -12873, //; w23 w21 w19 w17 - 12873, 4520, 4520, 19266, //; w30 w28 w26 w24 - -22725, 19266, -12873, -22725, //w31 w29 w27 w25 - - //row5 - 19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00 - 25172, 10426, 10426, -25172, //; w07 w05 w03 w01 - 19266, -19266, 19266, 19266, //; w14 w12 w10 w08 - -10426, 25172, -25172, -10426, //; w15 w13 w11 w09 - 26722, 15137, 22654, -26722, //; w22 w20 w18 w16 - 22654, 5315, -5315, -15137, //; w23 w21 w19 w17 - 15137, 5315, 5315, 22654, //; w30 w28 w26 w24 - -26722, 22654, -15137, -26722, //; w31 w29 w27 w25 - - //row6 - 21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00 - 27969, 11585, 11585, -27969, // ; w07 w05 w03 w01 - 21407, -21407, 21407, 21407, // ; w14 w12 w10 w08 - -11585, 27969, -27969, -11585, // ;w15 w13 w11 w09 - 29692, 16819, 25172, -29692, // ;w22 w20 w18 w16 - 25172, 5906, -5906, -16819, // ;w23 w21 w19 w17 - 16819, 5906, 5906, 25172, // ;w30 w28 w26 w24 - -29692, 25172, -16819, -29692, // ;w31 w29 w27 w25 - - //row7 - 22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00 - 29692, 12299, 12299, -29692, // ; w07 w05 w03 w01 - 22725, -22725, 22725, 22725, //; w14 w12 w10 w08 - -12299, 29692, -29692, -12299, //; w15 w13 w11 w09 - 31521, 17855, 26722, -31521, //; w22 w20 w18 w16 - 26722, 6270, -6270, -17855, //; w23 w21 w19 w17 - 17855, 6270, 6270, 26722, //; w30 w28 w26 w24 - -31521, 26722, -17855, -31521}; // w31 w29 w27 w25 - - -#define INP eax // pointer to (short *blk) -#define OUT ecx // pointer to output (temporary store space qwTemp[]) -#define TABLE ebx // pointer to tab_i_01234567[] + /*row0, this row is required */ + 16384, 16384, 16384, -16384, /* ; movq-> w06 w04 w02 w00 */ + 21407, 8867, 8867, -21407, /* w07 w05 w03 w01 */ + 16384, -16384, 16384, 16384, /*; w14 w12 w10 w08 */ + -8867, 21407, -21407, -8867, /*; w15 w13 w11 w09 */ + 22725, 12873, 19266, -22725, /*; w22 w20 w18 w16 */ + 19266, 4520, -4520, -12873, /*; w23 w21 w19 w17 */ + 12873, 4520, 4520, 19266, /*; w30 w28 w26 w24 */ + -22725, 19266, -12873, -22725, /*w31 w29 w27 w25 */ + + /* the rest of these rows (1-7), aren't used ! */ + + /*row1 */ + 22725, 22725, 22725, -22725, /* ; movq-> w06 w04 w02 w00 */ + 29692, 12299, 12299, -29692, /* ; w07 w05 w03 w01 */ + 22725, -22725, 22725, 22725, /*; w14 w12 w10 w08 */ + -12299, 29692, -29692, -12299, /*; w15 w13 w11 w09 */ + 31521, 17855, 26722, -31521, /*; w22 w20 w18 w16 */ + 26722, 6270, -6270, -17855, /*; w23 w21 w19 w17 */ + 17855, 6270, 6270, 26722, /*; w30 w28 w26 w24 */ + -31521, 26722, -17855, -31521, /* w31 w29 w27 w25 */ + + /*row2 */ + 21407, 21407, 21407, -21407, /* ; movq-> w06 w04 w02 w00 */ + 27969, 11585, 11585, -27969, /* ; w07 w05 w03 w01 */ + 21407, -21407, 21407, 21407, /* ; w14 w12 w10 w08 */ + -11585, 27969, -27969, -11585, /* ;w15 w13 w11 w09 */ + 29692, 16819, 25172, -29692, /* ;w22 w20 w18 w16 */ + 25172, 5906, -5906, -16819, /* ;w23 w21 w19 w17 */ + 16819, 5906, 5906, 25172, /* ;w30 w28 w26 w24 */ + -29692, 25172, -16819, -29692, /* ;w31 w29 w27 w25 */ + + /*row3 */ + 19266, 19266, 19266, -19266, /*; movq-> w06 w04 w02 w00 */ + 25172, 10426, 10426, -25172, /*; w07 w05 w03 w01 */ + 19266, -19266, 19266, 19266, /*; w14 w12 w10 w08 */ + -10426, 25172, -25172, -10426, /*; w15 w13 w11 w09 */ + 26722, 15137, 22654, -26722, /*; w22 w20 w18 w16 */ + 22654, 5315, -5315, -15137, /*; w23 w21 w19 w17 */ + 15137, 5315, 5315, 22654, /*; w30 w28 w26 w24 */ + -26722, 22654, -15137, -26722, /*; w31 w29 w27 w25 */ + + /*row4 */ + 16384, 16384, 16384, -16384, /* ; movq-> w06 w04 w02 w00 */ + 21407, 8867, 8867, -21407, /* w07 w05 w03 w01 */ + 16384, -16384, 16384, 16384, /*; w14 w12 w10 w08 */ + -8867, 21407, -21407, -8867, /*; w15 w13 w11 w09 */ + 22725, 12873, 19266, -22725, /*; w22 w20 w18 w16 */ + 19266, 4520, -4520, -12873, /*; w23 w21 w19 w17 */ + 12873, 4520, 4520, 19266, /*; w30 w28 w26 w24 */ + -22725, 19266, -12873, -22725, /*w31 w29 w27 w25 */ + + /*row5 */ + 19266, 19266, 19266, -19266, /*; movq-> w06 w04 w02 w00 */ + 25172, 10426, 10426, -25172, /*; w07 w05 w03 w01 */ + 19266, -19266, 19266, 19266, /*; w14 w12 w10 w08 */ + -10426, 25172, -25172, -10426, /*; w15 w13 w11 w09 */ + 26722, 15137, 22654, -26722, /*; w22 w20 w18 w16 */ + 22654, 5315, -5315, -15137, /*; w23 w21 w19 w17 */ + 15137, 5315, 5315, 22654, /*; w30 w28 w26 w24 */ + -26722, 22654, -15137, -26722, /*; w31 w29 w27 w25 */ + + /*row6 */ + 21407, 21407, 21407, -21407, /* ; movq-> w06 w04 w02 w00 */ + 27969, 11585, 11585, -27969, /* ; w07 w05 w03 w01 */ + 21407, -21407, 21407, 21407, /* ; w14 w12 w10 w08 */ + -11585, 27969, -27969, -11585, /* ;w15 w13 w11 w09 */ + 29692, 16819, 25172, -29692, /* ;w22 w20 w18 w16 */ + 25172, 5906, -5906, -16819, /* ;w23 w21 w19 w17 */ + 16819, 5906, 5906, 25172, /* ;w30 w28 w26 w24 */ + -29692, 25172, -16819, -29692, /* ;w31 w29 w27 w25 */ + + /*row7 */ + 22725, 22725, 22725, -22725, /* ; movq-> w06 w04 w02 w00 */ + 29692, 12299, 12299, -29692, /* ; w07 w05 w03 w01 */ + 22725, -22725, 22725, 22725, /*; w14 w12 w10 w08 */ + -12299, 29692, -29692, -12299, /*; w15 w13 w11 w09 */ + 31521, 17855, 26722, -31521, /*; w22 w20 w18 w16 */ + 26722, 6270, -6270, -17855, /*; w23 w21 w19 w17 */ + 17855, 6270, 6270, 26722, /*; w30 w28 w26 w24 */ + -31521, 26722, -17855, -31521}; /* w31 w29 w27 w25 */ + + +#define INP eax /* pointer to (short *blk) */ +#define OUT ecx /* pointer to output (temporary store space qwTemp[]) */ +#define TABLE ebx /* pointer to tab_i_01234567[] */ #define round_inv_row edx #define round_inv_col edx -#define ROW_STRIDE 8 // for 8x8 matrix transposer +#define ROW_STRIDE 8 /* for 8x8 matrix transposer */ -// private variables and functions +/* private variables and functions */ -//temporary storage space, 8x8 of shorts +/*temporary storage space, 8x8 of shorts */ -__inline static void idct_mmx32_rows( short *blk ); // transform rows -__inline static void idct_mmx32_cols( short *blk ); // transform "columns" - // the "column" transform actually transforms rows, it is - // identical to the row-transform except for the ROUNDING - // and SHIFTING coefficients. +__inline static void idct_mmx32_rows( short *blk ); /* transform rows */ +__inline static void idct_mmx32_cols( short *blk ); /* transform "columns" */ + /* the "column" transform actually transforms rows, it is */ + /* identical to the row-transform except for the ROUNDING */ + /* and SHIFTING coefficients. */ static void -idct_mmx32_rows( short *blk ) // transform all 8 rows of 8x8 iDCT block +idct_mmx32_rows( short *blk ) /* transform all 8 rows of 8x8 iDCT block */ { int x; short qwTemp[64]; short *out = &qwTemp[0]; short *inptr = blk; - // this subroutine performs two operations - // 1) iDCT row transform - // for( i = 0; i < 8; ++ i) - // DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] ); - // - // 2) transpose the matrix (which was stored in qwTemp[]) - // qwTemp[] -> [8x8 matrix transpose] -> blk[] + /* this subroutine performs two operations */ + /* 1) iDCT row transform */ + /* for( i = 0; i < 8; ++ i) */ + /* DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] ); */ + /* */ + /* 2) transpose the matrix (which was stored in qwTemp[]) */ + /* qwTemp[] -> [8x8 matrix transpose] -> blk[] */ - for (x=0; x<8; x++) { // transform one row per iteration - movq_m2r(*(inptr), mm0); // 0 ; x3 x2 x1 x0 + for (x=0; x<8; x++) { /* transform one row per iteration */ + movq_m2r(*(inptr), mm0); /* 0 ; x3 x2 x1 x0 */ - movq_m2r(*(inptr+4), mm1); // 1 ; x7 x6 x5 x4 - movq_r2r(mm0, mm2); // 2 ; x3 x2 x1 x0 + movq_m2r(*(inptr+4), mm1); /* 1 ; x7 x6 x5 x4 */ + movq_r2r(mm0, mm2); /* 2 ; x3 x2 x1 x0 */ - movq_m2r(*(tab_i_01234567), mm3); // 3 ; w06 w04 w02 w00 - punpcklwd_r2r(mm1, mm0); // x5 x1 x4 x0 + movq_m2r(*(tab_i_01234567), mm3); /* 3 ; w06 w04 w02 w00 */ + punpcklwd_r2r(mm1, mm0); /* x5 x1 x4 x0 */ - // ---------- - movq_r2r(mm0, mm5); // 5 ; x5 x1 x4 x0 - punpckldq_r2r(mm0, mm0); // x4 x0 x4 x0 + /* ---------- */ + movq_r2r(mm0, mm5); /* 5 ; x5 x1 x4 x0 */ + punpckldq_r2r(mm0, mm0); /* x4 x0 x4 x0 */ - movq_m2r(*(tab_i_01234567+4), mm4); // 4 ; w07 w05 w03 w01 - punpckhwd_r2r(mm1, mm2); // 1 ; x7 x3 x6 x2 + movq_m2r(*(tab_i_01234567+4), mm4); /* 4 ; w07 w05 w03 w01 */ + punpckhwd_r2r(mm1, mm2); /* 1 ; x7 x3 x6 x2 */ - pmaddwd_r2r(mm0, mm3); // x4*w06+x0*w04 x4*w02+x0*w00 - movq_r2r(mm2, mm6); // 6 ; x7 x3 x6 x2 + pmaddwd_r2r(mm0, mm3); /* x4*w06+x0*w04 x4*w02+x0*w00 */ + movq_r2r(mm2, mm6); /* 6 ; x7 x3 x6 x2 */ - movq_m2r(*(tab_i_01234567+16), mm1);// 1 ; w22 w20 w18 w16 - punpckldq_r2r(mm2, mm2); // x6 x2 x6 x2 + movq_m2r(*(tab_i_01234567+16), mm1);/* 1 ; w22 w20 w18 w16 */ + punpckldq_r2r(mm2, mm2); /* x6 x2 x6 x2 */ - pmaddwd_r2r(mm2, mm4); // x6*w07+x2*w05 x6*w03+x2*w01 - punpckhdq_r2r(mm5, mm5); // x5 x1 x5 x1 + pmaddwd_r2r(mm2, mm4); /* x6*w07+x2*w05 x6*w03+x2*w01 */ + punpckhdq_r2r(mm5, mm5); /* x5 x1 x5 x1 */ - pmaddwd_m2r(*(tab_i_01234567+8), mm0);// x4*w14+x0*w12 x4*w10+x0*w08 - punpckhdq_r2r(mm6, mm6); // x7 x3 x7 x3 + pmaddwd_m2r(*(tab_i_01234567+8), mm0);/* x4*w14+x0*w12 x4*w10+x0*w08 */ + punpckhdq_r2r(mm6, mm6); /* x7 x3 x7 x3 */ - movq_m2r(*(tab_i_01234567+20), mm7);// 7 ; w23 w21 w19 w17 - pmaddwd_r2r(mm5, mm1); // x5*w22+x1*w20 x5*w18+x1*w16 + movq_m2r(*(tab_i_01234567+20), mm7);/* 7 ; w23 w21 w19 w17 */ + pmaddwd_r2r(mm5, mm1); /* x5*w22+x1*w20 x5*w18+x1*w16 */ - paddd_m2r(*(r_inv_row), mm3);// +rounder - pmaddwd_r2r(mm6, mm7); // x7*w23+x3*w21 x7*w19+x3*w17 + paddd_m2r(*(r_inv_row), mm3);/* +rounder */ + pmaddwd_r2r(mm6, mm7); /* x7*w23+x3*w21 x7*w19+x3*w17 */ - pmaddwd_m2r(*(tab_i_01234567+12), mm2);// x6*w15+x2*w13 x6*w11+x2*w09 - paddd_r2r(mm4, mm3); // 4 ; a1=sum(even1) a0=sum(even0) + pmaddwd_m2r(*(tab_i_01234567+12), mm2);/* x6*w15+x2*w13 x6*w11+x2*w09 */ + paddd_r2r(mm4, mm3); /* 4 ; a1=sum(even1) a0=sum(even0) */ - pmaddwd_m2r(*(tab_i_01234567+24), mm5);// x5*w30+x1*w28 x5*w26+x1*w24 - movq_r2r(mm3, mm4); // 4 ; a1 a0 + pmaddwd_m2r(*(tab_i_01234567+24), mm5);/* x5*w30+x1*w28 x5*w26+x1*w24 */ + movq_r2r(mm3, mm4); /* 4 ; a1 a0 */ - pmaddwd_m2r(*(tab_i_01234567+28), mm6);// x7*w31+x3*w29 x7*w27+x3*w25 - paddd_r2r(mm7, mm1); // 7 ; b1=sum(odd1) b0=sum(odd0) + pmaddwd_m2r(*(tab_i_01234567+28), mm6);/* x7*w31+x3*w29 x7*w27+x3*w25 */ + paddd_r2r(mm7, mm1); /* 7 ; b1=sum(odd1) b0=sum(odd0) */ - paddd_m2r(*(r_inv_row), mm0);// +rounder - psubd_r2r(mm1, mm3); // a1-b1 a0-b0 + paddd_m2r(*(r_inv_row), mm0);/* +rounder */ + psubd_r2r(mm1, mm3); /* a1-b1 a0-b0 */ - psrad_i2r(SHIFT_INV_ROW, mm3); // y6=a1-b1 y7=a0-b0 - paddd_r2r(mm4, mm1); // 4 ; a1+b1 a0+b0 + psrad_i2r(SHIFT_INV_ROW, mm3); /* y6=a1-b1 y7=a0-b0 */ + paddd_r2r(mm4, mm1); /* 4 ; a1+b1 a0+b0 */ - paddd_r2r(mm2, mm0); // 2 ; a3=sum(even3) a2=sum(even2) - psrad_i2r(SHIFT_INV_ROW, mm1); // y1=a1+b1 y0=a0+b0 + paddd_r2r(mm2, mm0); /* 2 ; a3=sum(even3) a2=sum(even2) */ + psrad_i2r(SHIFT_INV_ROW, mm1); /* y1=a1+b1 y0=a0+b0 */ - paddd_r2r(mm6, mm5); // 6 ; b3=sum(odd3) b2=sum(odd2) - movq_r2r(mm0, mm4); // 4 ; a3 a2 + paddd_r2r(mm6, mm5); /* 6 ; b3=sum(odd3) b2=sum(odd2) */ + movq_r2r(mm0, mm4); /* 4 ; a3 a2 */ - paddd_r2r(mm5, mm0); // a3+b3 a2+b2 - psubd_r2r(mm5, mm4); // 5 ; a3-b3 a2-b2 + paddd_r2r(mm5, mm0); /* a3+b3 a2+b2 */ + psubd_r2r(mm5, mm4); /* 5 ; a3-b3 a2-b2 */ - psrad_i2r(SHIFT_INV_ROW, mm4); // y4=a3-b3 y5=a2-b2 - psrad_i2r(SHIFT_INV_ROW, mm0); // y3=a3+b3 y2=a2+b2 + psrad_i2r(SHIFT_INV_ROW, mm4); /* y4=a3-b3 y5=a2-b2 */ + psrad_i2r(SHIFT_INV_ROW, mm0); /* y3=a3+b3 y2=a2+b2 */ - packssdw_r2r(mm3, mm4); // 3 ; y6 y7 y4 y5 + packssdw_r2r(mm3, mm4); /* 3 ; y6 y7 y4 y5 */ - packssdw_r2r(mm0, mm1); // 0 ; y3 y2 y1 y0 - movq_r2r(mm4, mm7); // 7 ; y6 y7 y4 y5 + packssdw_r2r(mm0, mm1); /* 0 ; y3 y2 y1 y0 */ + movq_r2r(mm4, mm7); /* 7 ; y6 y7 y4 y5 */ - psrld_i2r(16, mm4); // 0 y6 0 y4 + psrld_i2r(16, mm4); /* 0 y6 0 y4 */ - movq_r2m(mm1, *(out)); // 1 ; save y3 y2 y1 y0 - pslld_i2r(16, mm7); // y7 0 y5 0 + movq_r2m(mm1, *(out)); /* 1 ; save y3 y2 y1 y0 */ + pslld_i2r(16, mm7); /* y7 0 y5 0 */ - por_r2r(mm4, mm7); // 4 ; y7 y6 y5 y4 + por_r2r(mm4, mm7); /* 4 ; y7 y6 y5 y4 */ - // begin processing row 1 - movq_r2m(mm7, *(out+4)); // 7 ; save y7 y6 y5 y4 + /* begin processing row 1 */ + movq_r2m(mm7, *(out+4)); /* 7 ; save y7 y6 y5 y4 */ inptr += 8; out += 8; } - // done with the iDCT row-transformation + /* done with the iDCT row-transformation */ - // now we have to transpose the output 8x8 matrix - // 8x8 (OUT) -> 8x8't' (IN) - // the transposition is implemented as 4 sub-operations. - // 1) transpose upper-left quad - // 2) transpose lower-right quad - // 3) transpose lower-left quad - // 4) transpose upper-right quad + /* now we have to transpose the output 8x8 matrix */ + /* 8x8 (OUT) -> 8x8't' (IN) */ + /* the transposition is implemented as 4 sub-operations. */ + /* 1) transpose upper-left quad */ + /* 2) transpose lower-right quad */ + /* 3) transpose lower-left quad */ + /* 4) transpose upper-right quad */ - // mm0 = 1st row [ A B C D ] row1 - // mm1 = 2nd row [ E F G H ] 2 - // mm2 = 3rd row [ I J K L ] 3 - // mm3 = 4th row [ M N O P ] 4 + /* mm0 = 1st row [ A B C D ] row1 */ + /* mm1 = 2nd row [ E F G H ] 2 */ + /* mm2 = 3rd row [ I J K L ] 3 */ + /* mm3 = 4th row [ M N O P ] 4 */ - // 1) transpose upper-left quad + /* 1) transpose upper-left quad */ out = &qwTemp[0]; movq_m2r(*(out + ROW_STRIDE * 0), mm0); movq_m2r(*(out + ROW_STRIDE * 1), mm1); - movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D] + movq_r2r(mm0, mm4); /* mm4 = copy of row1[A B C D] */ movq_m2r(*(out + ROW_STRIDE * 2), mm2); - punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5] + punpcklwd_r2r(mm1, mm0); /* mm0 = [ 0 4 1 5] */ movq_m2r(*(out + ROW_STRIDE * 3), mm3); - punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7] + punpckhwd_r2r(mm1, mm4); /* mm4 = [ 2 6 3 7] */ movq_r2r(mm2, mm6); - punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13] + punpcklwd_r2r(mm3, mm2); /* mm2 = [ 8 12 9 13] */ - punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15] - movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5] + punpckhwd_r2r(mm3, mm6); /* mm6 = 10 14 11 15] */ + movq_r2r(mm0, mm1); /* mm1 = [ 0 4 1 5] */ inptr = blk; - punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12] + punpckldq_r2r(mm2, mm0); /* final result mm0 = row1 [0 4 8 12] */ - movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7] - punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13] + movq_r2r(mm4, mm3); /* mm3 = [ 2 6 3 7] */ + punpckhdq_r2r(mm2, mm1); /* mm1 = final result mm1 = row2 [1 5 9 13] */ - movq_r2m(mm0, *(inptr + ROW_STRIDE * 0)); // store row 1 - punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14] + movq_r2m(mm0, *(inptr + ROW_STRIDE * 0)); /* store row 1 */ + punpckldq_r2r(mm6, mm4); /* final result mm4 = row3 [2 6 10 14] */ -// begin reading next quadrant (lower-right) +/* begin reading next quadrant (lower-right) */ movq_m2r(*(out + ROW_STRIDE*4 + 4), mm0); - punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15] + punpckhdq_r2r(mm6, mm3); /* final result mm3 = row4 [3 7 11 15] */ - movq_r2m(mm4, *(inptr + ROW_STRIDE * 2)); // store row 3 - movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D] + movq_r2m(mm4, *(inptr + ROW_STRIDE * 2)); /* store row 3 */ + movq_r2r(mm0, mm4); /* mm4 = copy of row1[A B C D] */ - movq_r2m(mm1, *(inptr + ROW_STRIDE * 1)); // store row 2 + movq_r2m(mm1, *(inptr + ROW_STRIDE * 1)); /* store row 2 */ movq_m2r(*(out + ROW_STRIDE*5 + 4), mm1); - movq_r2m(mm3, *(inptr + ROW_STRIDE * 3)); // store row 4 - punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5] + movq_r2m(mm3, *(inptr + ROW_STRIDE * 3)); /* store row 4 */ + punpcklwd_r2r(mm1, mm0); /* mm0 = [ 0 4 1 5] */ - // 2) transpose lower-right quadrant + /* 2) transpose lower-right quadrant */ -// movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8] +/* movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8] */ -// movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8] -// movq mm4, mm0; // mm4 = copy of row1[A B C D] +/* movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8] */ +/* movq mm4, mm0; // mm4 = copy of row1[A B C D] */ movq_m2r(*(out + ROW_STRIDE*6 + 4), mm2); -// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5] - punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7] +/* punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5] */ + punpckhwd_r2r(mm1, mm4); /* mm4 = [ 2 6 3 7] */ movq_m2r(*(out + ROW_STRIDE*7 + 4), mm3); movq_r2r(mm2, mm6); - punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13] - movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5] + punpcklwd_r2r(mm3, mm2); /* mm2 = [ 8 12 9 13] */ + movq_r2r(mm0, mm1); /* mm1 = [ 0 4 1 5] */ - punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15] - movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7] + punpckhwd_r2r(mm3, mm6); /* mm6 = 10 14 11 15] */ + movq_r2r(mm4, mm3); /* mm3 = [ 2 6 3 7] */ - punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12] + punpckldq_r2r(mm2, mm0); /* final result mm0 = row1 [0 4 8 12] */ - punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13] - ; // slot + punpckhdq_r2r(mm2, mm1); /* mm1 = final result mm1 = row2 [1 5 9 13] */ + ; /* slot */ - movq_r2m(mm0, *(inptr + ROW_STRIDE*4 + 4)); // store row 1 - punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14] + movq_r2m(mm0, *(inptr + ROW_STRIDE*4 + 4)); /* store row 1 */ + punpckldq_r2r(mm6, mm4); /* final result mm4 = row3 [2 6 10 14] */ movq_m2r(*(out + ROW_STRIDE * 4 ), mm0); - punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15] + punpckhdq_r2r(mm6, mm3); /* final result mm3 = row4 [3 7 11 15] */ - movq_r2m(mm4, *(inptr + ROW_STRIDE*6 + 4)); // store row 3 - movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D] + movq_r2m(mm4, *(inptr + ROW_STRIDE*6 + 4)); /* store row 3 */ + movq_r2r(mm0, mm4); /* mm4 = copy of row1[A B C D] */ - movq_r2m(mm1, *(inptr + ROW_STRIDE*5 + 4)); // store row 2 - ; // slot + movq_r2m(mm1, *(inptr + ROW_STRIDE*5 + 4)); /* store row 2 */ + ; /* slot */ movq_m2r(*(out + ROW_STRIDE * 5 ), mm1); - ; // slot + ; /* slot */ - movq_r2m(mm3, *(inptr + ROW_STRIDE*7 + 4)); // store row 4 - punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5] + movq_r2m(mm3, *(inptr + ROW_STRIDE*7 + 4)); /* store row 4 */ + punpcklwd_r2r(mm1, mm0); /* mm0 = [ 0 4 1 5] */ - // 3) transpose lower-left -// movq mm0, qword ptr [OUT + ROW_STRIDE * 4 ] + /* 3) transpose lower-left */ +/* movq mm0, qword ptr [OUT + ROW_STRIDE * 4 ] */ -// movq mm1, qword ptr [OUT + ROW_STRIDE * 5 ] -// movq mm4, mm0; // mm4 = copy of row1[A B C D] +/* movq mm1, qword ptr [OUT + ROW_STRIDE * 5 ] */ +/* movq mm4, mm0; // mm4 = copy of row1[A B C D] */ movq_m2r(*(out + ROW_STRIDE * 6 ), mm2); -// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5] - punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7] +/* punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5] */ + punpckhwd_r2r(mm1, mm4); /* mm4 = [ 2 6 3 7] */ movq_m2r(*(out + ROW_STRIDE * 7 ), mm3); movq_r2r(mm2, mm6); - punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13] - movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5] + punpcklwd_r2r(mm3, mm2); /* mm2 = [ 8 12 9 13] */ + movq_r2r(mm0, mm1); /* mm1 = [ 0 4 1 5] */ - punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15] - movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7] + punpckhwd_r2r(mm3, mm6); /* mm6 = 10 14 11 15] */ + movq_r2r(mm4, mm3); /* mm3 = [ 2 6 3 7] */ - punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12] + punpckldq_r2r(mm2, mm0); /* final result mm0 = row1 [0 4 8 12] */ - punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13] - ;//slot + punpckhdq_r2r(mm2, mm1); /* mm1 = final result mm1 = row2 [1 5 9 13] */ + ;/*slot */ - movq_r2m(mm0, *(inptr + ROW_STRIDE * 0 + 4 )); // store row 1 - punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14] + movq_r2m(mm0, *(inptr + ROW_STRIDE * 0 + 4 )); /* store row 1 */ + punpckldq_r2r(mm6, mm4); /* final result mm4 = row3 [2 6 10 14] */ -// begin reading next quadrant (upper-right) +/* begin reading next quadrant (upper-right) */ movq_m2r(*(out + ROW_STRIDE*0 + 4), mm0); - punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15] + punpckhdq_r2r(mm6, mm3); /* final result mm3 = row4 [3 7 11 15] */ - movq_r2m(mm4, *(inptr + ROW_STRIDE * 2 + 4)); // store row 3 - movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D] + movq_r2m(mm4, *(inptr + ROW_STRIDE * 2 + 4)); /* store row 3 */ + movq_r2r(mm0, mm4); /* mm4 = copy of row1[A B C D] */ - movq_r2m(mm1, *(inptr + ROW_STRIDE * 1 + 4)); // store row 2 + movq_r2m(mm1, *(inptr + ROW_STRIDE * 1 + 4)); /* store row 2 */ movq_m2r(*(out + ROW_STRIDE*1 + 4), mm1); - movq_r2m(mm3, *(inptr + ROW_STRIDE * 3 + 4)); // store row 4 - punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5] + movq_r2m(mm3, *(inptr + ROW_STRIDE * 3 + 4)); /* store row 4 */ + punpcklwd_r2r(mm1, mm0); /* mm0 = [ 0 4 1 5] */ - // 2) transpose lower-right quadrant + /* 2) transpose lower-right quadrant */ -// movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8] +/* movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8] */ -// movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8] -// movq mm4, mm0; // mm4 = copy of row1[A B C D] +/* movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8] */ +/* movq mm4, mm0; // mm4 = copy of row1[A B C D] */ movq_m2r(*(out + ROW_STRIDE*2 + 4), mm2); -// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5] - punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7] +/* punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5] */ + punpckhwd_r2r(mm1, mm4); /* mm4 = [ 2 6 3 7] */ movq_m2r(*(out + ROW_STRIDE*3 + 4), mm3); movq_r2r(mm2, mm6); - punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13] - movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5] + punpcklwd_r2r(mm3, mm2); /* mm2 = [ 8 12 9 13] */ + movq_r2r(mm0, mm1); /* mm1 = [ 0 4 1 5] */ - punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15] - movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7] + punpckhwd_r2r(mm3, mm6); /* mm6 = 10 14 11 15] */ + movq_r2r(mm4, mm3); /* mm3 = [ 2 6 3 7] */ - punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12] + punpckldq_r2r(mm2, mm0); /* final result mm0 = row1 [0 4 8 12] */ - punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13] - ; // slot + punpckhdq_r2r(mm2, mm1); /* mm1 = final result mm1 = row2 [1 5 9 13] */ + ; /* slot */ - movq_r2m(mm0, *(inptr + ROW_STRIDE*4)); // store row 1 - punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14] + movq_r2m(mm0, *(inptr + ROW_STRIDE*4)); /* store row 1 */ + punpckldq_r2r(mm6, mm4); /* final result mm4 = row3 [2 6 10 14] */ - movq_r2m(mm1, *(inptr + ROW_STRIDE*5)); // store row 2 - punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15] + movq_r2m(mm1, *(inptr + ROW_STRIDE*5)); /* store row 2 */ + punpckhdq_r2r(mm6, mm3); /* final result mm3 = row4 [3 7 11 15] */ - movq_r2m(mm4, *(inptr + ROW_STRIDE*6)); // store row 3 - ; // slot + movq_r2m(mm4, *(inptr + ROW_STRIDE*6)); /* store row 3 */ + ; /* slot */ - movq_r2m(mm3, *(inptr + ROW_STRIDE*7)); // store row 4 - ; // slot + movq_r2m(mm3, *(inptr + ROW_STRIDE*7)); /* store row 4 */ + ; /* slot */ } static void -idct_mmx32_cols( short *blk ) // transform all 8 cols of 8x8 iDCT block +idct_mmx32_cols( short *blk ) /* transform all 8 cols of 8x8 iDCT block */ { int x; short *inptr = blk; - // Despite the function's name, the matrix is transformed - // row by row. This function is identical to idct_mmx32_rows(), - // except for the SHIFT amount and ROUND_INV amount. + /* Despite the function's name, the matrix is transformed */ + /* row by row. This function is identical to idct_mmx32_rows(), */ + /* except for the SHIFT amount and ROUND_INV amount. */ - // this subroutine performs two operations - // 1) iDCT row transform - // for( i = 0; i < 8; ++ i) - // DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] ); - // - // 2) transpose the matrix (which was stored in qwTemp[]) - // qwTemp[] -> [8x8 matrix transpose] -> blk[] + /* this subroutine performs two operations */ + /* 1) iDCT row transform */ + /* for( i = 0; i < 8; ++ i) */ + /* DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] ); */ + /* */ + /* 2) transpose the matrix (which was stored in qwTemp[]) */ + /* qwTemp[] -> [8x8 matrix transpose] -> blk[] */ - for (x=0; x<8; x++) { // transform one row per iteration + for (x=0; x<8; x++) { /* transform one row per iteration */ - movq_m2r(*(inptr), mm0); // 0 ; x3 x2 x1 x0 + movq_m2r(*(inptr), mm0); /* 0 ; x3 x2 x1 x0 */ - movq_m2r(*(inptr+4), mm1); // 1 ; x7 x6 x5 x4 - movq_r2r(mm0, mm2); // 2 ; x3 x2 x1 x0 + movq_m2r(*(inptr+4), mm1); /* 1 ; x7 x6 x5 x4 */ + movq_r2r(mm0, mm2); /* 2 ; x3 x2 x1 x0 */ - movq_m2r(*(tab_i_01234567), mm3); // 3 ; w06 w04 w02 w00 - punpcklwd_r2r(mm1, mm0); // x5 x1 x4 x0 + movq_m2r(*(tab_i_01234567), mm3); /* 3 ; w06 w04 w02 w00 */ + punpcklwd_r2r(mm1, mm0); /* x5 x1 x4 x0 */ -// ---------- - movq_r2r(mm0, mm5); // 5 ; x5 x1 x4 x0 - punpckldq_r2r(mm0, mm0); // x4 x0 x4 x0 +/* ---------- */ + movq_r2r(mm0, mm5); /* 5 ; x5 x1 x4 x0 */ + punpckldq_r2r(mm0, mm0); /* x4 x0 x4 x0 */ - movq_m2r(*(tab_i_01234567+4), mm4); // 4 ; w07 w05 w03 w01 - punpckhwd_r2r(mm1, mm2); // 1 ; x7 x3 x6 x2 + movq_m2r(*(tab_i_01234567+4), mm4); /* 4 ; w07 w05 w03 w01 */ + punpckhwd_r2r(mm1, mm2); /* 1 ; x7 x3 x6 x2 */ - pmaddwd_r2r(mm0, mm3); // x4*w06+x0*w04 x4*w02+x0*w00 - movq_r2r(mm2, mm6); // 6 ; x7 x3 x6 x2 + pmaddwd_r2r(mm0, mm3); /* x4*w06+x0*w04 x4*w02+x0*w00 */ + movq_r2r(mm2, mm6); /* 6 ; x7 x3 x6 x2 */ - movq_m2r(*(tab_i_01234567+16), mm1);// 1 ; w22 w20 w18 w16 - punpckldq_r2r(mm2, mm2); // x6 x2 x6 x2 + movq_m2r(*(tab_i_01234567+16), mm1);/* 1 ; w22 w20 w18 w16 */ + punpckldq_r2r(mm2, mm2); /* x6 x2 x6 x2 */ - pmaddwd_r2r(mm2, mm4); // x6*w07+x2*w05 x6*w03+x2*w01 - punpckhdq_r2r(mm5, mm5); // x5 x1 x5 x1 + pmaddwd_r2r(mm2, mm4); /* x6*w07+x2*w05 x6*w03+x2*w01 */ + punpckhdq_r2r(mm5, mm5); /* x5 x1 x5 x1 */ - pmaddwd_m2r(*(tab_i_01234567+8), mm0);// x4*w14+x0*w12 x4*w10+x0*w08 - punpckhdq_r2r(mm6, mm6); // x7 x3 x7 x3 + pmaddwd_m2r(*(tab_i_01234567+8), mm0);/* x4*w14+x0*w12 x4*w10+x0*w08 */ + punpckhdq_r2r(mm6, mm6); /* x7 x3 x7 x3 */ - movq_m2r(*(tab_i_01234567+20), mm7);// 7 ; w23 w21 w19 w17 - pmaddwd_r2r(mm5, mm1); // x5*w22+x1*w20 x5*w18+x1*w16 + movq_m2r(*(tab_i_01234567+20), mm7);/* 7 ; w23 w21 w19 w17 */ + pmaddwd_r2r(mm5, mm1); /* x5*w22+x1*w20 x5*w18+x1*w16 */ - paddd_m2r(*(r_inv_col), mm3);// +rounder - pmaddwd_r2r(mm6, mm7); // x7*w23+x3*w21 x7*w19+x3*w17 + paddd_m2r(*(r_inv_col), mm3);/* +rounder */ + pmaddwd_r2r(mm6, mm7); /* x7*w23+x3*w21 x7*w19+x3*w17 */ - pmaddwd_m2r(*(tab_i_01234567+12), mm2);// x6*w15+x2*w13 x6*w11+x2*w09 - paddd_r2r(mm4, mm3); // 4 ; a1=sum(even1) a0=sum(even0) + pmaddwd_m2r(*(tab_i_01234567+12), mm2);/* x6*w15+x2*w13 x6*w11+x2*w09 */ + paddd_r2r(mm4, mm3); /* 4 ; a1=sum(even1) a0=sum(even0) */ - pmaddwd_m2r(*(tab_i_01234567+24), mm5);// x5*w30+x1*w28 x5*w26+x1*w24 - movq_r2r(mm3, mm4); // 4 ; a1 a0 + pmaddwd_m2r(*(tab_i_01234567+24), mm5);/* x5*w30+x1*w28 x5*w26+x1*w24 */ + movq_r2r(mm3, mm4); /* 4 ; a1 a0 */ - pmaddwd_m2r(*(tab_i_01234567+28), mm6);// x7*w31+x3*w29 x7*w27+x3*w25 - paddd_r2r(mm7, mm1); // 7 ; b1=sum(odd1) b0=sum(odd0) + pmaddwd_m2r(*(tab_i_01234567+28), mm6);/* x7*w31+x3*w29 x7*w27+x3*w25 */ + paddd_r2r(mm7, mm1); /* 7 ; b1=sum(odd1) b0=sum(odd0) */ - paddd_m2r(*(r_inv_col), mm0);// +rounder - psubd_r2r(mm1, mm3); // a1-b1 a0-b0 + paddd_m2r(*(r_inv_col), mm0);/* +rounder */ + psubd_r2r(mm1, mm3); /* a1-b1 a0-b0 */ - psrad_i2r(SHIFT_INV_COL, mm3); // y6=a1-b1 y7=a0-b0 - paddd_r2r(mm4, mm1); // 4 ; a1+b1 a0+b0 + psrad_i2r(SHIFT_INV_COL, mm3); /* y6=a1-b1 y7=a0-b0 */ + paddd_r2r(mm4, mm1); /* 4 ; a1+b1 a0+b0 */ - paddd_r2r(mm2, mm0); // 2 ; a3=sum(even3) a2=sum(even2) - psrad_i2r(SHIFT_INV_COL, mm1); // y1=a1+b1 y0=a0+b0 + paddd_r2r(mm2, mm0); /* 2 ; a3=sum(even3) a2=sum(even2) */ + psrad_i2r(SHIFT_INV_COL, mm1); /* y1=a1+b1 y0=a0+b0 */ - paddd_r2r(mm6, mm5); // 6 ; b3=sum(odd3) b2=sum(odd2) - movq_r2r(mm0, mm4); // 4 ; a3 a2 + paddd_r2r(mm6, mm5); /* 6 ; b3=sum(odd3) b2=sum(odd2) */ + movq_r2r(mm0, mm4); /* 4 ; a3 a2 */ - paddd_r2r(mm5, mm0); // a3+b3 a2+b2 - psubd_r2r(mm5, mm4); // 5 ; a3-b3 a2-b2 + paddd_r2r(mm5, mm0); /* a3+b3 a2+b2 */ + psubd_r2r(mm5, mm4); /* 5 ; a3-b3 a2-b2 */ - psrad_i2r(SHIFT_INV_COL, mm4); // y4=a3-b3 y5=a2-b2 - psrad_i2r(SHIFT_INV_COL, mm0); // y3=a3+b3 y2=a2+b2 + psrad_i2r(SHIFT_INV_COL, mm4); /* y4=a3-b3 y5=a2-b2 */ + psrad_i2r(SHIFT_INV_COL, mm0); /* y3=a3+b3 y2=a2+b2 */ - packssdw_r2r(mm3, mm4); // 3 ; y6 y7 y4 y5 + packssdw_r2r(mm3, mm4); /* 3 ; y6 y7 y4 y5 */ - packssdw_r2r(mm0, mm1); // 0 ; y3 y2 y1 y0 - movq_r2r(mm4, mm7); // 7 ; y6 y7 y4 y5 + packssdw_r2r(mm0, mm1); /* 0 ; y3 y2 y1 y0 */ + movq_r2r(mm4, mm7); /* 7 ; y6 y7 y4 y5 */ - psrld_i2r(16, mm4); // 0 y6 0 y4 + psrld_i2r(16, mm4); /* 0 y6 0 y4 */ - movq_r2m(mm1, *(inptr)); // 1 ; save y3 y2 y1 y0 - pslld_i2r(16, mm7); // y7 0 y5 0 + movq_r2m(mm1, *(inptr)); /* 1 ; save y3 y2 y1 y0 */ + pslld_i2r(16, mm7); /* y7 0 y5 0 */ - por_r2r(mm4, mm7); // 4 ; y7 y6 y5 y4 + por_r2r(mm4, mm7); /* 4 ; y7 y6 y5 y4 */ - // begin processing row 1 - movq_r2m(mm7, *(inptr+4)); // 7 ; save y7 y6 y5 y4 + /* begin processing row 1 */ + movq_r2m(mm7, *(inptr+4)); /* 7 ; save y7 y6 y5 y4 */ inptr += 8; } - // done with the iDCT column-transformation + /* done with the iDCT column-transformation */ } -// -// public interface to MMX32 IDCT 8x8 operation -// +/* */ +/* public interface to MMX32 IDCT 8x8 operation */ +/* */ void gst_idct_mmx32_idct( short *blk ) { - // 1) iDCT row transformation - idct_mmx32_rows( blk ); // 1) transform iDCT row, and transpose + /* 1) iDCT row transformation */ + idct_mmx32_rows( blk ); /* 1) transform iDCT row, and transpose */ - // 2) iDCT column transformation - idct_mmx32_cols( blk ); // 2) transform iDCT row, and transpose + /* 2) iDCT column transformation */ + idct_mmx32_cols( blk ); /* 2) transform iDCT row, and transpose */ - emms(); // restore processor state - // all done + emms(); /* restore processor state */ + /* all done */ } diff --git a/gst-libs/gst/resample/Makefile.am b/gst-libs/gst/resample/Makefile.am index c1def79d..f7056789 100644 --- a/gst-libs/gst/resample/Makefile.am +++ b/gst-libs/gst/resample/Makefile.am @@ -13,7 +13,7 @@ endif endif libgstresample_la_SOURCES = dtos.c functable.c resample.c resample.h -libgstresample_la_LIBADD = $(GST_LIBS) +libgstresample_la_LIBADD = libgstresample_la_CFLAGS = $(GST_CFLAGS) -ffast-math $(ARCHCFLAGS) libgstresample_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) diff --git a/gst-libs/gst/resample/dtos.c b/gst-libs/gst/resample/dtos.c index 7762595f..96bfe41d 100644 --- a/gst-libs/gst/resample/dtos.c +++ b/gst-libs/gst/resample/dtos.c @@ -23,13 +23,13 @@ #include #include -//#include +/*#include */ #include #define short_to_double_table -//#define short_to_double_altivec +/*#define short_to_double_altivec */ #define short_to_double_unroll #ifdef short_to_double_table diff --git a/gst-libs/gst/resample/functable.c b/gst-libs/gst/resample/functable.c index d61efca4..c225958e 100644 --- a/gst-libs/gst/resample/functable.c +++ b/gst-libs/gst/resample/functable.c @@ -124,12 +124,12 @@ double functable_eval(functable_t *t,double x) w0 = (x - 2 * x2 + x3) * t->offset; w1 = (-x2 + x3) * t->offset; - //printf("i=%d x=%g f0=%g f1=%g w0=%g w1=%g\n",i,x,f0,f1,w0,w1); + /*printf("i=%d x=%g f0=%g f1=%g w0=%g w1=%g\n",i,x,f0,f1,w0,w1); */ w = t->fx[i] * f0 + t->fx[i + 1] * f1 + t->fdx[i] * w0 + t->fdx[i + 1] * w1; - //w = t->fx[i] * (1-x) + t->fx[i+1] * x; + /*w = t->fx[i] * (1-x) + t->fx[i+1] * x; */ return w; } @@ -266,37 +266,37 @@ void functable_fir2_altivec(functable_t *t, float *r0, float *r1, sum0 = 0; sum1 = 0; for(j=0;jfx, t->fdx needs to be multiplexed by n - // we need 5 consecutive floats, which fit into 2 vecs - // load v0, t->fx[i] - // load v1, t->fx[i+n] - // v2 = v0 (not correct) - // v3 = (v0>>32) || (v1<<3*32) (not correct) - // - // load v4, t->dfx[i] - // load v5, t->dfx[i+n] - // v6 = v4 (not correct) - // v7 = (v4>>32) || (v5<<3*32) (not correct) - // - // v8 = splat(f0) - // v9 = splat(f1) - // v10 = splat(w0) - // v11 = splat(w1) - // - // v12 = v2 * v8 - // v12 += v3 * v9 - // v12 += v6 * v10 - // v12 += v7 * v11 + /* t->fx, t->fdx needs to be multiplexed by n */ + /* we need 5 consecutive floats, which fit into 2 vecs */ + /* load v0, t->fx[i] */ + /* load v1, t->fx[i+n] */ + /* v2 = v0 (not correct) */ + /* v3 = (v0>>32) || (v1<<3*32) (not correct) */ + /* */ + /* load v4, t->dfx[i] */ + /* load v5, t->dfx[i+n] */ + /* v6 = v4 (not correct) */ + /* v7 = (v4>>32) || (v5<<3*32) (not correct) */ + /* */ + /* v8 = splat(f0) */ + /* v9 = splat(f1) */ + /* v10 = splat(w0) */ + /* v11 = splat(w1) */ + /* */ + /* v12 = v2 * v8 */ + /* v12 += v3 * v9 */ + /* v12 += v6 * v10 */ + /* v12 += v7 * v11 */ w = t->fx[i] * f0 + t->fx[i + 1] * f1 + t->fdx[i] * w0 + t->fdx[i + 1] * w1; - // v13 = data[j*2] - // v14 = data[j*2+4] - // v15 = deinterlace_high(v13,v14) - // v16 = deinterlace_low(v13,v14) - // (sum0) v17 += multsum(v13,v15) - // (sum1) v18 += multsum(v14,v16) + /* v13 = data[j*2] */ + /* v14 = data[j*2+4] */ + /* v15 = deinterlace_high(v13,v14) */ + /* v16 = deinterlace_low(v13,v14) */ + /* (sum0) v17 += multsum(v13,v15) */ + /* (sum1) v18 += multsum(v14,v16) */ sum0 += data[j*2] * w; sum1 += data[j*2+1] * w; diff --git a/gst-libs/gst/resample/resample.c b/gst-libs/gst/resample/resample.c index cedb874e..286cb84b 100644 --- a/gst-libs/gst/resample/resample.c +++ b/gst-libs/gst/resample/resample.c @@ -243,7 +243,7 @@ void resample_bilinear(resample_t * r) b = r->i_start; for (i = 0; i < r->i_samples; i++) { b += r->i_inc; - //printf("in %d\n",i_ptr[0]); + /*printf("in %d\n",i_ptr[0]); */ if(b>=2){ printf("not expecting b>=2\n"); } @@ -252,7 +252,7 @@ void resample_bilinear(resample_t * r) acc1 += (1.0 - (b-r->i_inc)) * i_ptr[1]; o_ptr[0] = rint(acc0); - //printf("out %d\n",o_ptr[0]); + /*printf("out %d\n",o_ptr[0]); */ o_ptr[1] = rint(acc1); o_ptr += 2; o_count++; @@ -319,8 +319,8 @@ void resample_sinc_slow(resample_t * r) c1 = 0; for (j = 0; j < r->filter_length; j++) { weight = (x==0)?1:(sinx/x); -//printf("j %d sin %g cos %g\n",j,sinx,cosx); -//printf("j %d sin %g x %g sinc %g\n",j,sinx,x,weight); +/*printf("j %d sin %g cos %g\n",j,sinx,cosx); */ +/*printf("j %d sin %g x %g sinc %g\n",j,sinx,x,weight); */ c0 += weight * GETBUF((start + j), 0); c1 += weight * GETBUF((start + j), 1); t = cosx * cosd - sinx * sind; @@ -368,11 +368,11 @@ void resample_sinc(resample_t * r) for (i = 0; i < r->o_samples; i++) { a = r->o_start + i * r->o_inc; start = floor(a - r->halftaps); -//printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1); +/*printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1); */ center = a; - //x = M_PI * (start - center) * r->o_inc; - //d = M_PI * r->o_inc; - //x = (start - center) * r->o_inc; + /*x = M_PI * (start - center) * r->o_inc; */ + /*d = M_PI * r->o_inc; */ + /*x = (start - center) * r->o_inc; */ x0 = (start - center) * r->o_inc; d = r->o_inc; c0 = 0; @@ -439,20 +439,20 @@ static void resample_sinc_ft(resample_t * r) double *ptr; signed short *o_ptr; int i; - //int j; + /*int j; */ double c0, c1; - //double a; + /*double a; */ double start_f, start_x; int start; double center; - //double weight; + /*double weight; */ double x, d; double scale; int n = 4; - scale = r->i_inc; // cutoff at 22050 - //scale = 1.0; // cutoff at 24000 - //scale = r->i_inc * 0.5; // cutoff at 11025 + scale = r->i_inc; /* cutoff at 22050 */ + /*scale = 1.0; // cutoff at 24000 */ + /*scale = r->i_inc * 0.5; // cutoff at 11025 */ if(!ft){ ft = malloc(sizeof(*ft)); @@ -472,7 +472,7 @@ static void resample_sinc_ft(resample_t * r) functable_init(ft); - //printf("len=%d offset=%g start=%g\n",ft->len,ft->offset,ft->start); + /*printf("len=%d offset=%g start=%g\n",ft->len,ft->offset,ft->start); */ } ptr = r->buffer; @@ -484,18 +484,18 @@ static void resample_sinc_ft(resample_t * r) start_x -= start_f; start = start_f; for (i = 0; i < r->o_samples; i++) { - //start_f = floor(center - r->halftaps); -//printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1); + /*start_f = floor(center - r->halftaps); */ +/*printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1); */ x = start_f - center; d = 1; c0 = 0; c1 = 0; -//#define slow +/*#define slow */ #ifdef slow for (j = 0; j < r->filter_length; j++) { weight = functable_eval(ft,x)*scale; - //weight = sinc(M_PI * scale * x)*scale*r->i_inc; - //weight *= window_func(x / r->halftaps); + /*weight = sinc(M_PI * scale * x)*scale*r->i_inc; */ + /*weight *= window_func(x / r->halftaps); */ c0 += weight * ptr[(start + j + r->filter_length)*2 + 0]; c1 += weight * ptr[(start + j + r->filter_length)*2 + 1]; x += d; diff --git a/gst-libs/gst/resample/test.c b/gst-libs/gst/resample/test.c index 44d19a65..f268a592 100644 --- a/gst-libs/gst/resample/test.c +++ b/gst-libs/gst/resample/test.c @@ -9,11 +9,11 @@ #define AMP 16000 #define I_RATE 48000 #define O_RATE 44100 -//#define O_RATE 24000 +/*#define O_RATE 24000 */ -//#define test_func(x) 1 -//#define test_func(x) sin(2*M_PI*(x)*10) -//#define test_func(x) sin(2*M_PI*(x)*(x)*1000) +/*#define test_func(x) 1 */ +/*#define test_func(x) sin(2*M_PI*(x)*10) */ +/*#define test_func(x) sin(2*M_PI*(x)*(x)*1000) */ #define test_func(x) sin(2*M_PI*(x)*(x)*12000) short i_buf[I_RATE*2*2]; @@ -53,7 +53,7 @@ struct timeval start_time; void start_timer(void) { gettimeofday(&start_time,NULL); - //printf("start %ld.%06ld\n",start_time.tv_sec,start_time.tv_usec); + /*printf("start %ld.%06ld\n",start_time.tv_sec,start_time.tv_usec); */ } void end_timer(void) @@ -62,7 +62,7 @@ void end_timer(void) double diff; gettimeofday(&end_time,NULL); - //printf("end %ld.%06ld\n",end_time.tv_sec,end_time.tv_usec); + /*printf("end %ld.%06ld\n",end_time.tv_sec,end_time.tv_usec); */ diff = (end_time.tv_sec - start_time.tv_sec) + 1e-6*(end_time.tv_usec - start_time.tv_usec); @@ -81,7 +81,7 @@ void test_res1(void) for(i=0;ii_rate = I_RATE; r->o_rate = O_RATE; - //r->method = RESAMPLE_SINC_SLOW; + /*r->method = RESAMPLE_SINC_SLOW; */ r->method = RESAMPLE_SINC; r->channels = 2; - //r->verbose = 1; + /*r->verbose = 1; */ r->filter_length = 64; r->get_buffer = get_buffer; @@ -115,7 +115,7 @@ void test_res1(void) for(i=0;ii_rate = I_RATE; r->o_rate = O_RATE; - //r->method = RESAMPLE_SINC_SLOW; + /*r->method = RESAMPLE_SINC_SLOW; */ r->method = RESAMPLE_SINC; r->channels = 1; - //r->verbose = 1; + /*r->verbose = 1; */ r->filter_length = 64; r->get_buffer = get_buffer; @@ -322,7 +322,7 @@ void test_res7(void) for(i=0;i -//#define debug(format,args...) g_print(format,##args) +/*#define debug(format,args...) g_print(format,##args) */ #define debug(format,args...) @@ -69,33 +69,33 @@ gint gst_riff_next_buffer(GstRiff *riff,GstBuffer *buf,gulong off) { GST_BUFFER_DATA(buf) = riff->dataleft; GST_BUFFER_SIZE(buf) = newsize; off -= riff->dataleft_size; - //last -= riff->dataleft_size; + /*last -= riff->dataleft_size; */ riff->dataleft = NULL; } if (off == 0) { guint32 *words = (guin32 *)GST_BUFFER_DATA(buf); - // don't even try to parse the head if it's not there FIXME + /* don't even try to parse the head if it's not there FIXME */ if (last < 12) { riff->state = GST_RIFF_ENOTRIFF; return riff->state; } - //g_print("testing is 0x%08lx '%s'\n",words[0],gst_riff_id_to_fourcc(words[0])); + /*g_print("testing is 0x%08lx '%s'\n",words[0],gst_riff_id_to_fourcc(words[0])); */ /* verify this is a valid RIFF file, first of all */ if (GUINT32_FROM_LE (words[0]) != GST_RIFF_TAG_RIFF) { riff->state = GST_RIFF_ENOTRIFF; return riff->state; } riff->form = GUINT32_FROM_LE (words[2]); - //g_print("form is 0x%08lx '%s'\n",words[2],gst_riff_id_to_fourcc(words[2])); + /*g_print("form is 0x%08lx '%s'\n",words[2],gst_riff_id_to_fourcc(words[2])); */ riff->nextlikely = 12; /* skip 'RIFF', length, and form */ - // all OK here + /* all OK here */ riff->incomplete_chunk = NULL; } - // if we have an incomplete chunk from the previous buffer + /* if we have an incomplete chunk from the previous buffer */ if (riff->incomplete_chunk) { guint leftover; debug("have incomplete chunk %08x filled\n", riff->incomplete_chunk_size); @@ -135,14 +135,14 @@ gint gst_riff_next_buffer(GstRiff *riff,GstBuffer *buf,gulong off) { while ((riff->nextlikely+12) <= last) { guin32 *words = (guin32 *)((guchar *)GST_BUFFER_DATA(buf) + riff->nextlikely - off ); - // loop over all of the chunks to check which one is finished + /* loop over all of the chunks to check which one is finished */ while (riff->chunks) { chunk = g_list_nth_data(riff->chunks, 0); debug("next 0x%08x offset 0x%08lx size 0x%08x\n",riff->nextlikely, chunk->offset, chunk->size); if (riff->nextlikely >= chunk->offset+chunk->size) { - //g_print("found END LIST\n"); - // we have the end of the chunk on the stack, remove it + /*g_print("found END LIST\n"); */ + /* we have the end of the chunk on the stack, remove it */ riff->chunks = g_list_remove(riff->chunks, chunk); } else break; @@ -157,17 +157,17 @@ gint gst_riff_next_buffer(GstRiff *riff,GstBuffer *buf,gulong off) { chunk->id = GUINT32_FROM_LE (words[0])); chunk->size = GUINT32_FROM_LE (words[1]); chunk->data = (gchar *)(words+2); - // we need word alignment - //if (chunk->size & 0x01) chunk->size++; + /* we need word alignment */ + /*if (chunk->size & 0x01) chunk->size++; */ chunk->form = GUINT32_FROM_LE (words[2]); /* fill in the form, might not be valid */ if (chunk->id == GST_RIFF_TAG_LIST) { - //g_print("found LIST %s\n", gst_riff_id_to_fourcc(chunk->form)); + /*g_print("found LIST %s\n", gst_riff_id_to_fourcc(chunk->form)); */ riff->nextlikely += 12; - // we push the list chunk on our 'stack' + /* we push the list chunk on our 'stack' */ riff->chunks = g_list_prepend(riff->chunks,chunk); - // send the buffer to the listener if we have received a function + /* send the buffer to the listener if we have received a function */ if (riff->new_tag_found) { riff->new_tag_found(chunk, riff->callback_data); } @@ -178,25 +178,25 @@ gint gst_riff_next_buffer(GstRiff *riff,GstBuffer *buf,gulong off) { gst_riff_id_to_fourcc(GUINT32_FROM_LE (words[0])), GUINT32_FROM_LE (words[1])); riff->nextlikely += 8 + chunk->size; /* doesn't include hdr */ - // if this buffer is incomplete + /* if this buffer is incomplete */ if (riff->nextlikely > last) { guint left = size - (riff->nextlikely - chunk->size - off); - //g_print("make incomplete buffer %08x\n", left); + /*g_print("make incomplete buffer %08x\n", left); */ chunk->data = g_malloc(chunk->size); memcpy(chunk->data, (gchar *)(words+2), left); riff->incomplete_chunk = chunk; riff->incomplete_chunk_size = left; } else { - // send the buffer to the listener if we have received a function + /* send the buffer to the listener if we have received a function */ if (riff->new_tag_found) { riff->new_tag_found(chunk, riff->callback_data); } g_free(chunk); } - //riff->chunks = g_list_prepend(riff->chunks,chunk); + /*riff->chunks = g_list_prepend(riff->chunks,chunk); */ } } diff --git a/gst-libs/gst/riff/riffencode.c b/gst-libs/gst/riff/riffencode.c index a6caed9e..febf2599 100644 --- a/gst-libs/gst/riff/riffencode.c +++ b/gst-libs/gst/riff/riffencode.c @@ -19,7 +19,7 @@ #include -//#define DEBUG_ENABLED +/*#define DEBUG_ENABLED */ #include "riff.h" #define GST_RIFF_ENCODER_BUF_SIZE 1024 diff --git a/gst-libs/gst/riff/riffparse.c b/gst-libs/gst/riff/riffparse.c index eb4746fc..c7fa3f75 100644 --- a/gst-libs/gst/riff/riffparse.c +++ b/gst-libs/gst/riff/riffparse.c @@ -19,7 +19,7 @@ #include -//#define DEBUG_ENABLED +/*#define DEBUG_ENABLED */ #include GstRiff* @@ -71,33 +71,33 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) GST_BUFFER_DATA(buf) = riff->dataleft; size = GST_BUFFER_SIZE(buf) = newsize; off -= riff->dataleft_size; - //last -= riff->dataleft_size; + /*last -= riff->dataleft_size; */ riff->dataleft = NULL; } if (off == 0) { guint32 *words = (guint32 *)GST_BUFFER_DATA(buf); - // don't even try to parse the head if it's not there FIXME + /* don't even try to parse the head if it's not there FIXME */ if (last < 12) { riff->state = GST_RIFF_ENOTRIFF; return riff->state; } - //g_print("testing is 0x%08lx '%s'\n",words[0],gst_riff_id_to_fourcc(words[0])); + /*g_print("testing is 0x%08lx '%s'\n",words[0],gst_riff_id_to_fourcc(words[0])); */ /* verify this is a valid RIFF file, first of all */ if (GUINT32_FROM_LE (words[0]) != GST_RIFF_TAG_RIFF) { riff->state = GST_RIFF_ENOTRIFF; return riff->state; } riff->form = GUINT32_FROM_LE (words[2]); - //g_print("form is 0x%08lx '%s'\n",words[2],gst_riff_id_to_fourcc(words[2])); + /*g_print("form is 0x%08lx '%s'\n",words[2],gst_riff_id_to_fourcc(words[2])); */ riff->nextlikely = 12; /* skip 'RIFF', length, and form */ - // all OK here + /* all OK here */ riff->incomplete_chunk = NULL; } - // if we have an incomplete chunk from the previous buffer + /* if we have an incomplete chunk from the previous buffer */ if (riff->incomplete_chunk) { guint leftover; GST_DEBUG (0,"gst_riff_parser: have incomplete chunk %08x filled\n", riff->incomplete_chunk_size); @@ -130,7 +130,7 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) while ((riff->nextlikely+12) <= last) { guint32 *words = (guint32 *)((guchar *)GST_BUFFER_DATA(buf) + riff->nextlikely - off ); - // loop over all of the chunks to check which one is finished + /* loop over all of the chunks to check which one is finished */ while (riff->chunks) { chunk = g_list_nth_data(riff->chunks, 0); @@ -138,7 +138,7 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) chunk->offset, chunk->size); if (riff->nextlikely >= chunk->offset+chunk->size) { GST_DEBUG (0,"gst_riff_parser: found END LIST\n"); - // we have the end of the chunk on the stack, remove it + /* we have the end of the chunk on the stack, remove it */ riff->chunks = g_list_remove(riff->chunks, chunk); } else break; @@ -153,17 +153,17 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) chunk->id = GUINT32_FROM_LE (words[0]); chunk->size = GUINT32_FROM_LE (words[1]); chunk->data = (gchar *)(words+2); - // we need word alignment - //if (chunk->size & 0x01) chunk->size++; + /* we need word alignment */ + /*if (chunk->size & 0x01) chunk->size++; */ chunk->form = GUINT32_FROM_LE (words[2]); /* fill in the form, might not be valid */ if (chunk->id == GST_RIFF_TAG_LIST) { GST_DEBUG (0,"found LIST %s\n", gst_riff_id_to_fourcc(chunk->form)); riff->nextlikely += 12; - // we push the list chunk on our 'stack' + /* we push the list chunk on our 'stack' */ riff->chunks = g_list_prepend(riff->chunks,chunk); - // send the buffer to the listener if we have received a function + /* send the buffer to the listener if we have received a function */ if (riff->new_tag_found) { riff->new_tag_found(chunk, riff->callback_data); } @@ -175,7 +175,7 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) gst_riff_id_to_fourcc(GUINT32_FROM_LE (words[0])), GUINT32_FROM_LE (words[1])); riff->nextlikely += 8 + chunk->size; /* doesn't include hdr */ - // if this buffer is incomplete + /* if this buffer is incomplete */ if (riff->nextlikely > last) { guint left = size - (riff->nextlikely - chunk->size - off); @@ -186,7 +186,7 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) riff->incomplete_chunk_size = left; } else { - // send the buffer to the listener if we have received a function + /* send the buffer to the listener if we have received a function */ if (riff->new_tag_found) { riff->new_tag_found(chunk, riff->callback_data); } @@ -194,7 +194,7 @@ gst_riff_parser_next_buffer (GstRiff *riff, GstBuffer *buf, gulong off) } if (riff->nextlikely & 0x01) riff->nextlikely++; - //riff->chunks = g_list_prepend(riff->chunks,chunk); + /*riff->chunks = g_list_prepend(riff->chunks,chunk); */ } } if ((riff->nextlikely+12) > last && !riff->incomplete_chunk) { diff --git a/gst-libs/gst/riff/riffutil.c b/gst-libs/gst/riff/riffutil.c index ca01f6e6..9005279f 100644 --- a/gst-libs/gst/riff/riffutil.c +++ b/gst-libs/gst/riff/riffutil.c @@ -20,7 +20,7 @@ #include -//#define debug(format,args...) g_print(format,##args) +/*#define debug(format,args...) g_print(format,##args) */ #define debug(format,args...) -- cgit v1.2.1