summaryrefslogtreecommitdiffstats
path: root/ext/hermes/yuv2rgb.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/hermes/yuv2rgb.c')
-rw-r--r--ext/hermes/yuv2rgb.c823
1 files changed, 823 insertions, 0 deletions
diff --git a/ext/hermes/yuv2rgb.c b/ext/hermes/yuv2rgb.c
new file mode 100644
index 00000000..0b5a9b4c
--- /dev/null
+++ b/ext/hermes/yuv2rgb.c
@@ -0,0 +1,823 @@
+/* Gnome-Streamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "yuv2rgb.h"
+
+//#undef HAVE_LIBMMX
+
+#ifdef HAVE_LIBMMX
+#include "mmx.h"
+#endif
+
+/*
+ * Base multipliers for a combined Cb/Cr/luminance index.
+ * NOTE(review): CB_RANGE and CR_RANGE are not defined anywhere in the
+ * visible part of this file, and none of the three *_BASE macros is
+ * expanded here, so they only compile as long as they stay unused.
+ */
+#define CB_BASE 1
+#define CR_BASE (CB_BASE*CB_RANGE)
+#define LUM_BASE (CR_BASE*CR_RANGE)
+
+/* Classic min/max macros; each argument is evaluated twice. */
+#define Min(x,y) (((x) < (y)) ? (x) : (y))
+#define Max(x,y) (((x) > (y)) ? (x) : (y))
+
+/*
+ * Gamma and chroma correction helpers.  They read the free identifiers
+ * gammaCorrect / chromaCorrect, which this file does not declare; the only
+ * uses in the visible source are inside commented-out code in
+ * gst_colorspace_init_yuv().
+ *
+ * The *256 variants take a value centred on 128, the *128 variants a value
+ * centred on 0; the D-suffixed variants do the same clamping in floating
+ * point instead of truncating to int.
+ */
+#define GAMMA_CORRECTION(x) ((int)(pow((x) / 255.0, 1.0 / gammaCorrect) * 255.0))
+#define CHROMA_CORRECTION256(x) ((x) >= 128 \
+ ? 128 + Min(127, (int)(((x) - 128.0) * chromaCorrect)) \
+ : 128 - Min(128, (int)((128.0 - (x)) * chromaCorrect)))
+#define CHROMA_CORRECTION128(x) ((x) >= 0 \
+ ? Min(127, (int)(((x) * chromaCorrect))) \
+ : Max(-128, (int)(((x) * chromaCorrect))))
+#define CHROMA_CORRECTION256D(x) ((x) >= 128 \
+ ? 128.0 + Min(127.0, (((x) - 128.0) * chromaCorrect)) \
+ : 128.0 - Min(128.0, (((128.0 - (x)) * chromaCorrect))))
+#define CHROMA_CORRECTION128D(x) ((x) >= 0 \
+ ? Min(127.0, ((x) * chromaCorrect)) \
+ : Max(-128.0, ((x) * chromaCorrect)))
+
+
+/*
+ * Per-frame entry points; gst_colorspace_yuv2rgb_get_converter() stores one
+ * of them in the converter's convert member.
+ */
+static void gst_colorspace_yuv420P_to_rgb16 (GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest);
+static void gst_colorspace_yuv420P_to_rgb24 (GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest);
+static void gst_colorspace_yuv420P_to_rgb32 (GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest);
+
+#ifdef HAVE_LIBMMX
+static void gst_colorspace_yuv420P_to_bgr16_mmx (GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest);
+static void gst_colorspace_yuv420P_to_bgr32_mmx (GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest);
+#endif
+
+/*
+ * Table-driven plane converters.  The parameter names below follow the
+ * positional order used by the definitions further down in this file:
+ * the third argument is consumed as cb, the fourth as cr, and the trailing
+ * pair is (rows, cols).
+ */
+static void gst_colorspace_yuv_to_rgb16(GstColorSpaceYUVTables *tables,
+                                        unsigned char *lum,
+                                        unsigned char *cb,
+                                        unsigned char *cr,
+                                        unsigned char *out,
+                                        int rows, int cols);
+static void gst_colorspace_yuv_to_rgb24(GstColorSpaceYUVTables *tables,
+                                        unsigned char *lum,
+                                        unsigned char *cb,
+                                        unsigned char *cr,
+                                        unsigned char *out,
+                                        int rows, int cols);
+static void gst_colorspace_yuv_to_rgb32(GstColorSpaceYUVTables *tables,
+                                        unsigned char *lum,
+                                        unsigned char *cb,
+                                        unsigned char *cr,
+                                        unsigned char *out,
+                                        int rows, int cols);
+#ifdef HAVE_LIBMMX
+/* The MMX definition in this file names its arguments cr, cb, rows, cols. */
+static void gst_colorspace_yuv_to_bgr32_mmx(GstColorSpaceYUVTables *tables,
+                                            unsigned char *lum,
+                                            unsigned char *cr,
+                                            unsigned char *cb,
+                                            unsigned char *out,
+                                            int rows, int cols);
+/*
+ * Defined outside this file.
+ * NOTE(review): the trailing names are kept as found; the caller in this
+ * file passes (height, width) in that order - confirm against the
+ * implementation.
+ */
+extern void gst_colorspace_yuv_to_bgr16_mmx(GstColorSpaceYUVTables *tables,
+                                            unsigned char *lum,
+                                            unsigned char *cr,
+                                            unsigned char *cb,
+                                            unsigned char *out,
+                                            int cols, int rows);
+#endif
+
+/* Builds the lookup tables used by the table-driven converters. */
+static GstColorSpaceYUVTables * gst_colorspace_init_yuv(long depth,
+                                                        long red_mask, long green_mask, long blue_mask);
+
+/**
+ * gst_colorspace_yuv2rgb_get_converter:
+ * @from: source caps; "width", "height" and the "format" fourcc are read
+ * @to: destination caps; "format", "bpp", "red_mask", "green_mask" and
+ *      "blue_mask" are read
+ *
+ * Creates a converter from I420 input to packed RGB output at 15/16, 24
+ * or 32 bits per pixel.  When built with HAVE_LIBMMX, the MMX routines are
+ * chosen if the colour masks match the layout they produce and
+ * gst_cpu_get_flags() reports GST_CPU_FLAG_MMX; otherwise the table-driven
+ * C routines are used.
+ *
+ * Returns: a converter allocated with g_malloc(), or NULL when the
+ * destination format is not "RGB " or the source format / destination
+ * depth is not supported.
+ */
+GstColorSpaceConverter*
+gst_colorspace_yuv2rgb_get_converter (GstCaps *from, GstCaps *to)
+{
+  gulong from_space, to_space;
+  GstColorSpaceConverter *new;
+  gint to_bpp;
+
+  GST_DEBUG (0,"gst_colorspace_yuv2rgb_get_converter\n");
+
+  from_space = gst_caps_get_fourcc_int (from, "format");
+  to_space = gst_caps_get_fourcc_int (to, "format");
+  to_bpp = gst_caps_get_int (to, "bpp");
+
+  /* Validate before allocating so that this early return cannot leak. */
+  g_return_val_if_fail (to_space == GST_STR_FOURCC ("RGB "), NULL);
+
+  new = g_malloc (sizeof (GstColorSpaceConverter));
+
+  new->width = gst_caps_get_int (from, "width");
+  new->height = gst_caps_get_int (from, "height");
+  new->color_tables = NULL;
+
+  switch (from_space) {
+    case GST_MAKE_FOURCC ('I','4','2','0'):
+    {
+      gulong red_mask;
+      gulong green_mask;
+      gulong blue_mask;
+
+      red_mask = gst_caps_get_int (to, "red_mask");
+      green_mask = gst_caps_get_int (to, "green_mask");
+      blue_mask = gst_caps_get_int (to, "blue_mask");
+
+      GST_INFO (GST_CAT_PLUGIN_INFO, "red_mask %08lx", red_mask);
+      GST_INFO (GST_CAT_PLUGIN_INFO, "green_mask %08lx", green_mask);
+      GST_INFO (GST_CAT_PLUGIN_INFO, "blue_mask %08lx", blue_mask);
+
+      /*
+       * Pick the conversion routine first; the lookup tables are only
+       * built once the depth is known to be supported, so nothing has to
+       * be torn down on the failure path.
+       */
+      switch (to_bpp) {
+        case 32:
+#ifdef HAVE_LIBMMX
+          /* The third comparison must test blue_mask, not red_mask again. */
+          if (red_mask == 0xff0000 && green_mask == 0x00ff00 && blue_mask == 0x0000ff &&
+              (gst_cpu_get_flags () & GST_CPU_FLAG_MMX) ) {
+            new->convert = gst_colorspace_yuv420P_to_bgr32_mmx;
+          }
+          else
+#endif
+          {
+            new->convert = gst_colorspace_yuv420P_to_rgb32;
+          }
+          break;
+        case 24:
+          new->convert = gst_colorspace_yuv420P_to_rgb24;
+          break;
+        case 15:
+        case 16:
+#ifdef HAVE_LIBMMX
+          if (red_mask == 0xf800 && green_mask == 0x07e0 && blue_mask == 0x001f &&
+              (gst_cpu_get_flags () & GST_CPU_FLAG_MMX) ) {
+            new->convert = gst_colorspace_yuv420P_to_bgr16_mmx;
+          }
+          else
+#endif
+          {
+            new->convert = gst_colorspace_yuv420P_to_rgb16;
+          }
+          break;
+        default:
+          g_print("gst_colorspace_yuv2rgb not implemented\n");
+          g_free (new);
+          new = NULL;
+          break;
+      }
+
+      if (new == NULL)
+        break;
+
+      /* One full-size luma plane plus two quarter-size chroma planes. */
+      new->insize = new->width * new->height + new->width * new->height/2;
+      new->color_tables = gst_colorspace_init_yuv (to_bpp, red_mask, green_mask, blue_mask);
+      /* Round up to whole bytes: 15 bpp output is written as 2 bytes per pixel. */
+      new->outsize = new->width * new->height * ((to_bpp + 7) / 8);
+      break;
+    }
+    default:
+      g_print("gst_colorspace_yuv2rgb not implemented\n");
+      g_free (new);
+      new = NULL;
+      break;
+  }
+  return new;
+}
+
+/*
+ * Convert one planar frame at @src into 32-bit pixels at @dest.
+ *
+ * The frame is split into a width*height luma plane followed by two
+ * chroma planes of a quarter of that size each.  The first chroma plane
+ * lands in the callee's third argument and the second in its fourth;
+ * height and width are passed as (rows, cols).
+ */
+static void gst_colorspace_yuv420P_to_rgb32(GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest)
+{
+  int luma_len;
+  unsigned char *chroma_first;
+  unsigned char *chroma_second;
+
+  GST_DEBUG (0,"gst_colorspace_yuv420P_to_rgb32\n");
+
+  luma_len = space->width * space->height;
+  chroma_first = src + luma_len;
+  chroma_second = chroma_first + (luma_len >> 2);
+
+  gst_colorspace_yuv_to_rgb32 (space->color_tables,
+                               src,
+                               chroma_first,
+                               chroma_second,
+                               dest,
+                               space->height,
+                               space->width);
+}
+
+/*
+ * Convert one planar frame at @src into packed 24-bit pixels at @dest.
+ *
+ * The frame is split into a width*height luma plane followed by two
+ * chroma planes of a quarter of that size each.  The first chroma plane
+ * lands in the callee's third argument and the second in its fourth;
+ * height and width are passed as (rows, cols).
+ */
+static void gst_colorspace_yuv420P_to_rgb24(GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest)
+{
+  int luma_len;
+  unsigned char *chroma_first;
+  unsigned char *chroma_second;
+
+  GST_DEBUG (0,"gst_colorspace_yuv420P_to_rgb24\n");
+
+  luma_len = space->width * space->height;
+  chroma_first = src + luma_len;
+  chroma_second = chroma_first + (luma_len >> 2);
+
+  gst_colorspace_yuv_to_rgb24 (space->color_tables,
+                               src,
+                               chroma_first,
+                               chroma_second,
+                               dest,
+                               space->height,
+                               space->width);
+}
+
+/*
+ * Convert one planar frame at @src into 16-bit pixels at @dest.
+ *
+ * The frame is split into a width*height luma plane followed by two
+ * chroma planes of a quarter of that size each.  The first chroma plane
+ * lands in the callee's third argument and the second in its fourth;
+ * height and width are passed as (rows, cols).
+ */
+static void gst_colorspace_yuv420P_to_rgb16(GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest)
+{
+  int luma_len;
+  unsigned char *chroma_first;
+  unsigned char *chroma_second;
+
+  GST_DEBUG (0,"gst_colorspace_yuv420P_to_rgb16\n");
+
+  luma_len = space->width * space->height;
+  chroma_first = src + luma_len;
+  chroma_second = chroma_first + (luma_len >> 2);
+
+  gst_colorspace_yuv_to_rgb16 (space->color_tables,
+                               src,
+                               chroma_first,
+                               chroma_second,
+                               dest,
+                               space->height,
+                               space->width);
+}
+
+#ifdef HAVE_LIBMMX
+/*
+ * MMX variant of the 32-bit frame conversion.
+ *
+ * Splits @src into a width*height luma plane and two quarter-size chroma
+ * planes and hands them to the MMX routine.  No lookup tables are passed
+ * (the tables argument is NULL).
+ */
+static void gst_colorspace_yuv420P_to_bgr32_mmx(GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest)
+{
+  int luma_len;
+  unsigned char *chroma_first;
+  unsigned char *chroma_second;
+
+  GST_DEBUG (0,"gst_colorspace_yuv420P_to_rgb32_mmx\n");
+
+  luma_len = space->width * space->height;
+  chroma_first = src + luma_len;
+  chroma_second = chroma_first + (luma_len >> 2);
+
+  gst_colorspace_yuv_to_bgr32_mmx (NULL,
+                                   src,
+                                   chroma_first,
+                                   chroma_second,
+                                   dest,
+                                   space->height,
+                                   space->width);
+}
+/*
+ * MMX variant of the 16-bit frame conversion.
+ *
+ * Splits @src into a width*height luma plane and two quarter-size chroma
+ * planes and hands them to the externally defined MMX routine.  No lookup
+ * tables are passed (the tables argument is NULL).  A debug message is
+ * emitted before and after the conversion.
+ */
+static void gst_colorspace_yuv420P_to_bgr16_mmx(GstColorSpaceConverter *space, unsigned char *src, unsigned char *dest)
+{
+  int luma_len;
+  unsigned char *chroma_first;
+  unsigned char *chroma_second;
+
+  GST_DEBUG (0,"gst_colorspace_yuv420P_to_bgr16_mmx \n");
+
+  luma_len = space->width * space->height;
+  chroma_first = src + luma_len;
+  chroma_second = chroma_first + (luma_len >> 2);
+
+  gst_colorspace_yuv_to_bgr16_mmx (NULL,
+                                   src,
+                                   chroma_first,
+                                   chroma_second,
+                                   dest,
+                                   space->height,
+                                   space->width);
+
+  GST_DEBUG (0,"gst_colorspace_yuv420P_to_bgr16_mmx done\n");
+}
+#endif
+
+/*
+ * Count the 1 bits in a longword.
+ * Examines one bit per step; meant for setup code, not for inner loops.
+ */
+static int
+number_of_bits_set(unsigned long a)
+{
+  int ones = 0;
+
+  while (a != 0) {
+    ones += (int) (a & 1);
+    a >>= 1;
+  }
+  return ones;
+}
+
+/*
+ * Shift the 0s in the least significant end out of the longword, so that
+ * the lowest set bit of the input ends up in bit 0.
+ * Returns 0 for an input of 0.
+ * Low performance, do not call often.
+ */
+static unsigned long
+shifted_down(unsigned long a)
+{
+  if (a == 0)
+    return 0;
+
+  /* Keep shifting until bit 0 is set; a single shift is not enough for
+   * values with more than one trailing zero. */
+  while ((a & 1) == 0)
+    a >>= 1;
+
+  return a;
+}
+
+/*
+ * Count the 0 bits above the most significant 1 bit of a longword.
+ * An input of 0 yields the full width of unsigned long in bits
+ * (a char is assumed to be 8 bits wide).
+ * Walks one bit per step; meant for setup code, not for inner loops.
+ */
+static int
+free_bits_at_top(unsigned long a)
+{
+  const int width = (int) (sizeof(unsigned long) * 8);
+  const unsigned long top_bit = 1UL << (width - 1);
+  int zeros = 0;
+
+  if (a == 0)
+    return width;
+
+  while ((a & top_bit) == 0) {
+    a <<= 1;
+    zeros++;
+  }
+  return zeros;
+}
+
+/*
+ * Count the 0 bits below the least significant 1 bit of a longword.
+ * An input of 0 yields the full width of unsigned long in bits
+ * (a char is assumed to be 8 bits wide).
+ * Walks one bit per step; meant for setup code, not for inner loops.
+ */
+static int
+free_bits_at_bottom(unsigned long a)
+{
+  int zeros = 0;
+
+  if (a == 0)
+    return (int) (sizeof(unsigned long) * 8);
+
+  while ((a & 1UL) == 0) {
+    a >>= 1;
+    zeros++;
+  }
+  return zeros;
+}
+
+/*
+ * Fill one 768-entry component-to-pixel table for the channel described
+ * by @mask.
+ *
+ * Entries 256..511 hold the pixel bits for component values 0..255: the
+ * value is reduced to the number of bits set in the mask and moved to the
+ * mask's position.  For depths other than 24 and 32 the result is also
+ * copied into the upper 16 bits.  Entries 0..255 repeat entry 256 and
+ * entries 512..767 repeat entry 511, so out-of-range sums clamp without
+ * an explicit check.
+ */
+static void
+gst_colorspace_fill_pix_table (long *alloc, long mask, long depth)
+{
+  int drop = 8 - number_of_bits_set (mask);
+  int position = free_bits_at_bottom (mask);
+  int value;
+
+  for (value = 0; value < 256; value++) {
+    long pixel = value >> drop;
+
+    pixel <<= position;
+    if (depth != 32 && depth != 24) {
+      pixel |= pixel << 16;
+    }
+#ifdef SIXTYFOUR_BIT
+    if (depth == 32) {
+      pixel |= pixel << 32;
+    }
+#endif
+    alloc[value + 256] = pixel;
+  }
+
+  for (value = 0; value < 256; value++) {
+    alloc[value] = alloc[256];
+    alloc[value + 512] = alloc[511];
+  }
+}
+
+/*
+ *--------------------------------------------------------------
+ *
+ * gst_colorspace_init_yuv --
+ *
+ *	Builds the lookup tables that replace the multiplications
+ *	of the YUV to RGB conversion: a luminance table, four
+ *	chroma contribution tables and one component-to-pixel
+ *	table per colour channel.  No gamma or chroma correction
+ *	is applied; chroma values are only re-centred around 0.
+ *
+ * Results:
+ *	A newly allocated GstColorSpaceYUVTables.  Its r_2_pix,
+ *	g_2_pix and b_2_pix members point 256 entries into their
+ *	768-entry allocations, so indices -256..511 are valid.
+ *
+ * Side effects:
+ *	Prints a message to stderr and calls exit(1) when one of
+ *	the malloc() calls fails.
+ *
+ *--------------------------------------------------------------
+ */
+
+static GstColorSpaceYUVTables *
+gst_colorspace_init_yuv(long depth, long red_mask, long green_mask, long blue_mask)
+{
+  GstColorSpaceYUVTables *tables = g_malloc(sizeof(GstColorSpaceYUVTables));
+  int *L_tab, *Cr_r_tab, *Cr_g_tab, *Cb_g_tab, *Cb_b_tab;
+  long *red_alloc, *green_alloc, *blue_alloc;
+  int i;
+
+  L_tab = tables->L_tab = (int *)malloc(256*sizeof(int));
+  Cr_r_tab = tables->Cr_r_tab = (int *)malloc(256*sizeof(int));
+  Cr_g_tab = tables->Cr_g_tab = (int *)malloc(256*sizeof(int));
+  Cb_g_tab = tables->Cb_g_tab = (int *)malloc(256*sizeof(int));
+  Cb_b_tab = tables->Cb_b_tab = (int *)malloc(256*sizeof(int));
+
+  red_alloc = (long *)malloc(768*sizeof(long));
+  green_alloc = (long *)malloc(768*sizeof(long));
+  blue_alloc = (long *)malloc(768*sizeof(long));
+
+  if (!L_tab || !Cr_r_tab || !Cr_g_tab || !Cb_g_tab || !Cb_b_tab ||
+      !red_alloc || !green_alloc || !blue_alloc) {
+    fprintf(stderr, "Could not get enough memory in InitColorDither\n");
+    exit(1);
+  }
+
+  for (i = 0; i < 256; i++) {
+    /* chroma samples are stored with an offset of 128 */
+    int centred = i - 128;
+
+    L_tab[i] = i;
+    Cr_r_tab[i] = (0.419/0.299) * centred;
+    Cr_g_tab[i] = -(0.299/0.419) * centred;
+    Cb_g_tab[i] = -(0.114/0.331) * centred;
+    Cb_b_tab[i] = (0.587/0.331) * centred;
+  }
+
+  gst_colorspace_fill_pix_table (red_alloc, red_mask, depth);
+  gst_colorspace_fill_pix_table (green_alloc, green_mask, depth);
+  gst_colorspace_fill_pix_table (blue_alloc, blue_mask, depth);
+
+  tables->r_2_pix = red_alloc + 256;
+  tables->g_2_pix = green_alloc + 256;
+  tables->b_2_pix = blue_alloc + 256;
+
+  return tables;
+}
+
+/*
+ *--------------------------------------------------------------
+ *
+ * gst_colorspace_yuv_to_rgb16 --
+ *
+ *	Converts a planar image into 16-bit pixels using the
+ *	lookup tables in @tables.  Two image rows are handled per
+ *	pass; every chroma sample pair is shared by a 2x2 block
+ *	of luminance samples.  A trailing odd row or column is
+ *	not converted.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Writes the converted pixels to @out.
+ *
+ *--------------------------------------------------------------
+ */
+
+static void
+gst_colorspace_yuv_to_rgb16(GstColorSpaceYUVTables *tables,
+                            unsigned char *lum,
+                            unsigned char *cb,
+                            unsigned char *cr,
+                            unsigned char *out,
+                            int rows, int cols)
+{
+  unsigned short *dst_upper = (unsigned short *) out;
+  unsigned short *dst_lower = dst_upper + cols;
+  unsigned char *lum_lower = lum + cols;
+  int pairs_per_row = cols >> 1;
+  int row_pairs, pairs;
+
+  for (row_pairs = rows >> 1; row_pairs != 0; row_pairs--) {
+    for (pairs = pairs_per_row; pairs != 0; pairs--) {
+      int v = *cr++;
+      int u = *cb++;
+      int red_off = tables->Cr_r_tab[v];
+      int green_off = tables->Cr_g_tab[v] + tables->Cb_g_tab[u];
+      int blue_off = tables->Cb_b_tab[u];
+      int luma;
+
+      /* upper row, two pixels */
+      luma = tables->L_tab[(int) lum[0]];
+      dst_upper[0] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      luma = tables->L_tab[(int) lum[1]];
+      dst_upper[1] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      /* lower row, same chroma */
+      luma = tables->L_tab[(int) lum_lower[0]];
+      dst_lower[0] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      luma = tables->L_tab[(int) lum_lower[1]];
+      dst_lower[1] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      lum += 2;
+      lum_lower += 2;
+      dst_upper += 2;
+      dst_lower += 2;
+    }
+
+    /*
+     * The lower-row cursors now sit at the start of the next unprocessed
+     * row; the upper-row cursors take over from there and the lower-row
+     * cursors move one row further.
+     */
+    lum = lum_lower;
+    dst_upper = dst_lower;
+    lum_lower += cols;
+    dst_lower += cols;
+  }
+}
+
+/*
+ * Converts a planar image into packed 24-bit pixels (3 bytes per pixel)
+ * using the lookup tables in @tables.
+ *
+ * @lum is the luminance plane (@cols samples per row); @cb and @cr each
+ * supply one sample per 2x2 block of luminance samples.  Two image rows
+ * are handled per pass; a trailing odd row or column is not converted.
+ *
+ * Each pixel is composed as a 32-bit word and the first three bytes of
+ * that word, in host memory order, are written to @out.
+ * NOTE(review): on a big-endian host those are the three most significant
+ * bytes - confirm this is the intended layout there.
+ */
+static void
+gst_colorspace_yuv_to_rgb24(GstColorSpaceYUVTables *tables,
+                            unsigned char *lum,
+                            unsigned char *cb,
+                            unsigned char *cr,
+                            unsigned char *out,
+                            int rows, int cols)
+{
+  int L, CR, CB;
+  unsigned char *row1, *row2;
+  unsigned char *lum2;
+  int x, y;
+  int cr_r;
+  int crb_g;
+  int cb_b;
+  int cols_2 = cols>>1;
+  int cols_3 = cols*3;
+  /*
+   * A union gives byte access to the composed word without writing an
+   * int through a pointer to a char array, which broke the aliasing
+   * rules and made no alignment guarantee.
+   */
+  union {
+    unsigned int word;
+    unsigned char bytes[4];
+  } pixel;
+
+  row1 = out;
+  row2 = row1 + cols_3;
+  lum2 = lum + cols;
+  for (y=rows>>1; y; y--) {
+    for (x=cols_2; x; x--) {
+
+      CR = *cr++;
+      CB = *cb++;
+      cr_r = tables->Cr_r_tab[CR];
+      crb_g = tables->Cr_g_tab[CR] + tables->Cb_g_tab[CB];
+      cb_b = tables->Cb_b_tab[CB];
+
+      L = tables->L_tab[(int) *lum++];
+
+      pixel.word = (tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
+      *row1++ = pixel.bytes[0]; *row1++ = pixel.bytes[1]; *row1++ = pixel.bytes[2];
+
+      L = tables->L_tab[(int) *lum++];
+
+      pixel.word = (tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
+      *row1++ = pixel.bytes[0]; *row1++ = pixel.bytes[1]; *row1++ = pixel.bytes[2];
+
+      /*
+       * Now, do second row.
+       */
+
+      L = tables->L_tab [(int) *lum2++];
+
+      pixel.word = (tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
+      *row2++ = pixel.bytes[0]; *row2++ = pixel.bytes[1]; *row2++ = pixel.bytes[2];
+
+      L = tables->L_tab [(int) *lum2++];
+
+      pixel.word = (tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
+      *row2++ = pixel.bytes[0]; *row2++ = pixel.bytes[1]; *row2++ = pixel.bytes[2];
+    }
+    /*
+     * The second-row cursors are at the start of the next unprocessed
+     * row; the first-row cursors continue from there and the second-row
+     * cursors move one row further.
+     */
+    lum = lum2;
+    row1 = row2;
+    lum2 += cols;
+    row2 += cols_3;
+  }
+}
+
+/*
+ *--------------------------------------------------------------
+ *
+ * gst_colorspace_yuv_to_rgb32 --
+ *
+ *	Converts a planar image into 32-bit pixels (or 24-bit
+ *	non-packed) using the lookup tables in @tables.  Same
+ *	scheme as the 16-bit converter, but each pixel is stored
+ *	as a 4-byte unsigned int.  Two image rows are handled per
+ *	pass; a trailing odd row or column is not converted.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Writes the converted pixels to @out.
+ *
+ *--------------------------------------------------------------
+ */
+
+static void
+gst_colorspace_yuv_to_rgb32(GstColorSpaceYUVTables *tables,
+                            unsigned char *lum,
+                            unsigned char *cb,
+                            unsigned char *cr,
+                            unsigned char *out,
+                            int rows, int cols)
+{
+  unsigned int *dst_upper = (guint32 *) out;
+  unsigned int *dst_lower = dst_upper + cols;
+  unsigned char *lum_lower = lum + cols;
+  int pairs_per_row = cols >> 1;
+  int row_pairs, pairs;
+
+  for (row_pairs = rows >> 1; row_pairs != 0; row_pairs--) {
+    for (pairs = pairs_per_row; pairs != 0; pairs--) {
+      int v = *cr++;
+      int u = *cb++;
+      int red_off = tables->Cr_r_tab[v];
+      int green_off = tables->Cr_g_tab[v] + tables->Cb_g_tab[u];
+      int blue_off = tables->Cb_b_tab[u];
+      int luma;
+
+      /* upper row, two pixels */
+      luma = tables->L_tab[(int) lum[0]];
+      dst_upper[0] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      luma = tables->L_tab[(int) lum[1]];
+      dst_upper[1] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      /* lower row, same chroma */
+      luma = tables->L_tab[(int) lum_lower[0]];
+      dst_lower[0] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      luma = tables->L_tab[(int) lum_lower[1]];
+      dst_lower[1] = (tables->r_2_pix[luma + red_off] |
+                      tables->g_2_pix[luma + green_off] |
+                      tables->b_2_pix[luma + blue_off]);
+
+      lum += 2;
+      lum_lower += 2;
+      dst_upper += 2;
+      dst_lower += 2;
+    }
+
+    /* skip the row the lower cursors have just finished */
+    lum = lum_lower;
+    dst_upper = dst_lower;
+    lum_lower += cols;
+    dst_lower += cols;
+  }
+}
+
+#ifdef HAVE_LIBMMX
+// 64-bit constants for the MMX converter, each holding four 16-bit words.
+// MMX_80w: 128 in every word, subtracted from chroma samples to centre them on 0.
+static mmx_t MMX_80w = (mmx_t)(long long)0x0080008000800080LL; //dd 00080 0080h, 000800080h
+
+// Masks selecting the low (00FF) or high (FF00) byte of every word.
+static mmx_t MMX_00FFw = (mmx_t)(long long)0x00ff00ff00ff00ffLL; //dd 000FF 00FFh, 000FF00FFh
+static mmx_t MMX_FF00w = (mmx_t)(long long)0xff00ff00ff00ff00LL; //dd 0FF00 FF00h, 0FF00FF00h
+
+// Chroma coefficients scaled by 64 (the products are shifted right by 6):
+// 0x0059 = 89, 0x0072 = 114, 0xffea = -22, 0xffd2 = -46.
+static mmx_t MMX32_Vredcoeff = (mmx_t)(long long)0x0059005900590059LL;
+static mmx_t MMX32_Ubluecoeff = (mmx_t)(long long)0x0072007200720072LL;
+static mmx_t MMX32_Ugrncoeff = (mmx_t)(long long)0xffeaffeaffeaffeaLL;
+static mmx_t MMX32_Vgrncoeff = (mmx_t)(long long)0xffd2ffd2ffd2ffd2LL;
+
+// MMX conversion of a planar image to 32-bit pixels.
+//
+// Each inner iteration consumes 4 luminance samples from the current row,
+// 4 from the row below (lum+cols) and 2 samples from each chroma pointer,
+// and stores 4 pixels to row1 and 4 pixels to row1+cols.  After a row pair
+// is finished, lum and row1 skip the lower row that was already handled.
+//
+// tables is not referenced in the body.
+// The samples read through cb feed the red and green terms (the comments
+// below call them v / Cr); the samples read through cr feed the blue and
+// green terms (called u / Cb below).
+// rows>>1 and cols>>2 drop any remainder, so a trailing odd row or up to
+// 3 trailing columns are left unconverted.
+// NOTE(review): movd reads 4 bytes from each chroma pointer although only
+// 2 are used, and the movq from lum+cols reads 8 bytes although only 4 are
+// used - confirm the buffers tolerate the over-read at the end of the frame.
+// emms() is issued once after the last row.
+static void
+gst_colorspace_yuv_to_bgr32_mmx(tables, lum, cr, cb, out, rows, cols)
+ GstColorSpaceYUVTables *tables;
+ unsigned char *lum;
+ unsigned char *cr;
+ unsigned char *cb;
+ unsigned char *out;
+ int cols, rows;
+
+{
+ guint32 *row1 = (guint32 *)out; // 32 bit target
+ int cols4 = cols>>2;
+
+ int y, x;
+
+ for (y=rows>>1; y; y--) {
+ for (x=cols4; x; x--) {
+
+ // create Cr (result in mm1)
+ movd_m2r(*(mmx_t *)cb, mm1); // 0 0 0 0 v3 v2 v1 v0
+ pxor_r2r(mm7, mm7); // 00 00 00 00 00 00 00 00
+ movd_m2r(*(mmx_t *)lum, mm2); // 0 0 0 0 l3 l2 l1 l0
+ punpcklbw_r2r(mm7, mm1); // 0 v3 0 v2 00 v1 00 v0
+ punpckldq_r2r(mm1, mm1); // 00 v1 00 v0 00 v1 00 v0
+ psubw_m2r(MMX_80w, mm1); // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
+
+ // create Cr_g (result in mm0)
+ movq_r2r(mm1, mm0); // r1 r1 r0 r0 r1 r1 r0 r0
+ pmullw_m2r(MMX32_Vgrncoeff, mm0); // Cr * -46 (green term, scaled by 64)
+ pmullw_m2r(MMX32_Vredcoeff, mm1); // Cr * 89 (red term, scaled by 64)
+ psraw_i2r(6, mm0); // green term / 64
+ psraw_i2r(6, mm1); // red term / 64
+
+ // create L1 L2 (result in mm2,mm4)
+ // L2=lum+cols
+ movq_m2r(*(mmx_t *)(lum+cols),mm3); // 0 0 0 0 L3 L2 L1 L0
+ punpckldq_r2r(mm3, mm2); // L3 L2 L1 L0 l3 l2 l1 l0
+ movq_r2r(mm2, mm4); // L3 L2 L1 L0 l3 l2 l1 l0
+ pand_m2r(MMX_FF00w, mm2); // L3 0 L1 0 l3 0 l1 0
+ pand_m2r(MMX_00FFw, mm4); // 0 L2 0 L0 0 l2 0 l0
+ psrlw_i2r(8, mm2); // 0 L3 0 L1 0 l3 0 l1
+
+ // create R (result in mm6)
+ movq_r2r(mm2, mm5); // 0 L3 0 L1 0 l3 0 l1
+ movq_r2r(mm4, mm6); // 0 L2 0 L0 0 l2 0 l0
+ paddsw_r2r(mm1, mm5); // lum1+red:x R3 x R1 x r3 x r1
+ paddsw_r2r(mm1, mm6); // lum1+red:x R2 x R0 x r2 x r0
+ packuswb_r2r(mm5, mm5); // R3 R1 r3 r1 R3 R1 r3 r1
+ packuswb_r2r(mm6, mm6); // R2 R0 r2 r0 R2 R0 r2 r0
+ pxor_r2r(mm7, mm7); // 00 00 00 00 00 00 00 00
+ punpcklbw_r2r(mm5, mm6); // R3 R2 R1 R0 r3 r2 r1 r0
+
+ // create Cb (result in mm1)
+ movd_m2r(*(mmx_t *)cr, mm1); // 0 0 0 0 u3 u2 u1 u0
+ punpcklbw_r2r(mm7, mm1); // 0 u3 0 u2 00 u1 00 u0
+ punpckldq_r2r(mm1, mm1); // 00 u1 00 u0 00 u1 00 u0
+ psubw_m2r(MMX_80w, mm1); // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
+ // create Cb_g (result in mm5)
+ movq_r2r(mm1, mm5); // u1 u1 u0 u0 u1 u1 u0 u0
+ pmullw_m2r(MMX32_Ugrncoeff, mm5); // Cb * -22 (green term, scaled by 64)
+ pmullw_m2r(MMX32_Ubluecoeff, mm1); // blue*114dec=1.78125*64
+ psraw_i2r(6, mm5); // green term / 64
+ psraw_i2r(6, mm1); // blue=blue/64
+
+ // create G (result in mm7)
+ movq_r2r(mm2, mm3); // 0 L3 0 L1 0 l3 0 l1
+ movq_r2r(mm4, mm7); // 0 L2 0 L0 0 l2 0 l0
+ paddsw_r2r(mm5, mm3); // lum1+Cb_g:x G3t x G1t x g3t x g1t
+ paddsw_r2r(mm5, mm7); // lum1+Cb_g:x G2t x G0t x g2t x g0t
+ paddsw_r2r(mm0, mm3); // lum1+Cr_g:x G3 x G1 x g3 x g1
+ paddsw_r2r(mm0, mm7); // lum1+Cr_g:x G2 x G0 x g2 x g0
+ packuswb_r2r(mm3, mm3); // G3 G1 g3 g1 G3 G1 g3 g1
+ packuswb_r2r(mm7, mm7); // G2 G0 g2 g0 G2 G0 g2 g0
+ punpcklbw_r2r(mm3, mm7); // G3 G2 G1 G0 g3 g2 g1 g0
+
+ // create B (result in mm5)
+ movq_r2r(mm2, mm3); // 0 L3 0 L1 0 l3 0 l1
+ movq_r2r(mm4, mm5); // 0 L2 0 L0 0 l2 0 l0
+ paddsw_r2r(mm1, mm3); // lum1+blue:x B3 x B1 x b3 x b1
+ paddsw_r2r(mm1, mm5); // lum1+blue:x B2 x B0 x b2 x b0
+ packuswb_r2r(mm3, mm3); // B3 B1 b3 b1 B3 B1 b3 b1
+ packuswb_r2r(mm5, mm5); // B2 B0 b2 b0 B2 B0 b2 b0
+ punpcklbw_r2r(mm3, mm5); // B3 B2 B1 B0 b3 b2 b1 b0
+
+ // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+
+ pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
+ pxor_r2r(mm4, mm4); // 0 0 0 0 0 0 0 0
+ movq_r2r(mm6, mm1); // R3 R2 R1 R0 r3 r2 r1 r0
+ movq_r2r(mm5, mm3); // B3 B2 B1 B0 b3 b2 b1 b0
+ // process lower lum
+ punpcklbw_r2r(mm4, mm1); // 0 r3 0 r2 0 r1 0 r0
+ punpcklbw_r2r(mm4, mm3); // 0 b3 0 b2 0 b1 0 b0
+ movq_r2r(mm1, mm2); // 0 r3 0 r2 0 r1 0 r0
+ movq_r2r(mm3, mm0); // 0 b3 0 b2 0 b1 0 b0
+ punpcklwd_r2r(mm1, mm3); // 0 r1 0 b1 0 r0 0 b0
+ punpckhwd_r2r(mm2, mm0); // 0 r3 0 b3 0 r2 0 b2
+
+ pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
+ movq_r2r(mm7, mm1); // G3 G2 G1 G0 g3 g2 g1 g0
+ punpcklbw_r2r(mm1, mm2); // g3 0 g2 0 g1 0 g0 0
+ punpcklwd_r2r(mm4, mm2); // 0 0 g1 0 0 0 g0 0
+ por_r2r(mm3, mm2); // 0 r1 g1 b1 0 r0 g0 b0
+ movq_r2m(mm2, *(mmx_t *)row1); // wrote out ! row1
+
+ pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
+ punpcklbw_r2r(mm1, mm4); // g3 0 g2 0 g1 0 g0 0
+ punpckhwd_r2r(mm2, mm4); // 0 0 g3 0 0 0 g2 0
+ por_r2r(mm0, mm4); // 0 r3 g3 b3 0 r2 g2 b2
+ movq_r2m(mm4, *(mmx_t *)(row1+2)); // wrote out ! row1
+
+ // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+ // this can be done "destructive"
+ pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
+ punpckhbw_r2r(mm2, mm6); // 0 R3 0 R2 0 R1 0 R0
+ punpckhbw_r2r(mm1, mm5); // G3 B3 G2 B2 G1 B1 G0 B0
+ movq_r2r(mm5, mm1); // G3 B3 G2 B2 G1 B1 G0 B0
+ punpcklwd_r2r(mm6, mm1); // 0 R1 G1 B1 0 R0 G0 B0
+ movq_r2m(mm1, *(mmx_t *)(row1+cols)); // wrote out ! row2
+ punpckhwd_r2r(mm6, mm5); // 0 R3 G3 B3 0 R2 G2 B2
+ movq_r2m(mm5, *(mmx_t *)(row1+cols+2)); // wrote out ! row2
+
+ // advance by the 4 pixels / 2 chroma samples just consumed
+ lum+=4;
+ cr+=2;
+ cb+=2;
+ row1 +=4;
+ }
+ // skip the lower row, which was converted together with the upper one
+ lum += cols;
+ row1 += cols;
+ }
+
+ // leave MMX state so that later floating point code works
+ emms();
+
+}
+#endif
+