summaryrefslogtreecommitdiffstats
path: root/gst/colorspace/yuv2rgb.c
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2004-01-15 08:58:22 +0000
committerDavid Schleef <ds@schleef.org>2004-01-15 08:58:22 +0000
commit4910efb9b7baced502f3f78cf54ba580603d4415 (patch)
tree7c0ca8f3bca493926990f9f12b97970b3294051d /gst/colorspace/yuv2rgb.c
parent4506b4c38b4a74556e1134b2989334a118dcb3ca (diff)
downloadgst-plugins-bad-4910efb9b7baced502f3f78cf54ba580603d4415.tar.gz
gst-plugins-bad-4910efb9b7baced502f3f78cf54ba580603d4415.tar.bz2
gst-plugins-bad-4910efb9b7baced502f3f78cf54ba580603d4415.zip
Duplicate the ext/hermes colorspace plugin, and remove Hermes code and GPL code. Fix for new caps negotiation. Rewr...
Original commit message from CVS: * configure.ac: * gst/colorspace/Makefile.am: * gst/colorspace/gstcolorspace.c: * gst/colorspace/gstcolorspace.h: * gst/colorspace/yuv2rgb.c: * gst/colorspace/yuv2rgb.h: Duplicate the ext/hermes colorspace plugin, and remove Hermes code and GPL code. Fix for new caps negotiation. Rewrite much of the format handling code, and some of the conversion code. Basically, rewrote almost everything. This element handles I420, YV12 to RGB conversions. * ext/hermes/Makefile.am: * ext/hermes/gsthermescolorspace.c: Rename colorspace to hermescolorspace. Fix negotiation issues. Remove non-Hermes related code. This element handles lots of RGB to RGB conversions, but no YUV. * ext/hermes/gstcolorspace.c: * ext/hermes/gstcolorspace.h: * ext/hermes/rgb2yuv.c: * ext/hermes/yuv2rgb.c: * ext/hermes/yuv2rgb.h: * ext/hermes/yuv2rgb_mmx16.s: * ext/hermes/yuv2yuv.c: * ext/hermes/yuv2yuv.h: Remove old code.
Diffstat (limited to 'gst/colorspace/yuv2rgb.c')
-rw-r--r--gst/colorspace/yuv2rgb.c720
1 files changed, 720 insertions, 0 deletions
diff --git a/gst/colorspace/yuv2rgb.c b/gst/colorspace/yuv2rgb.c
new file mode 100644
index 00000000..3627e896
--- /dev/null
+++ b/gst/colorspace/yuv2rgb.c
@@ -0,0 +1,720 @@
+/* GStreamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "yuv2rgb.h"
+
+#undef HAVE_LIBMMX
+
+#ifdef HAVE_LIBMMX
+#include <mmx.h>
+#endif
+
+static int V_r_tab [256];
+static int V_g_tab [256];
+static int U_g_tab [256];
+static int U_b_tab [256];
+
+#define CB_BASE 1
+#define CR_BASE (CB_BASE*CB_RANGE)
+#define LUM_BASE (CR_BASE*CR_RANGE)
+
+#define Min(x,y) (((x) < (y)) ? (x) : (y))
+#define Max(x,y) (((x) > (y)) ? (x) : (y))
+
+#define GAMMA_CORRECTION(x) ((int)(pow((x) / 255.0, 1.0 / gammaCorrect) * 255.0))
+#define CHROMA_CORRECTION256(x) ((x) >= 128 \
+ ? 128 + Min(127, (int)(((x) - 128.0) * chromaCorrect)) \
+ : 128 - Min(128, (int)((128.0 - (x)) * chromaCorrect)))
+#define CHROMA_CORRECTION128(x) ((x) >= 0 \
+ ? Min(127, (int)(((x) * chromaCorrect))) \
+ : Max(-128, (int)(((x) * chromaCorrect))))
+#define CHROMA_CORRECTION256D(x) ((x) >= 128 \
+ ? 128.0 + Min(127.0, (((x) - 128.0) * chromaCorrect)) \
+ : 128.0 - Min(128.0, (((128.0 - (x)) * chromaCorrect))))
+#define CHROMA_CORRECTION128D(x) ((x) >= 0 \
+ ? Min(127.0, ((x) * chromaCorrect)) \
+ : Max(-128.0, ((x) * chromaCorrect)))
+
+
+void gst_colorspace_I420_to_rgb16 (GstColorspace *space, unsigned char *src, unsigned char *dest);
+void gst_colorspace_I420_to_rgb24 (GstColorspace *space, unsigned char *src, unsigned char *dest);
+void gst_colorspace_I420_to_rgb32 (GstColorspace *space, unsigned char *src, unsigned char *dest);
+#ifdef HAVE_LIBMMX
+void gst_colorspace_I420_to_bgr16_mmx (GstColorspace *space, unsigned char *src, unsigned char *dest);
+void gst_colorspace_I420_to_bgr32_mmx (GstColorspace *space, unsigned char *src, unsigned char *dest);
+#endif
+
+void gst_colorspace_YV12_to_rgb16 (GstColorspace *space, unsigned char *src, unsigned char *dest);
+void gst_colorspace_YV12_to_rgb24 (GstColorspace *space, unsigned char *src, unsigned char *dest);
+void gst_colorspace_YV12_to_rgb32 (GstColorspace *space, unsigned char *src, unsigned char *dest);
+#ifdef HAVE_LIBMMX
+void gst_colorspace_YV12_to_bgr16_mmx (GstColorspace *space, unsigned char *src, unsigned char *dest);
+void gst_colorspace_YV12_to_bgr32_mmx (GstColorspace *space, unsigned char *src, unsigned char *dest);
+#endif
+
+static void
+gst_colorspace_yuv_to_rgb16(GstColorspace *space,
+ unsigned char *out,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ int cols, int rows);
+static void
+gst_colorspace_yuv_to_rgb24(GstColorspace *space,
+ unsigned char *out,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ int cols, int rows);
+static void
+gst_colorspace_yuv_to_rgb32(GstColorspace *space,
+ unsigned char *out,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ int cols, int rows);
+#if 0
+static void gst_colorspace_yuv_to_rgb16(GstColorspaceYUVTables *tables,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int cols, int rows);
+static void gst_colorspace_yuv_to_rgb24(GstColorspaceYUVTables *tables,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int cols, int rows);
+static void gst_colorspace_yuv_to_rgb32(GstColorspaceYUVTables *tables,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int cols, int rows);
+#ifdef HAVE_LIBMMX
+void gst_colorspace_yuv_to_bgr32_mmx(GstColorspaceYUVTables *tables,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int cols, int rows);
+extern void gst_colorspace_yuv_to_bgr16_mmx(GstColorspaceYUVTables *tables,
+ unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int cols, int rows);
+#endif
+#endif
+
+#define ROUND_UP_2(x) (((x)+1)&~1)
+#define ROUND_UP_4(x) (((x)+3)&~3)
+#define ROUND_UP_8(x) (((x)+7)&~7)
+
+
+void gst_colorspace_I420_to_rgb32(GstColorspace *space, unsigned char *dest,
+ unsigned char *src)
+{
+ unsigned char *src_U;
+ unsigned char *src_V;
+
+ src_U = src + ROUND_UP_4 (space->width) * ROUND_UP_2 (space->height);
+ src_V = src_U + ROUND_UP_8 (space->width)/2 * ROUND_UP_2 (space->height)/2;
+
+ gst_colorspace_yuv_to_rgb32(space,
+ dest,
+ src, src_U, src_V,
+ space->width, space->height);
+}
+
+void gst_colorspace_I420_to_rgb24(GstColorspace *space, unsigned char *dest,
+ unsigned char *src)
+{
+ unsigned char *src_U;
+ unsigned char *src_V;
+
+ src_U = src + ROUND_UP_4 (space->width) * ROUND_UP_2 (space->height);
+ src_V = src_U + ROUND_UP_8 (space->width)/2 * ROUND_UP_2 (space->height)/2;
+
+ gst_colorspace_yuv_to_rgb24(space,
+ dest,
+ src, src_U, src_V,
+ space->width, space->height);
+}
+
+void gst_colorspace_I420_to_rgb16(GstColorspace *space, unsigned char *dest,
+ unsigned char *src)
+{
+ unsigned char *src_U;
+ unsigned char *src_V;
+
+ src_U = src + ROUND_UP_4 (space->width) * ROUND_UP_2 (space->height);
+ src_V = src_U + ROUND_UP_8 (space->width)/2 * ROUND_UP_2 (space->height)/2;
+
+ gst_colorspace_yuv_to_rgb16(space,
+ dest,
+ src, src_U, src_V,
+ space->width, space->height);
+}
+
+void gst_colorspace_YV12_to_rgb32(GstColorspace *space, unsigned char *dest,
+ unsigned char *src)
+{
+ unsigned char *src_U;
+ unsigned char *src_V;
+
+ src_V = src + ROUND_UP_4 (space->width) * ROUND_UP_2 (space->height);
+ src_U = src_V + ROUND_UP_8 (space->width)/2 * ROUND_UP_2 (space->height)/2;
+
+ gst_colorspace_yuv_to_rgb32(space,
+ dest,
+ src, src_U, src_V,
+ space->width, space->height);
+}
+
+void gst_colorspace_YV12_to_rgb24(GstColorspace *space, unsigned char *dest,
+ unsigned char *src)
+{
+ unsigned char *src_U;
+ unsigned char *src_V;
+
+ src_V = src + ROUND_UP_4 (space->width) * ROUND_UP_2 (space->height);
+ src_U = src_V + ROUND_UP_8 (space->width)/2 * ROUND_UP_2 (space->height)/2;
+
+ gst_colorspace_yuv_to_rgb24(space,
+ dest,
+ src, src_U, src_V,
+ space->width, space->height);
+}
+
+void gst_colorspace_YV12_to_rgb16(GstColorspace *space, unsigned char *dest,
+ unsigned char *src)
+{
+ unsigned char *src_U;
+ unsigned char *src_V;
+
+ src_V = src + ROUND_UP_4 (space->width) * ROUND_UP_2 (space->height);
+ src_U = src_V + ROUND_UP_8 (space->width)/2 * ROUND_UP_2 (space->height)/2;
+
+ gst_colorspace_yuv_to_rgb16(space,
+ dest,
+ src, src_U, src_V,
+ space->width, space->height);
+}
+
+#ifdef HAVE_LIBMMX
+void gst_colorspace_I420_to_bgr32_mmx(GstColorspace *space, unsigned char *src, unsigned char *dest) {
+ int size;
+ GST_DEBUG ("gst_colorspace_I420_to_rgb32_mmx");
+
+ size = space->width * space->height;
+
+ gst_colorspace_yuv_to_bgr32_mmx(NULL,
+ src, /* Y component */
+ src+size, /* cr component */
+ src+size+(size>>2), /* cb component */
+ dest,
+ space->height,
+ space->width);
+
+}
+
+void gst_colorspace_I420_to_bgr16_mmx(GstColorspace *space, unsigned char *src, unsigned char *dest) {
+ int size;
+ GST_DEBUG ("gst_colorspace_I420_to_bgr16_mmx ");
+
+ size = space->width * space->height;
+
+ gst_colorspace_yuv_to_bgr16_mmx(NULL,
+ src, /* Y component */
+ src+size, /* cr component */
+ src+size+(size>>2), /* cb component */
+ dest,
+ space->height,
+ space->width);
+ GST_DEBUG ("gst_colorspace_I420_to_bgr16_mmx done");
+
+}
+
+void gst_colorspace_YV12_to_bgr32_mmx(GstColorspace *space, unsigned char *src, unsigned char *dest) {
+ int size;
+ GST_DEBUG ("gst_colorspace_YV12_to_rgb32_mmx");
+
+ size = space->width * space->height;
+
+ gst_colorspace_yuv_to_bgr32_mmx(NULL,
+ src, /* Y component */
+ src+size+(size>>2), /* cb component */
+ src+size, /* cr component */
+ dest,
+ space->height,
+ space->width);
+
+}
+
+void gst_colorspace_YV12_to_bgr16_mmx(GstColorspace *space, unsigned char *src, unsigned char *dest) {
+ int size;
+ GST_DEBUG ("gst_colorspace_YV12_to_bgr16_mmx ");
+
+ size = space->width * space->height;
+
+ gst_colorspace_yuv_to_bgr16_mmx(NULL,
+ src, /* Y component */
+ src+size+(size>>2), /* cb component */
+ src+size, /* cr component */
+ dest,
+ space->height,
+ space->width);
+ GST_DEBUG ("gst_colorspace_YV12_to_bgr16_mmx done");
+
+}
+#endif
+/*
+ * How many 1 bits are there in the longword.
+ * Low performance, do not call often.
+ */
+
+static int
+number_of_bits_set(a)
+unsigned long a;
+{
+ if(!a) return 0;
+ if(a & 1) return 1 + number_of_bits_set(a >> 1);
+ return(number_of_bits_set(a >> 1));
+}
+
+/*
+ * How many 0 bits are there at most significant end of longword.
+ * Low performance, do not call often.
+ */
+static int
+free_bits_at_top(a)
+unsigned long a;
+{
+ /* assume char is 8 bits */
+ if(!a) return sizeof(unsigned long) * 8;
+ /* assume twos complement */
+ if(((long)a) < 0l) return 0;
+ return 1 + free_bits_at_top ( a << 1);
+}
+
+/*
+ * How many 0 bits are there at least significant end of longword.
+ * Low performance, do not call often.
+ */
+static int
+free_bits_at_bottom(a)
+unsigned long a;
+{
+ /* assume char is 8 bits */
+ if(!a) return sizeof(unsigned long) * 8;
+ if(((long)a) & 1l) return 0;
+ return 1 + free_bits_at_bottom ( a >> 1);
+}
+
+/*
+ *--------------------------------------------------------------
+ *
+ * InitColor16Dither --
+ *
+ * To get rid of the multiply and other conversions in color
+ * dither, we use a lookup table.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The lookup tables are initialized.
+ *
+ *--------------------------------------------------------------
+ */
+
+void
+gst_colorspace_table_init (GstColorspace *space)
+{
+ int i;
+
+ for (i=0; i<256; i++) {
+ V_r_tab[i] = (0.419/0.299) * (i-128);
+ V_g_tab[i] = -(0.299/0.419) * (i-128);
+ U_g_tab[i] = -(0.114/0.331) * (i-128);
+ U_b_tab[i] = (0.587/0.331) * (i-128);
+ }
+#if 0
+ int CR, CB, i;
+ int *L_tab, *Cr_r_tab, *Cr_g_tab, *Cb_g_tab, *Cb_b_tab;
+ long *r_2_pix_alloc;
+ long *g_2_pix_alloc;
+ long *b_2_pix_alloc;
+ long depth = 32;
+ long red_mask = 0xff0000;
+ long green_mask = 0x00ff00;
+ long blue_mask = 0x0000ff;
+
+ L_tab = space->L_tab = (int *)malloc(256*sizeof(int));
+ Cr_r_tab = space->Cr_r_tab = (int *)malloc(256*sizeof(int));
+ Cr_g_tab = space->Cr_g_tab = (int *)malloc(256*sizeof(int));
+ Cb_g_tab = space->Cb_g_tab = (int *)malloc(256*sizeof(int));
+ Cb_b_tab = space->Cb_b_tab = (int *)malloc(256*sizeof(int));
+
+ r_2_pix_alloc = (long *)malloc(768*sizeof(long));
+ g_2_pix_alloc = (long *)malloc(768*sizeof(long));
+ b_2_pix_alloc = (long *)malloc(768*sizeof(long));
+
+ if (L_tab == NULL ||
+ Cr_r_tab == NULL ||
+ Cr_g_tab == NULL ||
+ Cb_g_tab == NULL ||
+ Cb_b_tab == NULL ||
+ r_2_pix_alloc == NULL ||
+ g_2_pix_alloc == NULL ||
+ b_2_pix_alloc == NULL) {
+ fprintf(stderr, "Could not get enough memory in InitColorDither\n");
+ exit(1);
+ }
+
+ for (i=0; i<256; i++) {
+ L_tab[i] = i;
+ /*
+ if (gammaCorrectFlag) {
+ L_tab[i] = GAMMA_CORRECTION(i);
+ }
+ */
+
+ CB = CR = i;
+ /*
+ if (chromaCorrectFlag) {
+ CB -= 128;
+ CB = CHROMA_CORRECTION128(CB);
+ CR -= 128;
+ CR = CHROMA_CORRECTION128(CR);
+ }
+ else
+ */
+ {
+ CB -= 128; CR -= 128;
+ }
+ Cr_r_tab[i] = (0.419/0.299) * CR;
+ Cr_g_tab[i] = -(0.299/0.419) * CR;
+ Cb_g_tab[i] = -(0.114/0.331) * CB;
+ Cb_b_tab[i] = (0.587/0.331) * CB;
+
+ }
+
+ /*
+ * Set up entries 0-255 in rgb-to-pixel value tables.
+ */
+ for (i = 0; i < 256; i++) {
+ r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(red_mask));
+ r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(red_mask);
+ g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(green_mask));
+ g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(green_mask);
+ b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(blue_mask));
+ b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(blue_mask);
+ /*
+ * If we have 16-bit output depth, then we double the value
+ * in the top word. This means that we can write out both
+ * pixels in the pixel doubling mode with one op. It is
+ * harmless in the normal case as storing a 32-bit value
+ * through a short pointer will lose the top bits anyway.
+ * A similar optimisation for Alpha for 64 bit has been
+ * prepared for, but is not yet implemented.
+ */
+ if(!(depth == 32) && !(depth == 24)) {
+
+ r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
+ g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
+ b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
+
+ }
+#ifdef SIXTYFOUR_BIT
+ if(depth == 32) {
+
+ r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 32;
+ g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 32;
+ b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 32;
+
+ }
+#endif
+ }
+
+ /*
+ * Spread out the values we have to the rest of the array so that
+ * we do not need to check for overflow.
+ */
+ for (i = 0; i < 256; i++) {
+ r_2_pix_alloc[i] = r_2_pix_alloc[256];
+ r_2_pix_alloc[i+ 512] = r_2_pix_alloc[511];
+ g_2_pix_alloc[i] = g_2_pix_alloc[256];
+ g_2_pix_alloc[i+ 512] = g_2_pix_alloc[511];
+ b_2_pix_alloc[i] = b_2_pix_alloc[256];
+ b_2_pix_alloc[i+ 512] = b_2_pix_alloc[511];
+ }
+
+ space->r_2_pix = r_2_pix_alloc + 256;
+ space->g_2_pix = g_2_pix_alloc + 256;
+ space->b_2_pix = b_2_pix_alloc + 256;
+#endif
+}
+
+static void
+gst_colorspace_yuv_to_rgb32(GstColorspace *space,
+ unsigned char *dest,
+ unsigned char *Y,
+ unsigned char *U,
+ unsigned char *V,
+ int width, int height)
+{
+ int x,y;
+ int src_rowstride;
+ int dest_rowstride;
+
+ src_rowstride = ROUND_UP_4 (space->width);
+ dest_rowstride = width * 4;
+ for(y=0;y<height;y++){
+ for(x=0;x<width;x++){
+ dest[x*4 + 0] = 0;
+ dest[x*3 + 1] = CLAMP (Y[x] + V_r_tab[V[x/2]], 0, 255);
+ dest[x*3 + 2] = CLAMP (Y[x] + U_g_tab[U[x/2]] + V_g_tab[V[x/2]], 0, 255);
+ dest[x*3 + 3] = CLAMP (Y[x] + U_b_tab[U[x/2]], 0, 255);
+ }
+ Y += src_rowstride;
+ dest += dest_rowstride;
+ if (y&1) {
+ U += src_rowstride/2;
+ V += src_rowstride/2;
+ }
+ }
+}
+
+static void
+gst_colorspace_yuv_to_rgb24(GstColorspace *space,
+ unsigned char *dest,
+ unsigned char *Y,
+ unsigned char *U,
+ unsigned char *V,
+ int width, int height)
+{
+ int x,y;
+ int src_rowstride;
+ int dest_rowstride;
+
+ src_rowstride = ROUND_UP_4 (space->width);
+ dest_rowstride = ROUND_UP_4 (width * 3);
+ for(y=0;y<height;y++){
+ for(x=0;x<width;x++){
+ dest[x*3 + 0] = CLAMP (Y[x] + V_r_tab[V[x/2]], 0, 255);
+ dest[x*3 + 1] = CLAMP (Y[x] + U_g_tab[U[x/2]] + V_g_tab[V[x/2]], 0, 255);
+ dest[x*3 + 2] = CLAMP (Y[x] + U_b_tab[U[x/2]], 0, 255);
+ }
+ Y += src_rowstride;
+ dest += dest_rowstride;
+ if (y&1) {
+ U += src_rowstride/2;
+ V += src_rowstride/2;
+ }
+ }
+}
+
+static void
+gst_colorspace_yuv_to_rgb16(GstColorspace *space,
+ unsigned char *dest,
+ unsigned char *Y,
+ unsigned char *U,
+ unsigned char *V,
+ int width, int height)
+{
+ int x,y;
+ int src_rowstride;
+ int dest_rowstride;
+ int r, g, b;
+
+ src_rowstride = ROUND_UP_4 (space->width);
+ dest_rowstride = ROUND_UP_4 (width * 2);
+ for(y=0;y<height;y++){
+ for(x=0;x<width;x++){
+ r = CLAMP (Y[x] + V_r_tab[V[x/2]], 0, 255);
+ g = CLAMP (Y[x] + U_g_tab[U[x/2]] + V_g_tab[V[x/2]], 0, 255);
+ b = CLAMP (Y[x] + U_b_tab[U[x/2]], 0, 255);
+ *(unsigned short *)(dest + x*2) = ((r&0xf8)<<8) | ((g&0xfc)<<3) | (b>>3);
+ }
+ Y += src_rowstride;
+ dest += dest_rowstride;
+ if (y&1) {
+ U += src_rowstride/2;
+ V += src_rowstride/2;
+ }
+ }
+}
+
+#ifdef HAVE_LIBMMX
+static mmx_t MMX_80w = (mmx_t)(long long)0x0080008000800080LL; /*dd 00080 0080h, 000800080h */
+
+static mmx_t MMX_00FFw = (mmx_t)(long long)0x00ff00ff00ff00ffLL; /*dd 000FF 00FFh, 000FF00FFh */
+static mmx_t MMX_FF00w = (mmx_t)(long long)0xff00ff00ff00ff00LL; /*dd 000FF 00FFh, 000FF00FFh */
+
+static mmx_t MMX32_Vredcoeff = (mmx_t)(long long)0x0059005900590059LL;
+static mmx_t MMX32_Ubluecoeff = (mmx_t)(long long)0x0072007200720072LL;
+static mmx_t MMX32_Ugrncoeff = (mmx_t)(long long)0xffeaffeaffeaffeaLL;
+static mmx_t MMX32_Vgrncoeff = (mmx_t)(long long)0xffd2ffd2ffd2ffd2LL;
+
+static void
+gst_colorspace_yuv_to_bgr32_mmx(tables, lum, cr, cb, out, rows, cols)
+ GstColorspaceYUVTables *tables;
+ unsigned char *lum;
+ unsigned char *cr;
+ unsigned char *cb;
+ unsigned char *out;
+ int cols, rows;
+
+{
+ guint32 *row1 = (guint32 *)out; /* 32 bit target */
+ int cols4 = cols>>2;
+
+ int y, x;
+
+ for (y=rows>>1; y; y--) {
+ for (x=cols4; x; x--) {
+
+ /* create Cr (result in mm1) */
+ movd_m2r(*(mmx_t *)cb, mm1); /* 0 0 0 0 v3 v2 v1 v0 */
+ pxor_r2r(mm7, mm7); /* 00 00 00 00 00 00 00 00 */
+ movd_m2r(*(mmx_t *)lum, mm2); /* 0 0 0 0 l3 l2 l1 l0 */
+ punpcklbw_r2r(mm7, mm1); /* 0 v3 0 v2 00 v1 00 v0 */
+ punpckldq_r2r(mm1, mm1); /* 00 v1 00 v0 00 v1 00 v0 */
+ psubw_m2r(MMX_80w, mm1); /* mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 */
+
+ /* create Cr_g (result in mm0) */
+ movq_r2r(mm1, mm0); /* r1 r1 r0 r0 r1 r1 r0 r0 */
+ pmullw_m2r(MMX32_Vgrncoeff, mm0); /* red*-46dec=0.7136*64 */
+ pmullw_m2r(MMX32_Vredcoeff, mm1); /* red*89dec=1.4013*64 */
+ psraw_i2r(6, mm0); /* red=red/64 */
+ psraw_i2r(6, mm1); /* red=red/64 */
+
+ /* create L1 L2 (result in mm2,mm4) */
+ /* L2=lum+cols */
+ movq_m2r(*(mmx_t *)(lum+cols),mm3); /* 0 0 0 0 L3 L2 L1 L0 */
+ punpckldq_r2r(mm3, mm2); /* L3 L2 L1 L0 l3 l2 l1 l0 */
+ movq_r2r(mm2, mm4); /* L3 L2 L1 L0 l3 l2 l1 l0 */
+ pand_m2r(MMX_FF00w, mm2); /* L3 0 L1 0 l3 0 l1 0 */
+ pand_m2r(MMX_00FFw, mm4); /* 0 L2 0 L0 0 l2 0 l0 */
+ psrlw_i2r(8, mm2); /* 0 L3 0 L1 0 l3 0 l1 */
+
+ /* create R (result in mm6) */
+ movq_r2r(mm2, mm5); /* 0 L3 0 L1 0 l3 0 l1 */
+ movq_r2r(mm4, mm6); /* 0 L2 0 L0 0 l2 0 l0 */
+ paddsw_r2r(mm1, mm5); /* lum1+red:x R3 x R1 x r3 x r1 */
+ paddsw_r2r(mm1, mm6); /* lum1+red:x R2 x R0 x r2 x r0 */
+ packuswb_r2r(mm5, mm5); /* R3 R1 r3 r1 R3 R1 r3 r1 */
+ packuswb_r2r(mm6, mm6); /* R2 R0 r2 r0 R2 R0 r2 r0 */
+ pxor_r2r(mm7, mm7); /* 00 00 00 00 00 00 00 00 */
+ punpcklbw_r2r(mm5, mm6); /* R3 R2 R1 R0 r3 r2 r1 r0 */
+
+ /* create Cb (result in mm1) */
+ movd_m2r(*(mmx_t *)cr, mm1); /* 0 0 0 0 u3 u2 u1 u0 */
+ punpcklbw_r2r(mm7, mm1); /* 0 u3 0 u2 00 u1 00 u0 */
+ punpckldq_r2r(mm1, mm1); /* 00 u1 00 u0 00 u1 00 u0 */
+ psubw_m2r(MMX_80w, mm1); /* mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 */
+ /* create Cb_g (result in mm5) */
+ movq_r2r(mm1, mm5); /* u1 u1 u0 u0 u1 u1 u0 u0 */
+ pmullw_m2r(MMX32_Ugrncoeff, mm5); /* blue*-109dec=1.7129*64 */
+ pmullw_m2r(MMX32_Ubluecoeff, mm1); /* blue*114dec=1.78125*64 */
+ psraw_i2r(6, mm5); /* blue=red/64 */
+ psraw_i2r(6, mm1); /* blue=blue/64 */
+
+ /* create G (result in mm7) */
+ movq_r2r(mm2, mm3); /* 0 L3 0 L1 0 l3 0 l1 */
+ movq_r2r(mm4, mm7); /* 0 L2 0 L0 0 l2 0 l1 */
+ paddsw_r2r(mm5, mm3); /* lum1+Cb_g:x G3t x G1t x g3t x g1t */
+ paddsw_r2r(mm5, mm7); /* lum1+Cb_g:x G2t x G0t x g2t x g0t */
+ paddsw_r2r(mm0, mm3); /* lum1+Cr_g:x G3 x G1 x g3 x g1 */
+ paddsw_r2r(mm0, mm7); /* lum1+blue:x G2 x G0 x g2 x g0 */
+ packuswb_r2r(mm3, mm3); /* G3 G1 g3 g1 G3 G1 g3 g1 */
+ packuswb_r2r(mm7, mm7); /* G2 G0 g2 g0 G2 G0 g2 g0 */
+ punpcklbw_r2r(mm3, mm7); /* G3 G2 G1 G0 g3 g2 g1 g0 */
+
+ /* create B (result in mm5) */
+ movq_r2r(mm2, mm3); /* 0 L3 0 L1 0 l3 0 l1 */
+ movq_r2r(mm4, mm5); /* 0 L2 0 L0 0 l2 0 l1 */
+ paddsw_r2r(mm1, mm3); /* lum1+blue:x B3 x B1 x b3 x b1 */
+ paddsw_r2r(mm1, mm5); /* lum1+blue:x B2 x B0 x b2 x b0 */
+ packuswb_r2r(mm3, mm3); /* B3 B1 b3 b1 B3 B1 b3 b1 */
+ packuswb_r2r(mm5, mm5); /* B2 B0 b2 b0 B2 B0 b2 b0 */
+ punpcklbw_r2r(mm3, mm5); /* B3 B2 B1 B0 b3 b2 b1 b0 */
+
+ /* fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) */
+
+ pxor_r2r(mm2, mm2); /* 0 0 0 0 0 0 0 0 */
+ pxor_r2r(mm4, mm4); /* 0 0 0 0 0 0 0 0 */
+ movq_r2r(mm6, mm1); /* R3 R2 R1 R0 r3 r2 r1 r0 */
+ movq_r2r(mm5, mm3); /* B3 B2 B1 B0 b3 b2 b1 b0 */
+ /* process lower lum */
+ punpcklbw_r2r(mm4, mm1); /* 0 r3 0 r2 0 r1 0 r0 */
+ punpcklbw_r2r(mm4, mm3); /* 0 b3 0 b2 0 b1 0 b0 */
+ movq_r2r(mm1, mm2); /* 0 r3 0 r2 0 r1 0 r0 */
+ movq_r2r(mm3, mm0); /* 0 b3 0 b2 0 b1 0 b0 */
+ punpcklwd_r2r(mm1, mm3); /* 0 r1 0 b1 0 r0 0 b0 */
+ punpckhwd_r2r(mm2, mm0); /* 0 r3 0 b3 0 r2 0 b2 */
+
+ pxor_r2r(mm2, mm2); /* 0 0 0 0 0 0 0 0 */
+ movq_r2r(mm7, mm1); /* G3 G2 G1 G0 g3 g2 g1 g0 */
+ punpcklbw_r2r(mm1, mm2); /* g3 0 g2 0 g1 0 g0 0 */
+ punpcklwd_r2r(mm4, mm2); /* 0 0 g1 0 0 0 g0 0 */
+ por_r2r(mm3, mm2); /* 0 r1 g1 b1 0 r0 g0 b0 */
+ movq_r2m(mm2, *(mmx_t *)row1); /* wrote out ! row1 */
+
+ pxor_r2r(mm2, mm2); /* 0 0 0 0 0 0 0 0 */
+ punpcklbw_r2r(mm1, mm4); /* g3 0 g2 0 g1 0 g0 0 */
+ punpckhwd_r2r(mm2, mm4); /* 0 0 g3 0 0 0 g2 0 */
+ por_r2r(mm0, mm4); /* 0 r3 g3 b3 0 r2 g2 b2 */
+ movq_r2m(mm4, *(mmx_t *)(row1+2)); /* wrote out ! row1 */
+
+ /* fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) */
+ /* this can be done "destructive" */
+ pxor_r2r(mm2, mm2); /* 0 0 0 0 0 0 0 0 */
+ punpckhbw_r2r(mm2, mm6); /* 0 R3 0 R2 0 R1 0 R0 */
+ punpckhbw_r2r(mm1, mm5); /* G3 B3 G2 B2 G1 B1 G0 B0 */
+ movq_r2r(mm5, mm1); /* G3 B3 G2 B2 G1 B1 G0 B0 */
+ punpcklwd_r2r(mm6, mm1); /* 0 R1 G1 B1 0 R0 G0 B0 */
+ movq_r2m(mm1, *(mmx_t *)(row1+cols)); /* wrote out ! row2 */
+ punpckhwd_r2r(mm6, mm5); /* 0 R3 G3 B3 0 R2 G2 B2 */
+ movq_r2m(mm5, *(mmx_t *)(row1+cols+2)); /* wrote out ! row2 */
+
+ lum+=4;
+ cr+=2;
+ cb+=2;
+ row1 +=4;
+ }
+ lum += cols;
+ row1 += cols;
+ }
+
+ emms();
+
+}
+#endif
+