summaryrefslogtreecommitdiffstats
path: root/gst/deinterlace2
diff options
context:
space:
mode:
authorSebastian Dröge <slomo@circular-chaos.org>2008-06-29 10:56:47 +0000
committerSebastian Dröge <slomo@circular-chaos.org>2008-06-29 10:56:47 +0000
commit67088dcea6eda4eeef243940cda2e76acbc2a398 (patch)
tree5503d9ab1c8567f4f938f8ce37cd036a6cea1d49 /gst/deinterlace2
parenta1b7dde06923100706cbf47480d6d5e59dfb62fe (diff)
downloadgst-plugins-bad-67088dcea6eda4eeef243940cda2e76acbc2a398.tar.gz
gst-plugins-bad-67088dcea6eda4eeef243940cda2e76acbc2a398.tar.bz2
gst-plugins-bad-67088dcea6eda4eeef243940cda2e76acbc2a398.zip
gst/deinterlace2/: Get rid of speedy.[ch] as we don't use most of it's code anyway and it doesn't seem to be relicens...
Original commit message from CVS: * gst/deinterlace2/Makefile.am: * gst/deinterlace2/gstdeinterlace2.c: (gst_deinterlace2_init): * gst/deinterlace2/tvtime/greedy.c: (deinterlace_frame_di_greedy): * gst/deinterlace2/tvtime/greedyh.c: (deinterlace_frame_di_greedyh): * gst/deinterlace2/tvtime/speedtools.h: * gst/deinterlace2/tvtime/speedy.c: * gst/deinterlace2/tvtime/speedy.h: * gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy): * gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc: * gst/deinterlace2/tvtime/vfir.c: (deinterlace_frame_vfir): Get rid of speedy.[ch] as we don't use most of it's code anyway and it doesn't seem to be relicensed to LGPL. Use memcpy() instead of the speedy memcpy everywhere instead. * gst/deinterlace2/gstdeinterlace2.h: Remove many unused declarations.
Diffstat (limited to 'gst/deinterlace2')
-rw-r--r--gst/deinterlace2/Makefile.am3
-rw-r--r--gst/deinterlace2/gstdeinterlace2.c4
-rw-r--r--gst/deinterlace2/gstdeinterlace2.h92
-rw-r--r--gst/deinterlace2/tvtime/greedy.c24
-rw-r--r--gst/deinterlace2/tvtime/greedyh.c16
-rw-r--r--gst/deinterlace2/tvtime/speedtools.h54
-rw-r--r--gst/deinterlace2/tvtime/speedy.c2791
-rw-r--r--gst/deinterlace2/tvtime/speedy.h308
-rw-r--r--gst/deinterlace2/tvtime/tomsmocomp.c27
-rw-r--r--gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc2
-rw-r--r--gst/deinterlace2/tvtime/vfir.c20
11 files changed, 21 insertions, 3320 deletions
diff --git a/gst/deinterlace2/Makefile.am b/gst/deinterlace2/Makefile.am
index d6e38c91..a854fc28 100644
--- a/gst/deinterlace2/Makefile.am
+++ b/gst/deinterlace2/Makefile.am
@@ -4,7 +4,6 @@ libgstdeinterlace2_la_SOURCES = \
gstdeinterlace2.c \
tvtime/greedy.c \
tvtime/greedyh.c \
- tvtime/speedy.c \
tvtime/vfir.c \
tvtime/tomsmocomp.c
@@ -22,8 +21,6 @@ noinst_HEADERS = \
tvtime/greedyh.asm \
tvtime/greedyhmacros.h \
tvtime/plugins.h \
- tvtime/speedtools.h \
- tvtime/speedy.h \
tvtime/x86-64_macros.inc \
tvtime/tomsmocomp.h \
tvtime/tomsmocomp/SearchLoop0A.inc \
diff --git a/gst/deinterlace2/gstdeinterlace2.c b/gst/deinterlace2/gstdeinterlace2.c
index 3998a8fc..e42afaba 100644
--- a/gst/deinterlace2/gstdeinterlace2.c
+++ b/gst/deinterlace2/gstdeinterlace2.c
@@ -28,7 +28,6 @@
#include <gst/video/video.h>
#include "tvtime/plugins.h"
-#include "tvtime/speedy.h"
#include <string.h>
@@ -234,9 +233,6 @@ gst_deinterlace2_init (GstDeinterlace2 * object, GstDeinterlace2Class * klass)
object->cpu_feature_flags = oil_cpu_get_flags ();
- setup_speedy_calls (object->cpu_feature_flags, 0);
- object->pMemcpy = speedy_memcpy;
-
object->method = dscaler_tomsmocomp_get_method ();
object->field_layout = GST_DEINTERLACE2_LAYOUT_AUTO;
object->fields = GST_DEINTERLACE2_ALL;
diff --git a/gst/deinterlace2/gstdeinterlace2.h b/gst/deinterlace2/gstdeinterlace2.h
index 1777f7c0..b312037c 100644
--- a/gst/deinterlace2/gstdeinterlace2.h
+++ b/gst/deinterlace2/gstdeinterlace2.h
@@ -47,76 +47,9 @@ typedef struct _GstDeinterlace2Class GstDeinterlace2Class;
typedef struct deinterlace_setting_s deinterlace_setting_t;
typedef struct deinterlace_method_s deinterlace_method_t;
-typedef struct deinterlace_scanline_data_s deinterlace_scanline_data_t;
-typedef struct deinterlace_frame_data_s deinterlace_frame_data_t;
-
-/*
- * There are two scanline functions that every deinterlacer plugin
- * must implement to do its work: one for a 'copy' and one for
- * an 'interpolate' for the currently active field. This so so that
- * while plugins may be delaying fields, the external API assumes that
- * the plugin is completely realtime.
- *
- * Each deinterlacing routine can require data from up to four fields.
- * The most recent field captured is field 0, and increasing numbers go
- * backwards in time.
- */
-struct deinterlace_scanline_data_s
-{
- guint8 *tt0, *t0, *m0, *b0, *bb0;
- guint8 *tt1, *t1, *m1, *b1, *bb1;
- guint8 *tt2, *t2, *m2, *b2, *bb2;
- guint8 *tt3, *t3, *m3, *b3, *bb3;
- int bottom_field;
-};
-
-/*
- * | t-3 t-2 t-1 t
- * | Field 3 | Field 2 | Field 1 | Field 0 |
- * | TT3 | | TT1 | |
- * | | T2 | | T0 |
- * | M3 | | M1 | |
- * | | B2 | | B0 |
- * | BB3 | | BB1 | |
- *
- * While all pointers are passed in, each plugin is only guarenteed for
- * the ones it indicates it requires (in the fields_required parameter)
- * to be available.
- *
- * Pointers are always to scanlines in the standard packed 4:2:2 format.
- */
-typedef void (*deinterlace_interp_scanline_t) (GstDeinterlace2 * object,
- deinterlace_scanline_data_t * data, guint8 * output);
-/*
- * For the copy scanline, the API is basically the same, except that
- * we're given a scanline to 'copy'.
- *
- * | t-3 t-2 t-1 t
- * | Field 3 | Field 2 | Field 1 | Field 0 |
- * | | TT2 | | TT0 |
- * | T3 | | T1 | |
- * | | M2 | | M0 |
- * | B3 | | B1 | |
- * | | BB2 | | BB0 |
- */
-typedef void (*deinterlace_copy_scanline_t) (GstDeinterlace2 * object,
- deinterlace_scanline_data_t * data, guint8 * output);
-
-/*
- * The frame function is for deinterlacing plugins that can only act
- * on whole frames, rather than on a scanline at a time.
- */
-struct deinterlace_frame_data_s
-{
- guint8 *f0;
- guint8 *f1;
- guint8 *f2;
- guint8 *f3;
-};
typedef void (*deinterlace_frame_t) (GstDeinterlace2 * object);
-
/*
* This structure defines the deinterlacer plugin.
*/
@@ -131,32 +64,10 @@ struct deinterlace_method_s
int numsettings;
deinterlace_setting_t *settings;
int scanlinemode;
- deinterlace_interp_scanline_t interpolate_scanline;
- deinterlace_copy_scanline_t copy_scanline;
deinterlace_frame_t deinterlace_frame;
const char *description[10];
};
-/**
- * Registers a new deinterlace method.
- */
-void register_deinterlace_method (deinterlace_method_t * method);
-
-/**
- * Returns how many deinterlacing methods are available.
- */
-int get_num_deinterlace_methods (void);
-
-/**
- * Returns the specified method in the list.
- */
-deinterlace_method_t *get_deinterlace_method (int i);
-
-/**
- * Builds the usable method list.
- */
-void filter_deinterlace_methods (int accel, int fieldsavailable);
-
#define MAX_FIELD_HISTORY 10
#define PICTURE_PROGRESSIVE 0
@@ -245,9 +156,6 @@ struct _GstDeinterlace2
*/
gint field_height;
- /* Function pointer to optimized memcpy function */
- MEMCPY_FUNC *pMemcpy;
-
/* distance between lines in image
need not match the pixel width
*/
diff --git a/gst/deinterlace2/tvtime/greedy.c b/gst/deinterlace2/tvtime/greedy.c
index 2448d1dc..563ba2d2 100644
--- a/gst/deinterlace2/tvtime/greedy.c
+++ b/gst/deinterlace2/tvtime/greedy.c
@@ -26,20 +26,14 @@
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
*/
-#include <stdio.h>
-#if defined (__SVR4) && defined (__sun)
-# include <sys/int_types.h>
-#else
-# include <stdint.h>
-#endif
-
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
+#include <_stdint.h>
+
#include "gstdeinterlace2.h"
-#include "speedtools.h"
-#include "speedy.h"
+#include <string.h>
// This is a simple lightweight DeInterlace method that uses little CPU time
// but gives very good results for low or intermedite motion.
@@ -352,7 +346,7 @@ deinterlace_frame_di_greedy (GstDeinterlace2 * object)
GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf);
// copy first even line
- object->pMemcpy (Dest, L1, object->line_length);
+ memcpy (Dest, L1, object->line_length);
Dest += object->output_stride;
} else {
InfoIsOdd = 0;
@@ -365,18 +359,18 @@ deinterlace_frame_di_greedy (GstDeinterlace2 * object)
Pitch;
// copy first even line
- object->pMemcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
+ memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
object->line_length);
Dest += object->output_stride;
// then first odd line
- object->pMemcpy (Dest, L1, object->line_length);
+ memcpy (Dest, L1, object->line_length);
Dest += object->output_stride;
}
for (Line = 0; Line < (object->field_height - 1); ++Line) {
func (L2, L1, L3, L2P, Dest, object->line_length);
Dest += object->output_stride;
- object->pMemcpy (Dest, L3, object->line_length);
+ memcpy (Dest, L3, object->line_length);
Dest += object->output_stride;
L1 += Pitch;
@@ -386,7 +380,7 @@ deinterlace_frame_di_greedy (GstDeinterlace2 * object)
}
if (InfoIsOdd) {
- object->pMemcpy (Dest, L2, object->line_length);
+ memcpy (Dest, L2, object->line_length);
}
}
@@ -400,8 +394,6 @@ static deinterlace_method_t greedyl_method = {
0,
0,
1,
- 0,
- 0,
deinterlace_frame_di_greedy,
{"Uses heuristics to detect motion in the input",
"frames and reconstruct image detail where",
diff --git a/gst/deinterlace2/tvtime/greedyh.c b/gst/deinterlace2/tvtime/greedyh.c
index 5cda7d53..58ef4f5d 100644
--- a/gst/deinterlace2/tvtime/greedyh.c
+++ b/gst/deinterlace2/tvtime/greedyh.c
@@ -32,14 +32,12 @@
#include "greedyhmacros.h"
#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
+#include <_stdint.h>
#include <string.h>
#include "gst/gst.h"
#include "plugins.h"
#include "gstdeinterlace2.h"
-#include "speedy.h"
static const unsigned int GreedyMaxComb = 5;
static const unsigned int GreedyMotionThreshold = 25;
@@ -249,7 +247,7 @@ deinterlace_frame_di_greedyh (GstDeinterlace2 * object)
GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf);
// copy first even line
- object->pMemcpy (Dest, L1, object->line_length);
+ memcpy (Dest, L1, object->line_length);
Dest += object->output_stride;
} else {
InfoIsOdd = 0;
@@ -262,18 +260,18 @@ deinterlace_frame_di_greedyh (GstDeinterlace2 * object)
Pitch;
// copy first even line
- object->pMemcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
+ memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
object->line_length);
Dest += object->output_stride;
// then first odd line
- object->pMemcpy (Dest, L1, object->line_length);
+ memcpy (Dest, L1, object->line_length);
Dest += object->output_stride;
}
for (Line = 0; Line < (object->field_height - 1); ++Line) {
func (L1, L2, L3, L2P, Dest, object->line_length);
Dest += object->output_stride;
- object->pMemcpy (Dest, L3, object->line_length);
+ memcpy (Dest, L3, object->line_length);
Dest += object->output_stride;
L1 += Pitch;
@@ -283,7 +281,7 @@ deinterlace_frame_di_greedyh (GstDeinterlace2 * object)
}
if (InfoIsOdd) {
- object->pMemcpy (Dest, L2, object->line_length);
+ memcpy (Dest, L2, object->line_length);
}
}
@@ -297,8 +295,6 @@ static deinterlace_method_t greedyh_method = {
0,
0,
0,
- 0,
- 0,
deinterlace_frame_di_greedyh,
{"Uses heuristics to detect motion in the input",
"frames and reconstruct image detail where",
diff --git a/gst/deinterlace2/tvtime/speedtools.h b/gst/deinterlace2/tvtime/speedtools.h
deleted file mode 100644
index 677bb5e3..00000000
--- a/gst/deinterlace2/tvtime/speedtools.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *
- * GStreamer
- * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/*
- * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
- * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
- */
-
-#ifndef SPEEDTOOLS_H_INCLUDED
-#define SPEEDTOOLS_H_INCLUDED
-
-#define PREFETCH_2048(x) \
- { int *pfetcha = (int *) x; \
- prefetchnta( pfetcha ); \
- prefetchnta( pfetcha + 64 ); \
- prefetchnta( pfetcha + 128 ); \
- prefetchnta( pfetcha + 192 ); \
- pfetcha += 256; \
- prefetchnta( pfetcha ); \
- prefetchnta( pfetcha + 64 ); \
- prefetchnta( pfetcha + 128 ); \
- prefetchnta( pfetcha + 192 ); }
-
-#define READ_PREFETCH_2048(x) \
- { int *pfetcha = (int *) x; int pfetchtmp; \
- pfetchtmp = pfetcha[ 0 ] + pfetcha[ 16 ] + pfetcha[ 32 ] + pfetcha[ 48 ] + \
- pfetcha[ 64 ] + pfetcha[ 80 ] + pfetcha[ 96 ] + pfetcha[ 112 ] + \
- pfetcha[ 128 ] + pfetcha[ 144 ] + pfetcha[ 160 ] + pfetcha[ 176 ] + \
- pfetcha[ 192 ] + pfetcha[ 208 ] + pfetcha[ 224 ] + pfetcha[ 240 ]; \
- pfetcha += 256; \
- pfetchtmp = pfetcha[ 0 ] + pfetcha[ 16 ] + pfetcha[ 32 ] + pfetcha[ 48 ] + \
- pfetcha[ 64 ] + pfetcha[ 80 ] + pfetcha[ 96 ] + pfetcha[ 112 ] + \
- pfetcha[ 128 ] + pfetcha[ 144 ] + pfetcha[ 160 ] + pfetcha[ 176 ] + \
- pfetcha[ 192 ] + pfetcha[ 208 ] + pfetcha[ 224 ] + pfetcha[ 240 ]; }
-
-#endif /* SPEEDTOOLS_H_INCLUDED */
diff --git a/gst/deinterlace2/tvtime/speedy.c b/gst/deinterlace2/tvtime/speedy.c
deleted file mode 100644
index d19d917f..00000000
--- a/gst/deinterlace2/tvtime/speedy.c
+++ /dev/null
@@ -1,2791 +0,0 @@
-/*
- * Copyright (c) 2002, 2003 Billy Biggs <vektor@dumbterm.net>.
- * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-/*
- * Includes 420to422, 422to444 scaling filters from the MPEG2 reference
- * implementation. The v12 source code indicates that they were written
- * by Cheung Auyeung <auyeung@mot.com>. The file they were in was:
- *
- * store.c, picture output routines
- * Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved.
- *
- * Disclaimer of Warranty
- *
- * These software programs are available to the user without any license fee or
- * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
- * any and all warranties, whether express, implied, or statuary, including any
- * implied warranties or merchantability or of fitness for a particular
- * purpose. In no event shall the copyright-holder be liable for any
- * incidental, punitive, or consequential damages of any kind whatsoever
- * arising from the use of these programs.
- *
- * This disclaimer of warranty extends to the user of these programs and user's
- * customers, employees, agents, transferees, successors, and assigns.
- *
- * The MPEG Software Simulation Group does not represent or warrant that the
- * programs furnished hereunder are free of infringement of any third-party
- * patents.
- *
- * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
- * are subject to royalty fees to patent holders. Many of these patents are
- * general enough such that they are unavoidable regardless of implementation
- * design.
- *
- */
-
-/*
- * Code for the UYVY to YUYV routine comes from rivatv:
- *
- * rivatv-convert.c video image conversion routines
- *
- * Copyright (C) 2002 Stefan Jahn <stefan@lkcc.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <stdio.h>
-#include <string.h>
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "gst/gst.h"
-#include "gstdeinterlace2.h"
-#include "speedy.h"
-#include "speedtools.h"
-#include "mmx.h"
-#include "sse.h"
-
-// TODO: remove includes
-//#include "attributes.h"
-//#include "mm_accel.h"
-
-/* Function pointer definitions. */
-void (*interpolate_packed422_scanline) (uint8_t * output, uint8_t * top,
- uint8_t * bot, int width);
-void (*blit_colour_packed422_scanline) (uint8_t * output,
- int width, int y, int cb, int cr);
-void (*blit_colour_packed4444_scanline) (uint8_t * output,
- int width, int alpha, int luma, int cb, int cr);
-void (*blit_packed422_scanline) (uint8_t * dest, const uint8_t * src,
- int width);
-void (*composite_packed4444_to_packed422_scanline) (uint8_t * output,
- uint8_t * input, uint8_t * foreground, int width);
-void (*composite_packed4444_alpha_to_packed422_scanline) (uint8_t * output,
- uint8_t * input, uint8_t * foreground, int width, int alpha);
-void (*composite_alphamask_to_packed4444_scanline) (uint8_t * output,
- uint8_t * input, uint8_t * mask, int width, int textluma, int textcb,
- int textcr);
-void (*composite_alphamask_alpha_to_packed4444_scanline) (uint8_t * output,
- uint8_t * input, uint8_t * mask, int width, int textluma, int textcb,
- int textcr, int alpha);
-void (*premultiply_packed4444_scanline) (uint8_t * output, uint8_t * input,
- int width);
-void (*blend_packed422_scanline) (uint8_t * output, uint8_t * src1,
- uint8_t * src2, int width, int pos);
-unsigned int (*diff_factor_packed422_scanline) (uint8_t * cur, uint8_t * old,
- int width);
-unsigned int (*comb_factor_packed422_scanline) (uint8_t * top, uint8_t * mid,
- uint8_t * bot, int width);
-void (*kill_chroma_packed422_inplace_scanline) (uint8_t * data, int width);
-
-void (*mirror_packed422_inplace_scanline) (uint8_t * data, int width);
-
-void (*speedy_memcpy) (void *output, const void *input, size_t size);
-
-void (*diff_packed422_block8x8) (pulldown_metrics_t * m, uint8_t * old,
- uint8_t * new, int os, int ns);
-void (*a8_subpix_blit_scanline) (uint8_t * output, uint8_t * input,
- int lasta, int startpos, int width);
-void (*quarter_blit_vertical_packed422_scanline) (uint8_t * output,
- uint8_t * one, uint8_t * three, int width);
-void (*subpix_blit_vertical_packed422_scanline) (uint8_t * output,
- uint8_t * top, uint8_t * bot, int subpixpos, int width);
-void (*packed444_to_nonpremultiplied_packed4444_scanline) (uint8_t * output,
- uint8_t * input, int width, int alpha);
-void (*aspect_adjust_packed4444_scanline) (uint8_t * output, uint8_t * input,
- int width, double pixel_aspect);
-void (*packed444_to_packed422_scanline) (uint8_t * output, uint8_t * input,
- int width);
-void (*packed422_to_packed444_scanline) (uint8_t * output, uint8_t * input,
- int width);
-void (*packed422_to_packed444_rec601_scanline) (uint8_t * dest, uint8_t * src,
- int width);
-void (*packed444_to_rgb24_rec601_scanline) (uint8_t * output, uint8_t * input,
- int width);
-void (*rgb24_to_packed444_rec601_scanline) (uint8_t * output, uint8_t * input,
- int width);
-void (*rgba32_to_packed4444_rec601_scanline) (uint8_t * output, uint8_t * input,
- int width);
-void (*invert_colour_packed422_inplace_scanline) (uint8_t * data, int width);
-
-void (*vfilter_chroma_121_packed422_scanline) (uint8_t * output, int width,
- uint8_t * m, uint8_t * t, uint8_t * b);
-void (*vfilter_chroma_332_packed422_scanline) (uint8_t * output, int width,
- uint8_t * m, uint8_t * t, uint8_t * b);
-void (*convert_uyvy_to_yuyv_scanline) (uint8_t * uyvy_buf, uint8_t * yuyv_buf,
- int width);
-void (*composite_colour4444_alpha_to_packed422_scanline) (uint8_t * output,
- uint8_t * input, int af, int y, int cb, int cr, int width, int alpha);
-
-/*
- * result = (1 - alpha)B + alpha*F
- * = B - alpha*B + alpha*F
- * = B + alpha*(F - B)
- */
-
-static inline __attribute__ ((always_inline, const))
- int multiply_alpha (int a, int r)
-{
- int temp;
-
- temp = (r * a) + 0x80;
- return ((temp + (temp >> 8)) >> 8);
-}
-
-static inline __attribute__ ((always_inline, const))
- uint8_t clip255 (int x)
-{
- if (x > 255) {
- return 255;
- } else if (x < 0) {
- return 0;
- } else {
- return x;
- }
-}
-
-unsigned long CombJaggieThreshold = 73;
-
-#ifdef HAVE_CPU_I386
-static unsigned int
-comb_factor_packed422_scanline_mmx (uint8_t * top, uint8_t * mid,
- uint8_t * bot, int width)
-{
- const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
- const mmx_t qwOnes = { 0x0001000100010001ULL };
- mmx_t qwThreshold;
-
- unsigned int temp1, temp2;
-
- width /= 4;
-
- qwThreshold.uw[0] = CombJaggieThreshold;
- qwThreshold.uw[1] = CombJaggieThreshold;
- qwThreshold.uw[2] = CombJaggieThreshold;
- qwThreshold.uw[3] = CombJaggieThreshold;
-
- movq_m2r (qwThreshold, mm0);
- movq_m2r (qwYMask, mm1);
- movq_m2r (qwOnes, mm2);
- pxor_r2r (mm7, mm7); /* mm7 = 0. */
-
- while (width--) {
- /* Load and keep just the luma. */
- movq_m2r (*top, mm3);
- movq_m2r (*mid, mm4);
- movq_m2r (*bot, mm5);
-
- pand_r2r (mm1, mm3);
- pand_r2r (mm1, mm4);
- pand_r2r (mm1, mm5);
-
- /* Work out mm6 = (top - mid) * (bot - mid) - ( (top - mid)^2 >> 7 ) */
- psrlw_i2r (1, mm3);
- psrlw_i2r (1, mm4);
- psrlw_i2r (1, mm5);
-
- /* mm6 = (top - mid) */
- movq_r2r (mm3, mm6);
- psubw_r2r (mm4, mm6);
-
- /* mm3 = (top - bot) */
- psubw_r2r (mm5, mm3);
-
- /* mm5 = (bot - mid) */
- psubw_r2r (mm4, mm5);
-
- /* mm6 = (top - mid) * (bot - mid) */
- pmullw_r2r (mm5, mm6);
-
- /* mm3 = (top - bot)^2 >> 7 */
- pmullw_r2r (mm3, mm3); /* mm3 = (top - bot)^2 */
- psrlw_i2r (7, mm3); /* mm3 = ((top - bot)^2 >> 7) */
-
- /* mm6 is what we want. */
- psubw_r2r (mm3, mm6);
-
- /* FF's if greater than qwTheshold */
- pcmpgtw_r2r (mm0, mm6);
-
- /* Add to count if we are greater than threshold */
- pand_r2r (mm2, mm6);
- paddw_r2r (mm6, mm7);
-
- top += 8;
- mid += 8;
- bot += 8;
- }
-
- movd_r2m (mm7, temp1);
- psrlq_i2r (32, mm7);
- movd_r2m (mm7, temp2);
- temp1 += temp2;
- temp2 = temp1;
- temp1 >>= 16;
- temp1 += temp2 & 0xffff;
-
- emms ();
-
- return temp1;
-}
-#endif
-
-static unsigned long BitShift = 6;
-
-static unsigned int
-diff_factor_packed422_scanline_c (uint8_t * cur, uint8_t * old, int width)
-{
- unsigned int ret = 0;
-
- width /= 4;
-
- while (width--) {
- unsigned int tmp1 = (cur[0] + cur[2] + cur[4] + cur[6] + 2) >> 2;
-
- unsigned int tmp2 = (old[0] + old[2] + old[4] + old[6] + 2) >> 2;
-
- tmp1 = (tmp1 - tmp2);
- tmp1 *= tmp1;
- tmp1 >>= BitShift;
- ret += tmp1;
- cur += 8;
- old += 8;
- }
-
- return ret;
-}
-
-/*
-static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t *old, int width )
-{
- unsigned int ret = 0;
-
- width /= 16;
-
- while( width-- ) {
- unsigned int tmp1 = (cur[ 0 ] + cur[ 2 ] + cur[ 4 ] + cur[ 6 ])>>2;
- unsigned int tmp2 = (old[ 0 ] + old[ 2 ] + old[ 4 ] + old[ 6 ])>>2;
- tmp1 = (tmp1 - tmp2);
- tmp1 *= tmp1;
- tmp1 >>= BitShift;
- ret += tmp1;
- cur += (8*4);
- old += (8*4);
- }
-
- return ret;
-}
-*/
-
-#ifdef HAVE_CPU_I386
-static unsigned int
-diff_factor_packed422_scanline_mmx (uint8_t * cur, uint8_t * old, int width)
-{
- const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };
- unsigned int temp1, temp2;
-
- width /= 4;
-
- movq_m2r (qwYMask, mm1);
- movd_m2r (BitShift, mm7);
- pxor_r2r (mm0, mm0);
-
- while (width--) {
- movq_m2r (*cur, mm4);
- movq_m2r (*old, mm5);
-
- pand_r2r (mm1, mm4);
- pand_r2r (mm1, mm5);
-
- psubw_r2r (mm5, mm4); /* mm4 = Y1 - Y2 */
- pmaddwd_r2r (mm4, mm4); /* mm4 = (Y1 - Y2)^2 */
- psrld_r2r (mm7, mm4); /* divide mm4 by 2^BitShift */
- paddd_r2r (mm4, mm0); /* keep total in mm0 */
-
- cur += 8;
- old += 8;
- }
-
- movd_r2m (mm0, temp1);
- psrlq_i2r (32, mm0);
- movd_r2m (mm0, temp2);
- temp1 += temp2;
-
- emms ();
-
- return temp1;
-}
-#endif
-
-// defined in glib/gmacros.h #define ABS(a) (((a) < 0)?-(a):(a))
-
-#ifdef HAVE_CPU_I386
-static void
-diff_packed422_block8x8_mmx (pulldown_metrics_t * m, uint8_t * old,
- uint8_t * new, int os, int ns)
-{
- const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
- short out[24]; /* Output buffer for the partial metrics from the mmx code. */
-
- uint8_t *outdata = (uint8_t *) out;
-
- uint8_t *oldp, *newp;
-
- int i;
-
- pxor_r2r (mm4, mm4); // 4 even difference sums.
- pxor_r2r (mm5, mm5); // 4 odd difference sums.
- pxor_r2r (mm7, mm7); // zeros
-
- oldp = old;
- newp = new;
- for (i = 4; i; --i) {
- // Even difference.
- movq_m2r (oldp[0], mm0);
- movq_m2r (oldp[8], mm2);
- pand_m2r (ymask, mm0);
- pand_m2r (ymask, mm2);
- oldp += os;
-
- movq_m2r (newp[0], mm1);
- movq_m2r (newp[8], mm3);
- pand_m2r (ymask, mm1);
- pand_m2r (ymask, mm3);
- newp += ns;
-
- movq_r2r (mm0, mm6);
- psubusb_r2r (mm1, mm0);
- psubusb_r2r (mm6, mm1);
- movq_r2r (mm2, mm6);
- psubusb_r2r (mm3, mm2);
- psubusb_r2r (mm6, mm3);
-
- paddw_r2r (mm0, mm4);
- paddw_r2r (mm1, mm4);
- paddw_r2r (mm2, mm4);
- paddw_r2r (mm3, mm4);
-
- // Odd difference.
- movq_m2r (oldp[0], mm0);
- movq_m2r (oldp[8], mm2);
- pand_m2r (ymask, mm0);
- pand_m2r (ymask, mm2);
- oldp += os;
-
- movq_m2r (newp[0], mm1);
- movq_m2r (newp[8], mm3);
- pand_m2r (ymask, mm1);
- pand_m2r (ymask, mm3);
- newp += ns;
-
- movq_r2r (mm0, mm6);
- psubusb_r2r (mm1, mm0);
- psubusb_r2r (mm6, mm1);
- movq_r2r (mm2, mm6);
- psubusb_r2r (mm3, mm2);
- psubusb_r2r (mm6, mm3);
-
- paddw_r2r (mm0, mm5);
- paddw_r2r (mm1, mm5);
- paddw_r2r (mm2, mm5);
- paddw_r2r (mm3, mm5);
- }
- movq_r2m (mm4, outdata[0]);
- movq_r2m (mm5, outdata[8]);
-
- m->e = out[0] + out[1] + out[2] + out[3];
- m->o = out[4] + out[5] + out[6] + out[7];
- m->d = m->e + m->o;
-
- pxor_r2r (mm4, mm4); // Past spacial noise.
- pxor_r2r (mm5, mm5); // Temporal noise.
- pxor_r2r (mm6, mm6); // Current spacial noise.
-
- // First loop to measure first four columns
- oldp = old;
- newp = new;
- for (i = 4; i; --i) {
- movq_m2r (oldp[0], mm0);
- movq_m2r (oldp[os], mm1);
- pand_m2r (ymask, mm0);
- pand_m2r (ymask, mm1);
- oldp += (os * 2);
-
- movq_m2r (newp[0], mm2);
- movq_m2r (newp[ns], mm3);
- pand_m2r (ymask, mm2);
- pand_m2r (ymask, mm3);
- newp += (ns * 2);
-
- paddw_r2r (mm1, mm4);
- paddw_r2r (mm1, mm5);
- paddw_r2r (mm3, mm6);
- psubw_r2r (mm0, mm4);
- psubw_r2r (mm2, mm5);
- psubw_r2r (mm2, mm6);
- }
- movq_r2m (mm4, outdata[0]);
- movq_r2m (mm5, outdata[16]);
- movq_r2m (mm6, outdata[32]);
-
- pxor_r2r (mm4, mm4);
- pxor_r2r (mm5, mm5);
- pxor_r2r (mm6, mm6);
-
- // Second loop for the last four columns
- oldp = old;
- newp = new;
- for (i = 4; i; --i) {
- movq_m2r (oldp[8], mm0);
- movq_m2r (oldp[os + 8], mm1);
- pand_m2r (ymask, mm0);
- pand_m2r (ymask, mm1);
- oldp += (os * 2);
-
- movq_m2r (newp[8], mm2);
- movq_m2r (newp[ns + 8], mm3);
- pand_m2r (ymask, mm2);
- pand_m2r (ymask, mm3);
- newp += (ns * 2);
-
- paddw_r2r (mm1, mm4);
- paddw_r2r (mm1, mm5);
- paddw_r2r (mm3, mm6);
- psubw_r2r (mm0, mm4);
- psubw_r2r (mm2, mm5);
- psubw_r2r (mm2, mm6);
- }
- movq_r2m (mm4, outdata[8]);
- movq_r2m (mm5, outdata[24]);
- movq_r2m (mm6, outdata[40]);
-
- m->p = m->t = m->s = 0;
- for (i = 0; i < 8; i++) {
- // FIXME: move abs() into the mmx code!
- m->p += ABS (out[i]);
- m->t += ABS (out[8 + i]);
- m->s += ABS (out[16 + i]);
- }
-
- emms ();
-}
-#endif
-
-static void
-diff_packed422_block8x8_c (pulldown_metrics_t * m, uint8_t * old,
- uint8_t * new, int os, int ns)
-{
- int x, y, e = 0, o = 0, s = 0, p = 0, t = 0;
-
- uint8_t *oldp, *newp;
-
- m->s = m->p = m->t = 0;
- for (x = 8; x; x--) {
- oldp = old;
- old += 2;
- newp = new;
- new += 2;
- s = p = t = 0;
- for (y = 4; y; y--) {
- e += ABS (newp[0] - oldp[0]);
- o += ABS (newp[ns] - oldp[os]);
- s += newp[ns] - newp[0];
- p += oldp[os] - oldp[0];
- t += oldp[os] - newp[0];
- oldp += os << 1;
- newp += ns << 1;
- }
- m->s += ABS (s);
- m->p += ABS (p);
- m->t += ABS (t);
- }
- m->e = e;
- m->o = o;
- m->d = e + o;
-}
-
-static void
-packed444_to_packed422_scanline_c (uint8_t * output, uint8_t * input, int width)
-{
- width /= 2;
- while (width--) {
- output[0] = input[0];
- output[1] = input[1];
- output[2] = input[3];
- output[3] = input[2];
- output += 4;
- input += 6;
- }
-}
-
-static void
-packed422_to_packed444_scanline_c (uint8_t * output, uint8_t * input, int width)
-{
- width /= 2;
- while (width--) {
- output[0] = input[0];
- output[1] = input[1];
- output[2] = input[3];
- output[3] = input[2];
- output[4] = input[1];
- output[5] = input[3];
- output += 6;
- input += 4;
- }
-}
-
-/*
- * For the middle pixels, the filter kernel is:
- *
- * [-1 3 -6 12 -24 80 80 -24 12 -6 3 -1]
- */
-static void
-packed422_to_packed444_rec601_scanline_c (uint8_t * dest, uint8_t * src,
- int width)
-{
- int i;
-
- /* Process two input pixels at a time. Input is [Y'][Cb][Y'][Cr]. */
- for (i = 0; i < width / 2; i++) {
- dest[(i * 6) + 0] = src[(i * 4) + 0];
- dest[(i * 6) + 1] = src[(i * 4) + 1];
- dest[(i * 6) + 2] = src[(i * 4) + 3];
-
- dest[(i * 6) + 3] = src[(i * 4) + 2];
- if (i > (5 * 2) && i < ((width / 2) - (6 * 2))) {
- dest[(i * 6) + 4] =
- clip255 ((((80 * (src[(i * 4) + 1] + src[(i * 4) + 5]))
- - (24 * (src[(i * 4) - 3] + src[(i * 4) + 9]))
- + (12 * (src[(i * 4) - 7] + src[(i * 4) + 13]))
- - (6 * (src[(i * 4) - 11] + src[(i * 4) + 17]))
- + (3 * (src[(i * 4) - 15] + src[(i * 4) + 21]))
- - ((src[(i * 4) - 19] + src[(i * 4) + 25]))) + 64) >> 7);
- dest[(i * 6) + 5] =
- clip255 ((((80 * (src[(i * 4) + 3] + src[(i * 4) + 7]))
- - (24 * (src[(i * 4) - 1] + src[(i * 4) + 11]))
- + (12 * (src[(i * 4) - 5] + src[(i * 4) + 15]))
- - (6 * (src[(i * 4) - 9] + src[(i * 4) + 19]))
- + (3 * (src[(i * 4) - 13] + src[(i * 4) + 23]))
- - ((src[(i * 4) - 17] + src[(i * 4) + 27]))) + 64) >> 7);
- } else if (i < ((width / 2) - 1)) {
- dest[(i * 6) + 4] = (src[(i * 4) + 1] + src[(i * 4) + 5] + 1) >> 1;
- dest[(i * 6) + 5] = (src[(i * 4) + 3] + src[(i * 4) + 7] + 1) >> 1;
- } else {
- dest[(i * 6) + 4] = src[(i * 4) + 1];
- dest[(i * 6) + 5] = src[(i * 4) + 3];
- }
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-vfilter_chroma_121_packed422_scanline_mmx (uint8_t * output, int width,
- uint8_t * m, uint8_t * t, uint8_t * b)
-{
- int i;
- const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
- const mmx_t cmask = { 0xff00ff00ff00ff00ULL };
-
- // Get width in bytes.
- width *= 2;
- i = width / 8;
- width -= i * 8;
-
- movq_m2r (ymask, mm7);
- movq_m2r (cmask, mm6);
-
- while (i--) {
- movq_m2r (*t, mm0);
- movq_m2r (*b, mm1);
- movq_m2r (*m, mm2);
-
- movq_r2r (mm2, mm3);
- pand_r2r (mm7, mm3);
-
- pand_r2r (mm6, mm0);
- pand_r2r (mm6, mm1);
- pand_r2r (mm6, mm2);
-
- psrlq_i2r (8, mm0);
- psrlq_i2r (8, mm1);
- psrlq_i2r (7, mm2);
-
- paddw_r2r (mm0, mm2);
- paddw_r2r (mm1, mm2);
-
- psllw_i2r (6, mm2);
- pand_r2r (mm6, mm2);
-
- por_r2r (mm3, mm2);
-
- movq_r2m (mm2, *output);
- output += 8;
- t += 8;
- b += 8;
- m += 8;
- }
- output++;
- t++;
- b++;
- m++;
- while (width--) {
- *output = (*t + *b + (*m << 1)) >> 2;
- output += 2;
- t += 2;
- b += 2;
- m += 2;
- }
-
- emms ();
-}
-#endif
-
-static void
-vfilter_chroma_121_packed422_scanline_c (uint8_t * output, int width,
- uint8_t * m, uint8_t * t, uint8_t * b)
-{
- output++;
- t++;
- b++;
- m++;
- while (width--) {
- *output = (*t + *b + (*m << 1)) >> 2;
- output += 2;
- t += 2;
- b += 2;
- m += 2;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-vfilter_chroma_332_packed422_scanline_mmx (uint8_t * output, int width,
- uint8_t * m, uint8_t * t, uint8_t * b)
-{
- int i;
- const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
- const mmx_t cmask = { 0xff00ff00ff00ff00ULL };
-
- // Get width in bytes.
- width *= 2;
- i = width / 8;
- width -= i * 8;
-
- movq_m2r (ymask, mm7);
- movq_m2r (cmask, mm6);
-
- while (i--) {
- movq_m2r (*t, mm0);
- movq_m2r (*b, mm1);
- movq_m2r (*m, mm2);
-
- movq_r2r (mm2, mm3);
- pand_r2r (mm7, mm3);
-
- pand_r2r (mm6, mm0);
- pand_r2r (mm6, mm1);
- pand_r2r (mm6, mm2);
-
- psrlq_i2r (8, mm0);
- psrlq_i2r (7, mm1);
- psrlq_i2r (8, mm2);
-
- movq_r2r (mm0, mm4);
- psllw_i2r (1, mm4);
- paddw_r2r (mm4, mm0);
-
- movq_r2r (mm2, mm4);
- psllw_i2r (1, mm4);
- paddw_r2r (mm4, mm2);
-
- paddw_r2r (mm0, mm2);
- paddw_r2r (mm1, mm2);
-
- psllw_i2r (5, mm2);
- pand_r2r (mm6, mm2);
-
- por_r2r (mm3, mm2);
-
- movq_r2m (mm2, *output);
- output += 8;
- t += 8;
- b += 8;
- m += 8;
- }
- output++;
- t++;
- b++;
- m++;
- while (width--) {
- *output = (3 * *t + 3 * *m + 2 * *b) >> 3;
- output += 2;
- t += 2;
- b += 2;
- m += 2;
- }
-
- emms ();
-}
-#endif
-
-static void
-vfilter_chroma_332_packed422_scanline_c (uint8_t * output, int width,
- uint8_t * m, uint8_t * t, uint8_t * b)
-{
- output++;
- t++;
- b++;
- m++;
- while (width--) {
- *output = (3 * *t + 3 * *m + 2 * *b) >> 3;
- output += 2;
- t += 2;
- b += 2;
- m += 2;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-kill_chroma_packed422_inplace_scanline_mmx (uint8_t * data, int width)
-{
- const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
- const mmx_t nullchroma = { 0x8000800080008000ULL };
-
- movq_m2r (ymask, mm7);
- movq_m2r (nullchroma, mm6);
- for (; width > 4; width -= 4) {
- movq_m2r (*data, mm0);
- pand_r2r (mm7, mm0);
- paddb_r2r (mm6, mm0);
- movq_r2m (mm0, *data);
- data += 8;
- }
- emms ();
-
- while (width--) {
- data[1] = 128;
- data += 2;
- }
-}
-#endif
-
-static void
-kill_chroma_packed422_inplace_scanline_c (uint8_t * data, int width)
-{
- while (width--) {
- data[1] = 128;
- data += 2;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-invert_colour_packed422_inplace_scanline_mmx (uint8_t * data, int width)
-{
- const mmx_t allones = { 0xffffffffffffffffULL };
-
- movq_m2r (allones, mm1);
- for (; width > 4; width -= 4) {
- movq_r2r (mm1, mm2);
- movq_m2r (*data, mm0);
- psubb_r2r (mm0, mm2);
- movq_r2m (mm2, *data);
- data += 8;
- }
- emms ();
-
- width *= 2;
- while (width--) {
- *data = 255 - *data;
- data++;
- }
-}
-#endif
-
-static void
-invert_colour_packed422_inplace_scanline_c (uint8_t * data, int width)
-{
- width *= 2;
- while (width--) {
- *data = 255 - *data;
- data++;
- }
-}
-
-static void
-mirror_packed422_inplace_scanline_c (uint8_t * data, int width)
-{
- int x, tmp1, tmp2;
-
- int width2 = width * 2;
-
- for (x = 0; x < width; x += 2) {
- tmp1 = data[x];
- tmp2 = data[x + 1];
- data[x] = data[width2 - x];
- data[x + 1] = data[width2 - x + 1];
- data[width2 - x] = tmp1;
- data[width2 - x + 1] = tmp2;
- }
-}
-
-static void
-interpolate_packed422_scanline_c (uint8_t * output, uint8_t * top,
- uint8_t * bot, int width)
-{
- int i;
-
- for (i = width * 2; i; --i) {
- *output++ = ((*top++) + (*bot++)) >> 1;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-convert_uyvy_to_yuyv_scanline_mmx (uint8_t * uyvy_buf, uint8_t * yuyv_buf,
- int width)
-{
-#if defined(HAVE_CPU_I386) && !defined(HAVE_CPU_X86_64)
- __asm__ __volatile__ (" movl %0, %%esi \n"
- " movl %1, %%edi \n"
- " movl %2, %%edx \n" " shrl $3, %%edx \n"
- /* Process 8 pixels at once */
- "1: movq (%%esi), %%mm0 \n" /* mm0 = Y3V2Y2U2Y1V0Y0U0 */
- " movq 8(%%esi), %%mm2 \n" /* mm2 = Y7V6Y6U6Y5V4Y4U4 */
- " movq %%mm0, %%mm1 \n" /* mm1 = Y3V2Y2U2Y1V0Y0U0 */
- " movq %%mm2, %%mm3 \n" /* mm3 = Y7V6Y6U6Y5V4Y4U4 */
- " psllw $8, %%mm0 \n" /* mm0 = V2__U2__V0__U0__ */
- " psrlw $8, %%mm1 \n" /* mm1 = __Y3__Y2__Y1__Y0 */
- " psllw $8, %%mm2 \n" /* mm2 = V6__U6__V4__U4__ */
- " psrlw $8, %%mm3 \n" /* mm3 = __Y7__Y6__Y5__Y4 */
- " por %%mm1, %%mm0 \n" /* mm0 = V2Y3U2Y2V0Y1U0Y0 */
- " por %%mm3, %%mm2 \n" /* mm2 = V6Y7U6Y6V4Y5U4Y4 */
- " movq %%mm0, (%%edi) \n"
- " movq %%mm2, 8(%%edi) \n"
- " addl $16, %%esi \n"
- " addl $16, %%edi \n"
- " decl %%edx \n"
- " jnz 1b \n" " emms \n"
- /* output */ :
- /* input */ :"g" (uyvy_buf), "g" (yuyv_buf), "g" (width)
- /* clobber registers */
- :"cc", "edx", "esi", "edi");
-#endif
-#ifdef HAVE_CPU_X86_64
- __asm__ __volatile__ (" movq %0, %%rsi \n"
- " movq %1, %%rdi \n"
- " xorq %%rdx, %%rdx \n"
- " movl %2, %%edx \n" " shrq $3, %%rdx \n"
- /* Process 8 pixels at once */
- "1: movq (%%rsi), %%mm0 \n" /* mm0 = Y3V2Y2U2Y1V0Y0U0 */
- " movq 8(%%rsi), %%mm2 \n" /* mm2 = Y7V6Y6U6Y5V4Y4U4 */
- " movq %%mm0, %%mm1 \n" /* mm1 = Y3V2Y2U2Y1V0Y0U0 */
- " movq %%mm2, %%mm3 \n" /* mm3 = Y7V6Y6U6Y5V4Y4U4 */
- " psllw $8, %%mm0 \n" /* mm0 = V2__U2__V0__U0__ */
- " psrlw $8, %%mm1 \n" /* mm1 = __Y3__Y2__Y1__Y0 */
- " psllw $8, %%mm2 \n" /* mm2 = V6__U6__V4__U4__ */
- " psrlw $8, %%mm3 \n" /* mm3 = __Y7__Y6__Y5__Y4 */
- " por %%mm1, %%mm0 \n" /* mm0 = V2Y3U2Y2V0Y1U0Y0 */
- " por %%mm3, %%mm2 \n" /* mm2 = V6Y7U6Y6V4Y5U4Y4 */
- " movq %%mm0, (%%rdi) \n"
- " movq %%mm2, 8(%%rdi) \n"
- " addq $16, %%rsi \n"
- " addq $16, %%rdi \n"
- " decq %%rdx \n"
- " jnz 1b \n" " emms \n"
- /* output */ :
- /* input */ :"g" (uyvy_buf), "g" (yuyv_buf), "g" (width)
- /* clobber registers */
- :"cc", "rdx", "rsi", "rdi");
-#endif
- if (width & 7) {
- uint32_t *uyvy = (uint32_t *) uyvy_buf;
-
- uint32_t *yuyv = (uint32_t *) yuyv_buf;
-
- uint32_t val;
-
- width &= 7;
- width >>= 1;
- while (width--) {
- val = *uyvy++;
- val = ((val << 8) & ~0x00FF0000) | ((val >> 8) & ~0x0000FF00);
- *yuyv++ = val;
- }
- }
-}
-#endif
-
-static void
-convert_uyvy_to_yuyv_scanline_c (uint8_t * uyvy_buf, uint8_t * yuyv_buf,
- int width)
-{
- uint32_t *uyvy = (uint32_t *) uyvy_buf;
-
- uint32_t *yuyv = (uint32_t *) yuyv_buf;
-
- uint32_t val;
-
- width >>= 1;
- while (width--) {
- val = *uyvy++;
- val = ((val << 8) & ~0x00FF0000) | ((val >> 8) & ~0x0000FF00);
- *yuyv++ = val;
- }
-}
-
-
-#ifdef HAVE_CPU_I386
-static void
-interpolate_packed422_scanline_mmx (uint8_t * output, uint8_t * top,
- uint8_t * bot, int width)
-{
- const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */
- int i;
-
- for (i = width / 16; i; --i) {
- movq_m2r (*bot, mm0);
- movq_m2r (*top, mm1);
- movq_m2r (*(bot + 8), mm2);
- movq_m2r (*(top + 8), mm3);
- movq_m2r (*(bot + 16), mm4);
- movq_m2r (*(top + 16), mm5);
- movq_m2r (*(bot + 24), mm6);
- movq_m2r (*(top + 24), mm7);
- pand_m2r (shiftmask, mm0);
- pand_m2r (shiftmask, mm1);
- pand_m2r (shiftmask, mm2);
- pand_m2r (shiftmask, mm3);
- pand_m2r (shiftmask, mm4);
- pand_m2r (shiftmask, mm5);
- pand_m2r (shiftmask, mm6);
- pand_m2r (shiftmask, mm7);
- psrlw_i2r (1, mm0);
- psrlw_i2r (1, mm1);
- psrlw_i2r (1, mm2);
- psrlw_i2r (1, mm3);
- psrlw_i2r (1, mm4);
- psrlw_i2r (1, mm5);
- psrlw_i2r (1, mm6);
- psrlw_i2r (1, mm7);
- paddb_r2r (mm1, mm0);
- paddb_r2r (mm3, mm2);
- paddb_r2r (mm5, mm4);
- paddb_r2r (mm7, mm6);
- movq_r2m (mm0, *output);
- movq_r2m (mm2, *(output + 8));
- movq_r2m (mm4, *(output + 16));
- movq_r2m (mm6, *(output + 24));
- output += 32;
- top += 32;
- bot += 32;
- }
- width = (width & 0xf);
-
- for (i = width / 4; i; --i) {
- movq_m2r (*bot, mm0);
- movq_m2r (*top, mm1);
- pand_m2r (shiftmask, mm0);
- pand_m2r (shiftmask, mm1);
- psrlw_i2r (1, mm0);
- psrlw_i2r (1, mm1);
- paddb_r2r (mm1, mm0);
- movq_r2m (mm0, *output);
- output += 8;
- top += 8;
- bot += 8;
- }
- width = width & 0x7;
-
- /* Handle last few pixels. */
- for (i = width * 2; i; --i) {
- *output++ = ((*top++) + (*bot++)) >> 1;
- }
-
- emms ();
-}
-#endif
-
-#ifdef HAVE_CPU_I386
-static void
-interpolate_packed422_scanline_mmxext (uint8_t * output, uint8_t * top,
- uint8_t * bot, int width)
-{
- int i;
-
- for (i = width / 16; i; --i) {
- movq_m2r (*bot, mm0);
- movq_m2r (*top, mm1);
- movq_m2r (*(bot + 8), mm2);
- movq_m2r (*(top + 8), mm3);
- movq_m2r (*(bot + 16), mm4);
- movq_m2r (*(top + 16), mm5);
- movq_m2r (*(bot + 24), mm6);
- movq_m2r (*(top + 24), mm7);
- pavgb_r2r (mm1, mm0);
- pavgb_r2r (mm3, mm2);
- pavgb_r2r (mm5, mm4);
- pavgb_r2r (mm7, mm6);
- movntq_r2m (mm0, *output);
- movntq_r2m (mm2, *(output + 8));
- movntq_r2m (mm4, *(output + 16));
- movntq_r2m (mm6, *(output + 24));
- output += 32;
- top += 32;
- bot += 32;
- }
- width = (width & 0xf);
-
- for (i = width / 4; i; --i) {
- movq_m2r (*bot, mm0);
- movq_m2r (*top, mm1);
- pavgb_r2r (mm1, mm0);
- movntq_r2m (mm0, *output);
- output += 8;
- top += 8;
- bot += 8;
- }
- width = width & 0x7;
-
- /* Handle last few pixels. */
- for (i = width * 2; i; --i) {
- *output++ = ((*top++) + (*bot++)) >> 1;
- }
-
- sfence ();
- emms ();
-}
-#endif
-
-static void
-blit_colour_packed422_scanline_c (uint8_t * output, int width, int y, int cb,
- int cr)
-{
- uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
-
- uint32_t *o = (uint32_t *) output;
-
- for (width /= 2; width; --width) {
- *o++ = colour;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-blit_colour_packed422_scanline_mmx (uint8_t * output, int width, int y, int cb,
- int cr)
-{
- uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
-
- int i;
-
- movd_m2r (colour, mm1);
- movd_m2r (colour, mm2);
- psllq_i2r (32, mm1);
- por_r2r (mm1, mm2);
-
- for (i = width / 16; i; --i) {
- movq_r2m (mm2, *output);
- movq_r2m (mm2, *(output + 8));
- movq_r2m (mm2, *(output + 16));
- movq_r2m (mm2, *(output + 24));
- output += 32;
- }
- width = (width & 0xf);
-
- for (i = width / 4; i; --i) {
- movq_r2m (mm2, *output);
- output += 8;
- }
- width = (width & 0x7);
-
- for (i = width / 2; i; --i) {
- *((uint32_t *) output) = colour;
- output += 4;
- }
-
- if (width & 1) {
- *output = y;
- *(output + 1) = cb;
- }
-
- emms ();
-}
-#endif
-
-#ifdef HAVE_CPU_I386
-static void
-blit_colour_packed422_scanline_mmxext (uint8_t * output, int width, int y,
- int cb, int cr)
-{
- uint32_t colour = cr << 24 | y << 16 | cb << 8 | y;
-
- int i;
-
- movd_m2r (colour, mm1);
- movd_m2r (colour, mm2);
- psllq_i2r (32, mm1);
- por_r2r (mm1, mm2);
-
- for (i = width / 16; i; --i) {
- movntq_r2m (mm2, *output);
- movntq_r2m (mm2, *(output + 8));
- movntq_r2m (mm2, *(output + 16));
- movntq_r2m (mm2, *(output + 24));
- output += 32;
- }
- width = (width & 0xf);
-
- for (i = width / 4; i; --i) {
- movntq_r2m (mm2, *output);
- output += 8;
- }
- width = (width & 0x7);
-
- for (i = width / 2; i; --i) {
- *((uint32_t *) output) = colour;
- output += 4;
- }
-
- if (width & 1) {
- *output = y;
- *(output + 1) = cb;
- }
-
- sfence ();
- emms ();
-}
-#endif
-
-static void
-blit_colour_packed4444_scanline_c (uint8_t * output, int width,
- int alpha, int luma, int cb, int cr)
-{
- int j;
-
- for (j = 0; j < width; j++) {
- *output++ = alpha;
- *output++ = luma;
- *output++ = cb;
- *output++ = cr;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-blit_colour_packed4444_scanline_mmx (uint8_t * output, int width,
- int alpha, int luma, int cb, int cr)
-{
- uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha;
-
- int i;
-
- movd_m2r (colour, mm1);
- movd_m2r (colour, mm2);
- psllq_i2r (32, mm1);
- por_r2r (mm1, mm2);
-
- for (i = width / 8; i; --i) {
- movq_r2m (mm2, *output);
- movq_r2m (mm2, *(output + 8));
- movq_r2m (mm2, *(output + 16));
- movq_r2m (mm2, *(output + 24));
- output += 32;
- }
- width = (width & 0x7);
-
- for (i = width / 2; i; --i) {
- movq_r2m (mm2, *output);
- output += 8;
- }
- width = (width & 0x1);
-
- if (width) {
- *((uint32_t *) output) = colour;
- output += 4;
- }
-
- emms ();
-}
-#endif
-
-#ifdef HAVE_CPU_I386
-static void
-blit_colour_packed4444_scanline_mmxext (uint8_t * output, int width,
- int alpha, int luma, int cb, int cr)
-{
- uint32_t colour = (cr << 24) | (cb << 16) | (luma << 8) | alpha;
-
- int i;
-
- movd_m2r (colour, mm1);
- movd_m2r (colour, mm2);
- psllq_i2r (32, mm1);
- por_r2r (mm1, mm2);
-
- for (i = width / 8; i; --i) {
- movntq_r2m (mm2, *output);
- movntq_r2m (mm2, *(output + 8));
- movntq_r2m (mm2, *(output + 16));
- movntq_r2m (mm2, *(output + 24));
- output += 32;
- }
- width = (width & 0x7);
-
- for (i = width / 2; i; --i) {
- movntq_r2m (mm2, *output);
- output += 8;
- }
- width = (width & 0x1);
-
- if (width) {
- *((uint32_t *) output) = colour;
- output += 4;
- }
-
- sfence ();
- emms ();
-}
-#endif
-
-
-/*
- * Some memcpy code inspired by the xine code which originally came
- * from mplayer.
- */
-
-/* linux kernel __memcpy (from: /include/asm/string.h) */
-#ifdef HAVE_CPU_I386
-static inline __attribute__ ((always_inline, const))
- void small_memcpy (void *to, const void *from, size_t n)
-{
- int d0, d1, d2;
-
- __asm__ __volatile__ ("rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t" "movsb\n" "2:":"=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n / 4), "q" (n), "1" ((long) to), "2" ((long) from)
- :"memory");
-}
-#endif
-
-static void
-speedy_memcpy_c (void *dest, const void *src, size_t n)
-{
- if (dest != src) {
- memcpy (dest, src, n);
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-speedy_memcpy_mmx (void *d, const void *s, size_t n)
-{
- const uint8_t *src = s;
-
- uint8_t *dest = d;
-
- if (dest != src) {
- while (n > 64) {
- movq_m2r (src[0], mm0);
- movq_m2r (src[8], mm1);
- movq_m2r (src[16], mm2);
- movq_m2r (src[24], mm3);
- movq_m2r (src[32], mm4);
- movq_m2r (src[40], mm5);
- movq_m2r (src[48], mm6);
- movq_m2r (src[56], mm7);
- movq_r2m (mm0, dest[0]);
- movq_r2m (mm1, dest[8]);
- movq_r2m (mm2, dest[16]);
- movq_r2m (mm3, dest[24]);
- movq_r2m (mm4, dest[32]);
- movq_r2m (mm5, dest[40]);
- movq_r2m (mm6, dest[48]);
- movq_r2m (mm7, dest[56]);
- dest += 64;
- src += 64;
- n -= 64;
- }
-
- while (n > 8) {
- movq_m2r (src[0], mm0);
- movq_r2m (mm0, dest[0]);
- dest += 8;
- src += 8;
- n -= 8;
- }
-
- if (n)
- small_memcpy (dest, src, n);
-
- emms ();
- }
-}
-#endif
-
-#ifdef HAVE_CPU_I386
-static void
-speedy_memcpy_mmxext (void *d, const void *s, size_t n)
-{
- const uint8_t *src = s;
-
- uint8_t *dest = d;
-
- if (dest != src) {
- while (n > 64) {
- movq_m2r (src[0], mm0);
- movq_m2r (src[8], mm1);
- movq_m2r (src[16], mm2);
- movq_m2r (src[24], mm3);
- movq_m2r (src[32], mm4);
- movq_m2r (src[40], mm5);
- movq_m2r (src[48], mm6);
- movq_m2r (src[56], mm7);
- movntq_r2m (mm0, dest[0]);
- movntq_r2m (mm1, dest[8]);
- movntq_r2m (mm2, dest[16]);
- movntq_r2m (mm3, dest[24]);
- movntq_r2m (mm4, dest[32]);
- movntq_r2m (mm5, dest[40]);
- movntq_r2m (mm6, dest[48]);
- movntq_r2m (mm7, dest[56]);
- dest += 64;
- src += 64;
- n -= 64;
- }
-
- while (n > 8) {
- movq_m2r (src[0], mm0);
- movntq_r2m (mm0, dest[0]);
- dest += 8;
- src += 8;
- n -= 8;
- }
-
- if (n)
- small_memcpy (dest, src, n);
-
- sfence ();
- emms ();
- }
-}
-#endif
-
-static void
-blit_packed422_scanline_c (uint8_t * dest, const uint8_t * src, int width)
-{
- speedy_memcpy_c (dest, src, width * 2);
-}
-
-#ifdef HAVE_CPU_I386
-static void
-blit_packed422_scanline_mmx (uint8_t * dest, const uint8_t * src, int width)
-{
- speedy_memcpy_mmx (dest, src, width * 2);
-}
-#endif
-
-#ifdef HAVE_CPU_I386
-static void
-blit_packed422_scanline_mmxext (uint8_t * dest, const uint8_t * src, int width)
-{
- speedy_memcpy_mmxext (dest, src, width * 2);
-}
-#endif
-
-static void
-composite_colour4444_alpha_to_packed422_scanline_c (uint8_t * output,
- uint8_t * input, int af, int y, int cb, int cr, int width, int alpha)
-{
- int a = ((af * alpha) + 0x80) >> 8;
-
- if (a == 0xff) {
- blit_colour_packed422_scanline (output, width, y, cb, cr);
- } else if (a) {
- int i;
-
- for (i = 0; i < width; i++) {
- /*
- * (1 - alpha)*B + alpha*F
- * (1 - af*a)*B + af*a*F
- * B - af*a*B + af*a*F
- * B + a*(af*F - af*B)
- */
-
- output[0] =
- input[0] + ((alpha * (y - multiply_alpha (af,
- input[0])) + 0x80) >> 8);
-
- if ((i & 1) == 0) {
-
- /*
- * At first I thought I was doing this incorrectly, but
- * the following math has convinced me otherwise.
- *
- * C_r = (1 - alpha)*B + alpha*F
- * C_r = B - af*a*B + af*a*F
- *
- * C_r = 128 + ((1 - af*a)*(B - 128) + a*af*(F - 128))
- * C_r = 128 + (B - af*a*B - 128 + af*a*128 + a*af*F - a*af*128)
- * C_r = B - af*a*B + a*af*F
- */
-
- output[1] =
- input[1] + ((alpha * (cb - multiply_alpha (af,
- input[1])) + 0x80) >> 8);
- output[3] =
- input[3] + ((alpha * (cr - multiply_alpha (af,
- input[3])) + 0x80) >> 8);
- }
- output += 2;
- input += 2;
- }
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-composite_colour4444_alpha_to_packed422_scanline_mmxext (uint8_t * output,
- uint8_t * input, int af, int y, int cb, int cr, int width, int alpha)
-{
- const mmx_t alpha2 = { 0x0000FFFF00000000ULL };
- const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL };
- const mmx_t round = { 0x0080008000800080ULL };
- mmx_t foreground;
-
- int i;
-
- if (!alpha) {
- blit_packed422_scanline (output, input, width);
- return;
- }
-
- foreground.ub[0] = foreground.ub[4] = af;
- foreground.ub[1] = foreground.ub[5] = y;
- foreground.ub[2] = foreground.ub[6] = cb;
- foreground.ub[3] = foreground.ub[7] = cr;
-
- movq_m2r (alpha, mm2);
- pshufw_r2r (mm2, mm2, 0);
- pxor_r2r (mm7, mm7);
-
- for (i = width / 2; i; i--) {
- /* mm1 = [ cr ][ y ][ cb ][ y ] */
- movd_m2r (*input, mm1);
- punpcklbw_r2r (mm7, mm1);
-
- movq_m2r (foreground, mm3);
- movq_r2r (mm3, mm4);
- punpcklbw_r2r (mm7, mm3);
- punpckhbw_r2r (mm7, mm4);
- /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for alpha. */
-
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 0 a ][ 0 a ][ 0 a ] */
- pshufw_r2r (mm3, mm5, 0);
- pshufw_r2r (mm4, mm6, 0);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */
- pshufw_r2r (mm3, mm3, 201);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */
- pshufw_r2r (mm4, mm4, 16);
-
- pand_m2r (alpha1, mm3);
- pand_m2r (alpha2, mm4);
- pand_m2r (alpha1, mm5);
- pand_m2r (alpha2, mm6);
- por_r2r (mm4, mm3);
- por_r2r (mm6, mm5);
-
- /* now, mm5 is af and mm1 is B. Need to multiply them. */
- pmullw_r2r (mm1, mm5);
-
- /* Multiply by appalpha. */
- pmullw_r2r (mm2, mm3);
- paddw_m2r (round, mm3);
- psrlw_i2r (8, mm3);
- /* Result is now B + F. */
- paddw_r2r (mm3, mm1);
-
- /* Round up appropriately. */
- paddw_m2r (round, mm5);
-
- /* mm6 contains our i>>8; */
- movq_r2r (mm5, mm6);
- psrlw_i2r (8, mm6);
-
- /* Add mm6 back into mm5. Now our result is in the high bytes. */
- paddw_r2r (mm6, mm5);
-
- /* Shift down. */
- psrlw_i2r (8, mm5);
-
- /* Multiply by appalpha. */
- pmullw_r2r (mm2, mm5);
- paddw_m2r (round, mm5);
- psrlw_i2r (8, mm5);
-
- psubusw_r2r (mm5, mm1);
-
- /* mm1 = [ B + F - af*B ] */
- packuswb_r2r (mm1, mm1);
- movd_r2m (mm1, *output);
-
- output += 4;
- input += 4;
- }
- sfence ();
- emms ();
-}
-#endif
-
-
-static void
-composite_packed4444_alpha_to_packed422_scanline_c (uint8_t * output,
- uint8_t * input, uint8_t * foreground, int width, int alpha)
-{
- int i;
-
- for (i = 0; i < width; i++) {
- int af = foreground[0];
-
- if (af) {
- int a = ((af * alpha) + 0x80) >> 8;
-
-
- if (a == 0xff) {
- output[0] = foreground[1];
-
- if ((i & 1) == 0) {
- output[1] = foreground[2];
- output[3] = foreground[3];
- }
- } else if (a) {
- /*
- * (1 - alpha)*B + alpha*F
- * (1 - af*a)*B + af*a*F
- * B - af*a*B + af*a*F
- * B + a*(af*F - af*B)
- */
-
- output[0] = input[0]
- + ((alpha * (foreground[1]
- - multiply_alpha (foreground[0], input[0])) + 0x80) >> 8);
-
- if ((i & 1) == 0) {
-
- /*
- * At first I thought I was doing this incorrectly, but
- * the following math has convinced me otherwise.
- *
- * C_r = (1 - alpha)*B + alpha*F
- * C_r = B - af*a*B + af*a*F
- *
- * C_r = 128 + ((1 - af*a)*(B - 128) + a*af*(F - 128))
- * C_r = 128 + (B - af*a*B - 128 + af*a*128 + a*af*F - a*af*128)
- * C_r = B - af*a*B + a*af*F
- */
-
- output[1] = input[1] + ((alpha * (foreground[2]
- - multiply_alpha (foreground[0], input[1])) + 0x80) >> 8);
- output[3] = input[3] + ((alpha * (foreground[3]
- - multiply_alpha (foreground[0], input[3])) + 0x80) >> 8);
- }
- }
- }
- foreground += 4;
- output += 2;
- input += 2;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-composite_packed4444_alpha_to_packed422_scanline_mmxext (uint8_t * output,
- uint8_t * input, uint8_t * foreground, int width, int alpha)
-{
- const mmx_t alpha2 = { 0x0000FFFF00000000ULL };
- const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL };
- const mmx_t round = { 0x0080008000800080ULL };
- int i;
-
- if (!alpha) {
- blit_packed422_scanline (output, input, width);
- return;
- }
-
- if (alpha == 256) {
- composite_packed4444_to_packed422_scanline (output, input, foreground,
- width);
- return;
- }
-
- READ_PREFETCH_2048 (input);
- READ_PREFETCH_2048 (foreground);
-
- movq_m2r (alpha, mm2);
- pshufw_r2r (mm2, mm2, 0);
- pxor_r2r (mm7, mm7);
-
- for (i = width / 2; i; i--) {
- int fg1 = *((uint32_t *) foreground);
-
- int fg2 = *(((uint32_t *) foreground) + 1);
-
- if (fg1 || fg2) {
- /* mm1 = [ cr ][ y ][ cb ][ y ] */
- movd_m2r (*input, mm1);
- punpcklbw_r2r (mm7, mm1);
-
- movq_m2r (*foreground, mm3);
- movq_r2r (mm3, mm4);
- punpcklbw_r2r (mm7, mm3);
- punpckhbw_r2r (mm7, mm4);
- /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for alpha. */
-
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 0 a ][ 0 a ][ 0 a ] */
- pshufw_r2r (mm3, mm5, 0);
- pshufw_r2r (mm4, mm6, 0);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */
- pshufw_r2r (mm3, mm3, 201);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */
- pshufw_r2r (mm4, mm4, 16);
-
- pand_m2r (alpha1, mm3);
- pand_m2r (alpha2, mm4);
- pand_m2r (alpha1, mm5);
- pand_m2r (alpha2, mm6);
- por_r2r (mm4, mm3);
- por_r2r (mm6, mm5);
-
- /* now, mm5 is af and mm1 is B. Need to multiply them. */
- pmullw_r2r (mm1, mm5);
-
- /* Multiply by appalpha. */
- pmullw_r2r (mm2, mm3);
- paddw_m2r (round, mm3);
- psrlw_i2r (8, mm3);
- /* Result is now B + F. */
- paddw_r2r (mm3, mm1);
-
- /* Round up appropriately. */
- paddw_m2r (round, mm5);
-
- /* mm6 contains our i>>8; */
- movq_r2r (mm5, mm6);
- psrlw_i2r (8, mm6);
-
- /* Add mm6 back into mm5. Now our result is in the high bytes. */
- paddw_r2r (mm6, mm5);
-
- /* Shift down. */
- psrlw_i2r (8, mm5);
-
- /* Multiply by appalpha. */
- pmullw_r2r (mm2, mm5);
- paddw_m2r (round, mm5);
- psrlw_i2r (8, mm5);
-
- psubusw_r2r (mm5, mm1);
-
- /* mm1 = [ B + F - af*B ] */
- packuswb_r2r (mm1, mm1);
- movd_r2m (mm1, *output);
- }
-
- foreground += 8;
- output += 4;
- input += 4;
- }
- sfence ();
- emms ();
-}
-#endif
-
-static void
-composite_packed4444_to_packed422_scanline_c (uint8_t * output, uint8_t * input,
- uint8_t * foreground, int width)
-{
- int i;
-
- for (i = 0; i < width; i++) {
- int a = foreground[0];
-
- if (a == 0xff) {
- output[0] = foreground[1];
-
- if ((i & 1) == 0) {
- output[1] = foreground[2];
- output[3] = foreground[3];
- }
- } else if (a) {
- /*
- * (1 - alpha)*B + alpha*F
- * B + af*F - af*B
- */
-
- output[0] =
- input[0] + foreground[1] - multiply_alpha (foreground[0], input[0]);
-
- if ((i & 1) == 0) {
-
- /*
- * C_r = (1 - af)*B + af*F
- * C_r = B - af*B + af*F
- */
-
- output[1] =
- input[1] + foreground[2] - multiply_alpha (foreground[0], input[1]);
- output[3] =
- input[3] + foreground[3] - multiply_alpha (foreground[0], input[3]);
- }
- }
- foreground += 4;
- output += 2;
- input += 2;
- }
-}
-
-
-#ifdef HAVE_CPU_I386
-static void
-composite_packed4444_to_packed422_scanline_mmxext (uint8_t * output,
- uint8_t * input, uint8_t * foreground, int width)
-{
- const mmx_t alpha2 = { 0x0000FFFF00000000ULL };
- const mmx_t alpha1 = { 0xFFFF0000FFFFFFFFULL };
- const mmx_t round = { 0x0080008000800080ULL };
- int i;
-
- READ_PREFETCH_2048 (input);
- READ_PREFETCH_2048 (foreground);
-
- pxor_r2r (mm7, mm7);
- for (i = width / 2; i; i--) {
- int fg1 = *((uint32_t *) foreground);
-
- int fg2 = *(((uint32_t *) foreground) + 1);
-
- if ((fg1 & 0xff) == 0xff && (fg2 & 0xff) == 0xff) {
- movq_m2r (*foreground, mm3);
- movq_r2r (mm3, mm4);
- punpcklbw_r2r (mm7, mm3);
- punpckhbw_r2r (mm7, mm4);
- /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for alpha. */
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */
- pshufw_r2r (mm3, mm3, 201);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */
- pshufw_r2r (mm4, mm4, 16);
- pand_m2r (alpha1, mm3);
- pand_m2r (alpha2, mm4);
- por_r2r (mm4, mm3);
- /* mm1 = [ B + F - af*B ] */
- packuswb_r2r (mm3, mm3);
- movd_r2m (mm3, *output);
- } else if (fg1 || fg2) {
-
- /* mm1 = [ cr ][ y ][ cb ][ y ] */
- movd_m2r (*input, mm1);
- punpcklbw_r2r (mm7, mm1);
-
- movq_m2r (*foreground, mm3);
- movq_r2r (mm3, mm4);
- punpcklbw_r2r (mm7, mm3);
- punpckhbw_r2r (mm7, mm4);
- /* mm3 and mm4 will be the appropriate colours, mm5 and mm6 for alpha. */
-
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 0 a ][ 0 a ][ 0 a ] */
- pshufw_r2r (mm3, mm5, 0);
- pshufw_r2r (mm4, mm6, 0);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 3 cr ][ 0 a ][ 2 cb ][ 1 y ] == 11001000 == 201 */
- pshufw_r2r (mm3, mm3, 201);
- /* [ 3 cr ][ 2 cb ][ 1 y ][ 0 a ] -> [ 0 a ][ 1 y ][ 0 a ][ 0 a ] == 00010000 == 16 */
- pshufw_r2r (mm4, mm4, 16);
-
- pand_m2r (alpha1, mm3);
- pand_m2r (alpha2, mm4);
- pand_m2r (alpha1, mm5);
- pand_m2r (alpha2, mm6);
- por_r2r (mm4, mm3);
- por_r2r (mm6, mm5);
-
- /* now, mm5 is af and mm1 is B. Need to multiply them. */
- pmullw_r2r (mm1, mm5);
-
- /* Result is now B + F. */
- paddw_r2r (mm3, mm1);
-
- /* Round up appropriately. */
- paddw_m2r (round, mm5);
-
- /* mm6 contains our i>>8; */
- movq_r2r (mm5, mm6);
- psrlw_i2r (8, mm6);
-
- /* Add mm6 back into mm5. Now our result is in the high bytes. */
- paddw_r2r (mm6, mm5);
-
- /* Shift down. */
- psrlw_i2r (8, mm5);
-
- psubusw_r2r (mm5, mm1);
-
- /* mm1 = [ B + F - af*B ] */
- packuswb_r2r (mm1, mm1);
- movd_r2m (mm1, *output);
- }
-
- foreground += 8;
- output += 4;
- input += 4;
- }
- sfence ();
- emms ();
-}
-#endif
-
-/*
- * um... just need some scrap paper...
- * D = (1 - alpha)*B + alpha*F
- * D = (1 - a)*B + a*textluma
- * = B - a*B + a*textluma
- * = B + a*(textluma - B)
- * Da = (1 - a)*b + a
- */
-static void
-composite_alphamask_to_packed4444_scanline_c (uint8_t * output,
- uint8_t * input,
- uint8_t * mask, int width, int textluma, int textcb, int textcr)
-{
- uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff;
-
- int i;
-
- for (i = 0; i < width; i++) {
- int a = *mask;
-
- if (a == 0xff) {
- *((uint32_t *) output) = opaque;
- } else if ((input[0] == 0x00)) {
- *((uint32_t *) output) = (multiply_alpha (a, textcr) << 24)
- | (multiply_alpha (a, textcb) << 16)
- | (multiply_alpha (a, textluma) << 8) | a;
- } else if (a) {
- *((uint32_t *) output) =
- ((input[3] + multiply_alpha (a, textcr - input[3])) << 24)
- | ((input[2] + multiply_alpha (a, textcb - input[2])) << 16)
- | ((input[1] + multiply_alpha (a, textluma - input[1])) << 8)
- | (input[0] + multiply_alpha (a, 0xff - input[0]));
- }
- mask++;
- output += 4;
- input += 4;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-composite_alphamask_to_packed4444_scanline_mmxext (uint8_t * output,
- uint8_t * input,
- uint8_t * mask, int width, int textluma, int textcb, int textcr)
-{
- uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff;
- const mmx_t round = { 0x0080008000800080ULL };
- const mmx_t fullalpha = { 0x00000000000000ffULL };
- mmx_t colour;
-
- colour.w[0] = 0x00;
- colour.w[1] = textluma;
- colour.w[2] = textcb;
- colour.w[3] = textcr;
-
- movq_m2r (colour, mm1);
- movq_r2r (mm1, mm0);
-
- /* mm0 = [ cr ][ cb ][ y ][ 0xff ] */
- paddw_m2r (fullalpha, mm0);
-
- /* mm7 = 0 */
- pxor_r2r (mm7, mm7);
-
- /* mm6 = round */
- movq_m2r (round, mm6);
-
- while (width--) {
- int a = *mask;
-
- if (a == 0xff) {
- *((uint32_t *) output) = opaque;
- } else if ((input[0] == 0x00)) {
- /* We just need to multiply our colour by the alpha value. */
-
- /* mm2 = [ a ][ a ][ a ][ a ] */
- movd_m2r (a, mm2);
- movq_r2r (mm2, mm3);
- pshufw_r2r (mm2, mm2, 0);
-
- /* mm5 = [ cr ][ cb ][ y ][ 0 ] */
- movq_r2r (mm1, mm5);
-
- /* Multiply by alpha. */
- pmullw_r2r (mm2, mm5);
- paddw_m2r (round, mm5);
- movq_r2r (mm5, mm6);
- psrlw_i2r (8, mm6);
- paddw_r2r (mm6, mm5);
- psrlw_i2r (8, mm5);
-
- /* Set alpha to a. */
- por_r2r (mm3, mm5);
-
- /* Pack and write our result. */
- packuswb_r2r (mm5, mm5);
- movd_r2m (mm5, *output);
- } else if (a) {
- /* mm2 = [ a ][ a ][ a ][ a ] */
- movd_m2r (a, mm2);
- pshufw_r2r (mm2, mm2, 0);
-
- /* mm3 = [ cr ][ cb ][ y ][ 0xff ] */
- movq_r2r (mm0, mm3);
-
- /* mm4 = [ i_cr ][ i_cb ][ i_y ][ i_a ] */
- movd_m2r (*input, mm4);
- punpcklbw_r2r (mm7, mm4);
-
- /* Subtract input and colour. */
- psubw_r2r (mm4, mm3); /* mm3 = mm3 - mm4 */
-
- /* Multiply alpha. */
- pmullw_r2r (mm2, mm3);
- paddw_r2r (mm6, mm3);
- movq_r2r (mm3, mm2);
- psrlw_i2r (8, mm3);
- paddw_r2r (mm2, mm3);
- psrlw_i2r (8, mm3);
-
- /* Add back in the input. */
- paddb_r2r (mm3, mm4);
-
- /* Write result. */
- packuswb_r2r (mm4, mm4);
- movd_r2m (mm4, *output);
- }
- mask++;
- output += 4;
- input += 4;
- }
- sfence ();
- emms ();
-}
-#endif
-
-static void
-composite_alphamask_alpha_to_packed4444_scanline_c (uint8_t * output,
- uint8_t * input,
- uint8_t * mask, int width, int textluma, int textcb, int textcr, int alpha)
-{
- uint32_t opaque = (textcr << 24) | (textcb << 16) | (textluma << 8) | 0xff;
-
- int i;
-
- for (i = 0; i < width; i++) {
- int af = *mask;
-
- if (af) {
- int a = ((af * alpha) + 0x80) >> 8;
-
- if (a == 0xff) {
- *((uint32_t *) output) = opaque;
- } else if (input[0] == 0x00) {
- *((uint32_t *) output) = (multiply_alpha (a, textcr) << 24)
- | (multiply_alpha (a, textcb) << 16)
- | (multiply_alpha (a, textluma) << 8) | a;
- } else if (a) {
- *((uint32_t *) output) =
- ((input[3] + multiply_alpha (a, textcr - input[3])) << 24)
- | ((input[2] + multiply_alpha (a, textcb - input[2])) << 16)
- | ((input[1] + multiply_alpha (a, textluma - input[1])) << 8)
- | (a + multiply_alpha (0xff - a, input[0]));
- }
- }
- mask++;
- output += 4;
- input += 4;
- }
-}
-
-static void
-premultiply_packed4444_scanline_c (uint8_t * output, uint8_t * input, int width)
-{
- while (width--) {
- unsigned int cur_a = input[0];
-
- *((uint32_t *) output) = (multiply_alpha (cur_a, input[3]) << 24)
- | (multiply_alpha (cur_a, input[2]) << 16)
- | (multiply_alpha (cur_a, input[1]) << 8)
- | cur_a;
-
- output += 4;
- input += 4;
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-premultiply_packed4444_scanline_mmxext (uint8_t * output, uint8_t * input,
- int width)
-{
- const mmx_t round = { 0x0080008000800080ULL };
- const mmx_t alpha = { 0x00000000000000ffULL };
- const mmx_t noalp = { 0xffffffffffff0000ULL };
-
- pxor_r2r (mm7, mm7);
- while (width--) {
- movd_m2r (*input, mm0);
- punpcklbw_r2r (mm7, mm0);
-
- movq_r2r (mm0, mm2);
- pshufw_r2r (mm2, mm2, 0);
- movq_r2r (mm2, mm4);
- pand_m2r (alpha, mm4);
-
- pmullw_r2r (mm2, mm0);
- paddw_m2r (round, mm0);
-
- movq_r2r (mm0, mm3);
- psrlw_i2r (8, mm3);
- paddw_r2r (mm3, mm0);
- psrlw_i2r (8, mm0);
-
- pand_m2r (noalp, mm0);
- paddw_r2r (mm4, mm0);
-
- packuswb_r2r (mm0, mm0);
- movd_r2m (mm0, *output);
-
- output += 4;
- input += 4;
- }
- sfence ();
- emms ();
-}
-#endif
-
-static void
-blend_packed422_scanline_c (uint8_t * output, uint8_t * src1,
- uint8_t * src2, int width, int pos)
-{
- if (pos == 0) {
- blit_packed422_scanline (output, src1, width);
- } else if (pos == 256) {
- blit_packed422_scanline (output, src2, width);
- } else if (pos == 128) {
- interpolate_packed422_scanline (output, src1, src2, width);
- } else {
- width *= 2;
- while (width--) {
- *output++ = ((*src1++ * (256 - pos)) + (*src2++ * pos) + 0x80) >> 8;
- }
- }
-}
-
-#ifdef HAVE_CPU_I386
-static void
-blend_packed422_scanline_mmxext (uint8_t * output, uint8_t * src1,
- uint8_t * src2, int width, int pos)
-{
- if (pos <= 0) {
- blit_packed422_scanline (output, src1, width);
- } else if (pos >= 256) {
- blit_packed422_scanline (output, src2, width);
- } else if (pos == 128) {
- interpolate_packed422_scanline (output, src1, src2, width);
- } else {
- const mmx_t all256 = { 0x0100010001000100ULL };
- const mmx_t round = { 0x0080008000800080ULL };
-
- movd_m2r (pos, mm0);
- pshufw_r2r (mm0, mm0, 0);
- movq_m2r (all256, mm1);
- psubw_r2r (mm0, mm1);
- pxor_r2r (mm7, mm7);
-
- for (width /= 2; width; width--) {
- movd_m2r (*src1, mm3);
- movd_m2r (*src2, mm4);
- punpcklbw_r2r (mm7, mm3);
- punpcklbw_r2r (mm7, mm4);
-
- pmullw_r2r (mm1, mm3);
- pmullw_r2r (mm0, mm4);
- paddw_r2r (mm4, mm3);
- paddw_m2r (round, mm3);
- psrlw_i2r (8, mm3);
-
- packuswb_r2r (mm3, mm3);
- movd_r2m (mm3, *output);
-
- output += 4;
- src1 += 4;
- src2 += 4;
- }
- sfence ();
- emms ();
- }
-}
-#endif
-
-#ifdef HAVE_CPU_I386
-static void
-quarter_blit_vertical_packed422_scanline_mmxext (uint8_t * output,
- uint8_t * one, uint8_t * three, int width)
-{
- int i;
-
- for (i = width / 16; i; --i) {
- movq_m2r (*one, mm0);
- movq_m2r (*three, mm1);
- movq_m2r (*(one + 8), mm2);
- movq_m2r (*(three + 8), mm3);
- movq_m2r (*(one + 16), mm4);
- movq_m2r (*(three + 16), mm5);
- movq_m2r (*(one + 24), mm6);
- movq_m2r (*(three + 24), mm7);
- pavgb_r2r (mm1, mm0);
- pavgb_r2r (mm1, mm0);
- pavgb_r2r (mm3, mm2);
- pavgb_r2r (mm3, mm2);
- pavgb_r2r (mm5, mm4);
- pavgb_r2r (mm5, mm4);
- pavgb_r2r (mm7, mm6);
- pavgb_r2r (mm7, mm6);
- movntq_r2m (mm0, *output);
- movntq_r2m (mm2, *(output + 8));
- movntq_r2m (mm4, *(output + 16));
- movntq_r2m (mm6, *(output + 24));
- output += 32;
- one += 32;
- three += 32;
- }
- width = (width & 0xf);
-
- for (i = width / 4; i; --i) {
- movq_m2r (*one, mm0);
- movq_m2r (*three, mm1);
- pavgb_r2r (mm1, mm0);
- pavgb_r2r (mm1, mm0);
- movntq_r2m (mm0, *output);
- output += 8;
- one += 8;
- three += 8;
- }
- width = width & 0x7;
-
- /* Handle last few pixels. */
- for (i = width * 2; i; --i) {
- *output++ = (*one + *three + *three + *three + 2) / 4;
- one++;
- three++;
- }
-
- sfence ();
- emms ();
-}
-#endif
-
-
-static void
-quarter_blit_vertical_packed422_scanline_c (uint8_t * output, uint8_t * one,
- uint8_t * three, int width)
-{
- width *= 2;
- while (width--) {
- *output++ = (*one + *three + *three + *three + 2) / 4;
- one++;
- three++;
- }
-}
-
-static void
-subpix_blit_vertical_packed422_scanline_c (uint8_t * output, uint8_t * top,
- uint8_t * bot, int subpixpos, int width)
-{
- if (subpixpos == 32768) {
- interpolate_packed422_scanline (output, top, bot, width);
- } else if (subpixpos == 16384) {
- quarter_blit_vertical_packed422_scanline (output, top, bot, width);
- } else if (subpixpos == 49152) {
- quarter_blit_vertical_packed422_scanline (output, bot, top, width);
- } else {
- int x;
-
- width *= 2;
- for (x = 0; x < width; x++) {
- output[x] =
- ((top[x] * subpixpos) + (bot[x] * (0xffff - subpixpos))) >> 16;
- }
- }
-}
-
-static void
-a8_subpix_blit_scanline_c (uint8_t * output, uint8_t * input,
- int lasta, int startpos, int width)
-{
- int pos = 0xffff - (startpos & 0xffff);
-
- int prev = lasta;
-
- int x;
-
- for (x = 0; x < width; x++) {
- output[x] = ((prev * pos) + (input[x] * (0xffff - pos))) >> 16;
- prev = input[x];
- }
-}
-
-/*
- * These are from lavtools in mjpegtools:
- *
- * colorspace.c: Routines to perform colorspace conversions.
- *
- * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#define FP_BITS 18
-
-/* precomputed tables */
-
-static int Y_R[256];
-
-static int Y_G[256];
-
-static int Y_B[256];
-
-static int Cb_R[256];
-
-static int Cb_G[256];
-
-static int Cb_B[256];
-
-static int Cr_R[256];
-
-static int Cr_G[256];
-
-static int Cr_B[256];
-
-static int conv_RY_inited = 0;
-
-static int RGB_Y[256];
-
-static int R_Cr[256];
-
-static int G_Cb[256];
-
-static int G_Cr[256];
-
-static int B_Cb[256];
-
-static int conv_YR_inited = 0;
-
-static int
-myround (double n)
-{
- if (n >= 0)
- return (int) (n + 0.5);
- else
- return (int) (n - 0.5);
-}
-
-static void
-init_RGB_to_YCbCr_tables (void)
-{
- int i;
-
- /*
- * Q_Z[i] = (coefficient * i
- * * (Q-excursion) / (Z-excursion) * fixed-point-factor)
- *
- * to one of each, add the following:
- * + (fixed-point-factor / 2) --- for rounding later
- * + (Q-offset * fixed-point-factor) --- to add the offset
- *
- */
- for (i = 0; i < 256; i++) {
- Y_R[i] =
- myround (0.299 * (double) i * 219.0 / 255.0 * (double) (1 << FP_BITS));
- Y_G[i] =
- myround (0.587 * (double) i * 219.0 / 255.0 * (double) (1 << FP_BITS));
- Y_B[i] =
- myround ((0.114 * (double) i * 219.0 / 255.0 * (double) (1 << FP_BITS))
- + (double) (1 << (FP_BITS - 1)) + (16.0 * (double) (1 << FP_BITS)));
-
- Cb_R[i] =
- myround (-0.168736 * (double) i * 224.0 / 255.0 *
- (double) (1 << FP_BITS));
- Cb_G[i] =
- myround (-0.331264 * (double) i * 224.0 / 255.0 *
- (double) (1 << FP_BITS));
- Cb_B[i] =
- myround ((0.500 * (double) i * 224.0 / 255.0 * (double) (1 << FP_BITS))
- + (double) (1 << (FP_BITS - 1)) + (128.0 * (double) (1 << FP_BITS)));
-
- Cr_R[i] =
- myround (0.500 * (double) i * 224.0 / 255.0 * (double) (1 << FP_BITS));
- Cr_G[i] =
- myround (-0.418688 * (double) i * 224.0 / 255.0 *
- (double) (1 << FP_BITS));
- Cr_B[i] =
- myround ((-0.081312 * (double) i * 224.0 / 255.0 *
- (double) (1 << FP_BITS))
- + (double) (1 << (FP_BITS - 1)) + (128.0 * (double) (1 << FP_BITS)));
- }
- conv_RY_inited = 1;
-}
-
-static void
-init_YCbCr_to_RGB_tables (void)
-{
- int i;
-
- /*
- * Q_Z[i] = (coefficient * i
- * * (Q-excursion) / (Z-excursion) * fixed-point-factor)
- *
- * to one of each, add the following:
- * + (fixed-point-factor / 2) --- for rounding later
- * + (Q-offset * fixed-point-factor) --- to add the offset
- *
- */
-
- /* clip Y values under 16 */
- for (i = 0; i < 16; i++) {
- RGB_Y[i] =
- myround ((1.0 * (double) (16) * 255.0 / 219.0 * (double) (1 << FP_BITS))
- + (double) (1 << (FP_BITS - 1)));
- }
- for (i = 16; i < 236; i++) {
- RGB_Y[i] =
- myround ((1.0 * (double) (i -
- 16) * 255.0 / 219.0 * (double) (1 << FP_BITS))
- + (double) (1 << (FP_BITS - 1)));
- }
- /* clip Y values above 235 */
- for (i = 236; i < 256; i++) {
- RGB_Y[i] =
- myround ((1.0 * (double) (235) * 255.0 / 219.0 *
- (double) (1 << FP_BITS))
- + (double) (1 << (FP_BITS - 1)));
- }
-
- /* clip Cb/Cr values below 16 */
- for (i = 0; i < 16; i++) {
- R_Cr[i] =
- myround (1.402 * (double) (-112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- G_Cr[i] =
- myround (-0.714136 * (double) (-112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- G_Cb[i] =
- myround (-0.344136 * (double) (-112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- B_Cb[i] =
- myround (1.772 * (double) (-112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- }
- for (i = 16; i < 241; i++) {
- R_Cr[i] =
- myround (1.402 * (double) (i -
- 128) * 255.0 / 224.0 * (double) (1 << FP_BITS));
- G_Cr[i] =
- myround (-0.714136 * (double) (i -
- 128) * 255.0 / 224.0 * (double) (1 << FP_BITS));
- G_Cb[i] =
- myround (-0.344136 * (double) (i -
- 128) * 255.0 / 224.0 * (double) (1 << FP_BITS));
- B_Cb[i] =
- myround (1.772 * (double) (i -
- 128) * 255.0 / 224.0 * (double) (1 << FP_BITS));
- }
- /* clip Cb/Cr values above 240 */
- for (i = 241; i < 256; i++) {
- R_Cr[i] =
- myround (1.402 * (double) (112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- G_Cr[i] =
- myround (-0.714136 * (double) (112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- G_Cb[i] =
- myround (-0.344136 * (double) (i -
- 128) * 255.0 / 224.0 * (double) (1 << FP_BITS));
- B_Cb[i] =
- myround (1.772 * (double) (112) * 255.0 / 224.0 *
- (double) (1 << FP_BITS));
- }
- conv_YR_inited = 1;
-}
-
-static void
-rgb24_to_packed444_rec601_scanline_c (uint8_t * output, uint8_t * input,
- int width)
-{
- if (!conv_RY_inited)
- init_RGB_to_YCbCr_tables ();
-
- while (width--) {
- int r = input[0];
-
- int g = input[1];
-
- int b = input[2];
-
- output[0] = (Y_R[r] + Y_G[g] + Y_B[b]) >> FP_BITS;
- output[1] = (Cb_R[r] + Cb_G[g] + Cb_B[b]) >> FP_BITS;
- output[2] = (Cr_R[r] + Cr_G[g] + Cr_B[b]) >> FP_BITS;
- output += 3;
- input += 3;
- }
-}
-
-static void
-rgba32_to_packed4444_rec601_scanline_c (uint8_t * output, uint8_t * input,
- int width)
-{
- if (!conv_RY_inited)
- init_RGB_to_YCbCr_tables ();
-
- while (width--) {
- int r = input[0];
-
- int g = input[1];
-
- int b = input[2];
-
- int a = input[3];
-
- output[0] = a;
- output[1] = (Y_R[r] + Y_G[g] + Y_B[b]) >> FP_BITS;
- output[2] = (Cb_R[r] + Cb_G[g] + Cb_B[b]) >> FP_BITS;
- output[3] = (Cr_R[r] + Cr_G[g] + Cr_B[b]) >> FP_BITS;
- output += 4;
- input += 4;
- }
-}
-
-static void
-packed444_to_rgb24_rec601_scanline_c (uint8_t * output, uint8_t * input,
- int width)
-{
- if (!conv_YR_inited)
- init_YCbCr_to_RGB_tables ();
-
- while (width--) {
- int luma = input[0];
-
- int cb = input[1];
-
- int cr = input[2];
-
- output[0] = clip255 ((RGB_Y[luma] + R_Cr[cr]) >> FP_BITS);
- output[1] = clip255 ((RGB_Y[luma] + G_Cb[cb] + G_Cr[cr]) >> FP_BITS);
- output[2] = clip255 ((RGB_Y[luma] + B_Cb[cb]) >> FP_BITS);
-
- output += 3;
- input += 3;
- }
-}
-
-/*
- * 601 numbers:
- *
- * Y' = 0.299*R' + 0.587*G' + 0.114*B' (in 0.0 to 1.0)
- * Cb = -0.169*R' - 0.331*G' + 0.500*B' (in -0.5 to +0.5)
- * Cr = 0.500*R' - 0.419*G' - 0.081*B' (in -0.5 to +0.5)
- *
- * Inverse:
- * Y Cb Cr
- * R 1.0000 -0.0009 1.4017
- * G 1.0000 -0.3437 -0.7142
- * B 1.0000 1.7722 0.0010
- *
- * S170M numbers:
- * Y' = 0.299*R' + 0.587*G' + 0.114*B' (in 0.0 to 1.0)
- * B-Y' = -0.299*R' - 0.587*G' + 0.886*B'
- * R-Y' = 0.701*R' - 0.587*G' - 0.114*B'
- */
-/*
-static void packed444_to_rgb24_rec601_reference_scanline( uint8_t *output, uint8_t *input, int width )
-{
- while( width-- ) {
- double yp = (((double) input[ 0 ]) - 16.0) / 255.0;
- double cb = (((double) input[ 1 ]) - 128.0) / 255.0;
- double cr = (((double) input[ 2 ]) - 128.0) / 255.0;
- double r, g, b;
-
- r = yp - (0.0009*cb) + (1.4017*cr);
- g = yp - (0.3437*cb) - (0.7142*cr);
- b = yp + (1.7722*cb) + (0.0010*cr);
-
- if( r > 1.0 ) r = 1.0; else if( r < 0.0 ) r = 0.0;
- if( g > 1.0 ) g = 1.0; else if( g < 0.0 ) g = 0.0;
- if( b > 1.0 ) b = 1.0; else if( b < 0.0 ) b = 0.0;
-
- output[ 0 ] = (int) ((r * 255.0) + 0.5);
- output[ 1 ] = (int) ((g * 255.0) + 0.5);
- output[ 2 ] = (int) ((b * 255.0) + 0.5);
-
- output += 3;
- input += 3;
- }
-}
-*/
-
-static void
-packed444_to_nonpremultiplied_packed4444_scanline_c (uint8_t * output,
- uint8_t * input, int width, int alpha)
-{
- int i;
-
- for (i = 0; i < width; i++) {
- output[0] = alpha & 0xff;
- output[1] = input[0] & 0xff;
- output[2] = input[1] & 0xff;
- output[3] = input[2] & 0xff;
-
- output += 4;
- input += 3;
- }
-}
-
-static void
-aspect_adjust_packed4444_scanline_c (uint8_t * output,
- uint8_t * input, int width, double pixel_aspect)
-{
- double i;
-
- int prev_i = 0;
-
- int w = 0;
-
- pixel_aspect = 1.0 / pixel_aspect;
-
- for (i = 0.0; i < width; i += pixel_aspect) {
- uint8_t *curin = input + ((int) i) * 4;
-
- if (!prev_i) {
- output[0] = curin[0];
- output[1] = curin[1];
- output[2] = curin[2];
- output[3] = curin[3];
- } else {
- int avg_a = 0;
-
- int avg_y = 0;
-
- int avg_cb = 0;
-
- int avg_cr = 0;
-
- int pos = prev_i * 4;
-
- int c = 0;
-
- int j;
-
- for (j = prev_i; j <= (int) i; j++) {
- avg_a += input[pos++];
- avg_y += input[pos++];
- avg_cb += input[pos++];
- avg_cr += input[pos++];
- c++;
- }
- output[0] = avg_a / c;
- output[1] = avg_y / c;
- output[2] = avg_cb / c;
- output[3] = avg_cr / c;
- }
- output += 4;
- prev_i = (int) i;
- w++;
- }
-}
-
-static uint32_t speedy_accel;
-
-void
-setup_speedy_calls (uint32_t accel, int verbose)
-{
- speedy_accel = accel;
-
- interpolate_packed422_scanline = interpolate_packed422_scanline_c;
- blit_colour_packed422_scanline = blit_colour_packed422_scanline_c;
- blit_colour_packed4444_scanline = blit_colour_packed4444_scanline_c;
- blit_packed422_scanline = blit_packed422_scanline_c;
- composite_packed4444_to_packed422_scanline =
- composite_packed4444_to_packed422_scanline_c;
- composite_packed4444_alpha_to_packed422_scanline =
- composite_packed4444_alpha_to_packed422_scanline_c;
- composite_alphamask_to_packed4444_scanline =
- composite_alphamask_to_packed4444_scanline_c;
- composite_alphamask_alpha_to_packed4444_scanline =
- composite_alphamask_alpha_to_packed4444_scanline_c;
- premultiply_packed4444_scanline = premultiply_packed4444_scanline_c;
- blend_packed422_scanline = blend_packed422_scanline_c;
- comb_factor_packed422_scanline = 0;
- diff_factor_packed422_scanline = diff_factor_packed422_scanline_c;
- kill_chroma_packed422_inplace_scanline =
- kill_chroma_packed422_inplace_scanline_c;
- mirror_packed422_inplace_scanline = mirror_packed422_inplace_scanline_c;
- speedy_memcpy = speedy_memcpy_c;
- diff_packed422_block8x8 = diff_packed422_block8x8_c;
- a8_subpix_blit_scanline = a8_subpix_blit_scanline_c;
- quarter_blit_vertical_packed422_scanline =
- quarter_blit_vertical_packed422_scanline_c;
- subpix_blit_vertical_packed422_scanline =
- subpix_blit_vertical_packed422_scanline_c;
- packed444_to_nonpremultiplied_packed4444_scanline =
- packed444_to_nonpremultiplied_packed4444_scanline_c;
- aspect_adjust_packed4444_scanline = aspect_adjust_packed4444_scanline_c;
- packed444_to_packed422_scanline = packed444_to_packed422_scanline_c;
- packed422_to_packed444_scanline = packed422_to_packed444_scanline_c;
- packed422_to_packed444_rec601_scanline =
- packed422_to_packed444_rec601_scanline_c;
- packed444_to_rgb24_rec601_scanline = packed444_to_rgb24_rec601_scanline_c;
- rgb24_to_packed444_rec601_scanline = rgb24_to_packed444_rec601_scanline_c;
- rgba32_to_packed4444_rec601_scanline = rgba32_to_packed4444_rec601_scanline_c;
- invert_colour_packed422_inplace_scanline =
- invert_colour_packed422_inplace_scanline_c;
- vfilter_chroma_121_packed422_scanline =
- vfilter_chroma_121_packed422_scanline_c;
- vfilter_chroma_332_packed422_scanline =
- vfilter_chroma_332_packed422_scanline_c;
- convert_uyvy_to_yuyv_scanline = convert_uyvy_to_yuyv_scanline_c;
- composite_colour4444_alpha_to_packed422_scanline =
- composite_colour4444_alpha_to_packed422_scanline_c;
-
-#ifdef HAVE_CPU_I386
- if (speedy_accel & OIL_IMPL_FLAG_MMXEXT) {
- if (verbose) {
- fprintf (stderr, "speedycode: Using MMXEXT optimized functions.\n");
- }
- interpolate_packed422_scanline = interpolate_packed422_scanline_mmxext;
- blit_colour_packed422_scanline = blit_colour_packed422_scanline_mmxext;
- blit_colour_packed4444_scanline = blit_colour_packed4444_scanline_mmxext;
- blit_packed422_scanline = blit_packed422_scanline_mmxext;
- composite_packed4444_to_packed422_scanline =
- composite_packed4444_to_packed422_scanline_mmxext;
- composite_packed4444_alpha_to_packed422_scanline =
- composite_packed4444_alpha_to_packed422_scanline_mmxext;
- composite_alphamask_to_packed4444_scanline =
- composite_alphamask_to_packed4444_scanline_mmxext;
- premultiply_packed4444_scanline = premultiply_packed4444_scanline_mmxext;
- kill_chroma_packed422_inplace_scanline =
- kill_chroma_packed422_inplace_scanline_mmx;
- blend_packed422_scanline = blend_packed422_scanline_mmxext;
- diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx;
- comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx;
- diff_packed422_block8x8 = diff_packed422_block8x8_mmx;
- quarter_blit_vertical_packed422_scanline =
- quarter_blit_vertical_packed422_scanline_mmxext;
- invert_colour_packed422_inplace_scanline =
- invert_colour_packed422_inplace_scanline_mmx;
- vfilter_chroma_121_packed422_scanline =
- vfilter_chroma_121_packed422_scanline_mmx;
- vfilter_chroma_332_packed422_scanline =
- vfilter_chroma_332_packed422_scanline_mmx;
- convert_uyvy_to_yuyv_scanline = convert_uyvy_to_yuyv_scanline_mmx;
- composite_colour4444_alpha_to_packed422_scanline =
- composite_colour4444_alpha_to_packed422_scanline_mmxext;
- speedy_memcpy = speedy_memcpy_mmxext;
- } else if (speedy_accel & OIL_IMPL_FLAG_MMX) {
- if (verbose) {
- fprintf (stderr, "speedycode: Using MMX optimized functions.\n");
- }
- interpolate_packed422_scanline = interpolate_packed422_scanline_mmx;
- blit_colour_packed422_scanline = blit_colour_packed422_scanline_mmx;
- blit_colour_packed4444_scanline = blit_colour_packed4444_scanline_mmx;
- blit_packed422_scanline = blit_packed422_scanline_mmx;
- diff_factor_packed422_scanline = diff_factor_packed422_scanline_mmx;
- comb_factor_packed422_scanline = comb_factor_packed422_scanline_mmx;
- kill_chroma_packed422_inplace_scanline =
- kill_chroma_packed422_inplace_scanline_mmx;
- diff_packed422_block8x8 = diff_packed422_block8x8_mmx;
- invert_colour_packed422_inplace_scanline =
- invert_colour_packed422_inplace_scanline_mmx;
- vfilter_chroma_121_packed422_scanline =
- vfilter_chroma_121_packed422_scanline_mmx;
- vfilter_chroma_332_packed422_scanline =
- vfilter_chroma_332_packed422_scanline_mmx;
- convert_uyvy_to_yuyv_scanline = convert_uyvy_to_yuyv_scanline_mmx;
- speedy_memcpy = speedy_memcpy_mmx;
- } else {
- if (verbose) {
- fprintf (stderr,
- "speedycode: No MMX or MMXEXT support detected, using C fallbacks.\n");
- }
- }
-#endif
-}
-
-uint32_t
-speedy_get_accel (void)
-{
- return speedy_accel;
-}
diff --git a/gst/deinterlace2/tvtime/speedy.h b/gst/deinterlace2/tvtime/speedy.h
deleted file mode 100644
index 37293bba..00000000
--- a/gst/deinterlace2/tvtime/speedy.h
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- *
- * GStreamer
- * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/*
- * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
- * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
- */
-
-#ifndef SPEEDY_H_INCLUDED
-#define SPEEDY_H_INCLUDED
-
-#if defined (__SVR4) && defined (__sun)
-# include <sys/int_types.h>
-#else
-# include <stdint.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Speedy is a collection of optimized functions plus their C fallbacks.
- * This includes a simple system to select which functions to use
- * at runtime.
- *
- * The optimizations are done with the help of the mmx.h system, from
- * libmpeg2 by Michel Lespinasse and Aaron Holtzman.
- *
- * The library is a collection of function pointers which must be first
- * initialized by setup_speedy_calls() to point at the fastest available
- * implementation of each function.
- */
-
-/*
- * Struct for pulldown detection metrics.
- */
-typedef struct pulldown_metrics_s {
- /* difference: total, even lines, odd lines */
- int d, e, o;
- /* noise: temporal, spacial (current), spacial (past) */
- int t, s, p;
-} pulldown_metrics_t;
-
-/*
- * Interpolates a packed 4:2:2 scanline using linear interpolation.
- */
-extern void (*interpolate_packed422_scanline)( uint8_t *output, uint8_t *top,
- uint8_t *bot, int width );
-
-/*
- * Blits a colour to a packed 4:2:2 scanline.
- */
-extern void (*blit_colour_packed422_scanline)( uint8_t *output,
- int width, int y, int cb, int cr );
-
-/*
- * Blits a colour to a packed 4:4:4:4 scanline. I use luma/cb/cr instead of
- * RGB but this will of course work for either.
- */
-extern void (*blit_colour_packed4444_scanline)( uint8_t *output,
- int width, int alpha, int luma,
- int cb, int cr );
-
-/*
- * Blit from and to packed 4:2:2 scanline.
- */
-extern void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width );
-
-/*
- * Composites a premultiplied 4:4:4:4 pixel onto a packed 4:2:2 scanline.
- */
-extern void (*composite_colour4444_alpha_to_packed422_scanline)( uint8_t *output, uint8_t *input,
- int af, int y, int cb, int cr,
- int width, int alpha );
-
-/*
- * Composites a packed 4:4:4:4 scanline onto a packed 4:2:2 scanline.
- * Chroma is downsampled by dropping samples (nearest neighbour).
- */
-extern void (*composite_packed4444_to_packed422_scanline)( uint8_t *output,
- uint8_t *input,
- uint8_t *foreground,
- int width );
-
-/*
- * Composites a packed 4:4:4:4 scanline onto a packed 4:2:2 scanline.
- * Chroma is downsampled by dropping samples (nearest neighbour). The
- * alpha value provided is in the range 0-256 and is first applied to
- * the input (for fadeouts).
- */
-extern void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output,
- uint8_t *input,
- uint8_t *foreground,
- int width, int alpha );
-
-/*
- * Takes an alphamask and the given colour (in Y'CbCr) and composites it
- * onto a packed 4:4:4:4 scanline.
- */
-extern void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output,
- uint8_t *input,
- uint8_t *mask, int width,
- int textluma, int textcb,
- int textcr );
-
-/*
- * Takes an alphamask and the given colour (in Y'CbCr) and composites it
- * onto a packed 4:4:4:4 scanline. The alpha value provided is in the
- * range 0-256 and is first applied to the input (for fadeouts).
- */
-extern void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output,
- uint8_t *input,
- uint8_t *mask, int width,
- int textluma, int textcb,
- int textcr, int alpha );
-
-/*
- * Premultiplies the colour by the alpha channel in a packed 4:4:4:4
- * scanline.
- */
-extern void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width );
-
-/*
- * Blend between two packed 4:2:2 scanline. Pos is the fade value in
- * the range 0-256. A value of 0 gives 100% src1, and a value of 256
- * gives 100% src2. Anything in between gives the appropriate faded
- * version.
- */
-extern void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1,
- uint8_t *src2, int width, int pos );
-
-/*
- * Calculates the 'difference factor' for two scanlines. This is a
- * metric where higher values indicate that the two scanlines are more
- * different.
- */
-extern unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width );
-
-/*
- * Calculates the 'comb factor' for a set of three scanlines. This is a
- * metric where higher values indicate a more likely chance that the two
- * fields are at separate points in time.
- */
-extern unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid,
- uint8_t *bot, int width );
-
-/*
- * Vertical [1 2 1] chroma filter.
- */
-extern void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width,
- uint8_t *m, uint8_t *t, uint8_t *b );
-
-/*
- * Vertical [3 3 2] chroma filter.
- */
-extern void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width,
- uint8_t *m, uint8_t *t, uint8_t *b );
-
-/*
- * Sets the chroma of the scanline to neutral (128) in-place.
- */
-extern void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width );
-
-/*
- * Mirrors the scanline in-place.
- */
-extern void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width );
-
-/*
- * Inverts the colours on a scanline in-place.
- */
-extern void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width );
-
-/*
- * Fast memcpy function, used by all of the blit functions. Won't blit
- * anything if dest == src.
- */
-extern void (*speedy_memcpy)( void *output, const void *input, size_t size );
-
-/*
- * Calculates the block difference metrics for dalias' pulldown
- * detection algorithm.
- */
-extern void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old,
- uint8_t *new, int os, int ns );
-
-/*
- * Takes an alpha mask and subpixelly blits it using linear
- * interpolation.
- */
-extern void (*a8_subpix_blit_scanline)( uint8_t *output, uint8_t *input,
- int lasta, int startpos, int width );
-
-/*
- * 1/4 vertical subpixel blit for packed 4:2:2 scanlines using linear
- * interpolation.
- */
-extern void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one,
- uint8_t *three, int width );
-
-/*
- * Vertical subpixel blit for packed 4:2:2 scanlines using linear
- * interpolation.
- */
-extern void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top,
- uint8_t *bot, int subpixpos, int width );
-
-/*
- * Simple function to convert a 4:4:4 scanline to a 4:4:4:4 scanline by
- * adding an alpha channel. Result is non-premultiplied.
- */
-extern void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output,
- uint8_t *input,
- int width, int alpha );
-
-/*
- * I think this function needs to be rethought and renamed, but here
- * it is for now. This function horizontally resamples a scanline
- * using linear interpolation to compensate for a change in pixel
- * aspect ratio.
- */
-extern void (*aspect_adjust_packed4444_scanline)( uint8_t *output,
- uint8_t *input,
- int width,
- double pixel_aspect );
-
-/*
- * Convert a packed 4:4:4 surface to a packed 4:2:2 surface using
- * nearest neighbour chroma downsampling.
- */
-extern void (*packed444_to_packed422_scanline)( uint8_t *output,
- uint8_t *input,
- int width );
-
-/*
- * Converts packed 4:2:2 to packed 4:4:4 scanlines using nearest
- * neighbour chroma upsampling.
- */
-extern void (*packed422_to_packed444_scanline)( uint8_t *output,
- uint8_t *input,
- int width );
-
-/*
- * This filter actually does not meet the spec so calling it rec601
- * is a bit of a lie. I got the filter from Poynton's site. This
- * converts a scanline from packed 4:2:2 to packed 4:4:4. But this
- * function should point at some high quality to-the-spec resampler.
- */
-extern void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest,
- uint8_t *src,
- int width );
-
-/*
- * Conversions between Y'CbCr and R'G'B'. We use Rec.601 numbers
- * since our source is broadcast video, but I think there is an
- * argument to be made for switching to Rec.709.
- */
-extern void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output,
- uint8_t *input,
- int width );
-extern void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output,
- uint8_t *input,
- int width );
-extern void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output,
- uint8_t *input,
- int width );
-
-/*
- * Convert from 4:2:2 with UYVY ordering to 4:2:2 with YUYV ordering.
- */
-extern void (*convert_uyvy_to_yuyv_scanline)( uint8_t *uyvy_buf,
- uint8_t *yuyv_buf, int width );
-
-/*
- * Sets up the function pointers to point at the fastest function
- * available. Requires accelleration settings (see mm_accel.h).
- */
-void setup_speedy_calls( uint32_t accel, int verbose );
-
-/*
- * Returns a bitfield of what accellerations were used when speedy was
- * initialized. See mm_accel.h.
- */
-uint32_t speedy_get_accel( void );
-
-#ifdef __cplusplus
-};
-#endif
-#endif /* SPEEDY_H_INCLUDED */
diff --git a/gst/deinterlace2/tvtime/tomsmocomp.c b/gst/deinterlace2/tvtime/tomsmocomp.c
index f0b73677..dfadf457 100644
--- a/gst/deinterlace2/tvtime/tomsmocomp.c
+++ b/gst/deinterlace2/tvtime/tomsmocomp.c
@@ -28,7 +28,6 @@
#include "gst/gst.h"
#include "gstdeinterlace2.h"
#include "plugins.h"
-#include "speedy.h"
#include "tomsmocomp.h"
#include "tomsmocomp/tomsmocompmacros.h"
@@ -39,33 +38,18 @@
#define UseStrangeBobDefault 0
long SearchEffort;
-
int UseStrangeBob;
-
-MEMCPY_FUNC *pMyMemcpy;
-
int IsOdd;
-
const unsigned char *pWeaveSrc;
-
const unsigned char *pWeaveSrcP;
-
unsigned char *pWeaveDest;
-
const unsigned char *pCopySrc;
-
const unsigned char *pCopySrcP;
-
unsigned char *pCopyDest;
-
int src_pitch;
-
int dst_pitch;
-
int rowsize;
-
int height;
-
int FldHeight;
int
@@ -73,13 +57,12 @@ Fieldcopy (void *dest, const void *src, size_t count,
int rows, int dst_pitch, int src_pitch)
{
unsigned char *pDest = (unsigned char *) dest;
-
unsigned char *pSrc = (unsigned char *) src;
int i;
for (i = 0; i < rows; i++) {
- pMyMemcpy (pDest, pSrc, count);
+ memcpy (pDest, pSrc, count);
pSrc += src_pitch;
pDest += dst_pitch;
}
@@ -111,8 +94,6 @@ Fieldcopy (void *dest, const void *src, size_t count,
#undef SSE_TYPE
#undef FUNCT_NAME
-
-
void
deinterlace_frame_di_tomsmocomp (GstDeinterlace2 * object)
{
@@ -135,8 +116,6 @@ static deinterlace_method_t tomsmocompmethod = {
0,
0,
0,
- 0,
- 0,
deinterlace_frame_di_tomsmocomp,
{"Uses heuristics to detect motion in the input",
"frames and reconstruct image detail where",
@@ -150,8 +129,6 @@ static deinterlace_method_t tomsmocompmethod = {
""}
};
-
-
deinterlace_method_t *
dscaler_tomsmocomp_get_method (void)
{
@@ -159,8 +136,6 @@ dscaler_tomsmocomp_get_method (void)
return &tomsmocompmethod;
}
-
-
void
tomsmocomp_init (void)
{
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc
index f046f5e2..2318f6a3 100644
--- a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc
+++ b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc
@@ -51,8 +51,6 @@
void FUNCT_NAME(GstDeinterlace2* object)
{
- pMyMemcpy = object->pMemcpy;
-
/* double stride do address just every odd/even scanline */
src_pitch = object->field_stride;
dst_pitch = object->output_stride;
diff --git a/gst/deinterlace2/tvtime/vfir.c b/gst/deinterlace2/tvtime/vfir.c
index 53d233e1..3a56f850 100644
--- a/gst/deinterlace2/tvtime/vfir.c
+++ b/gst/deinterlace2/tvtime/vfir.c
@@ -28,19 +28,13 @@
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
*/
-#include <stdio.h>
-#if defined (__SVR4) && defined (__sun)
-# include <sys/int_types.h>
-#else
-# include <stdint.h>
-#endif
-
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
-#include "speedy.h"
+#include "_stdint.h"
#include "gstdeinterlace2.h"
+#include <string.h>
/*
* The MPEG2 spec uses a slightly harsher filter, they specify
@@ -156,11 +150,11 @@ deinterlace_frame_vfir (GstDeinterlace2 * object)
if (object->field_history[object->history_count - 2].flags ==
PICTURE_INTERLACED_BOTTOM) {
- blit_packed422_scanline (out_data, cur_field, object->frame_width);
+ memcpy (out_data, cur_field, object->line_length);
out_data += object->output_stride;
}
- blit_packed422_scanline (out_data, cur_field, object->frame_width);
+ memcpy (out_data, cur_field, object->line_length);
out_data += object->output_stride;
line++;
@@ -184,14 +178,14 @@ deinterlace_frame_vfir (GstDeinterlace2 * object)
cur_field += object->field_stride;
last_field += object->field_stride;
- blit_packed422_scanline (out_data, cur_field, object->frame_width);
+ memcpy (out_data, cur_field, object->line_length);
out_data += object->output_stride;
}
if (object->field_history[object->history_count - 2].flags ==
PICTURE_INTERLACED_TOP) {
/* double the last scanline of the top field */
- blit_packed422_scanline (out_data, cur_field, object->frame_width);
+ memcpy (out_data, cur_field, object->line_length);
}
}
@@ -205,8 +199,6 @@ static deinterlace_method_t vfirmethod = {
0,
0,
1,
- 0,
- 0,
deinterlace_frame_vfir,
{"Avoids flicker by blurring consecutive frames",
"of input. Use this if you want to run your",