summary | refs | log | tree | commit | diff | stats
path: root/gst/deinterlace2/tvtime/vfir.c
diff options
context:
space:
mode:
author: Sebastian Dröge <slomo@circular-chaos.org> 2008-06-24 09:10:46 +0000
committer: Sebastian Dröge <slomo@circular-chaos.org> 2008-06-24 09:10:46 +0000
commit: 6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7 (patch)
tree: b8831236c5d74c11990e649ea2c09c74107ade7f /gst/deinterlace2/tvtime/vfir.c
parent: d7cca015530caeb2411db65d7a4ed283e60c36a6 (diff)
download: gst-plugins-bad-6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7.tar.gz
gst-plugins-bad-6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7.tar.bz2
gst-plugins-bad-6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7.zip
gst/deinterlace2/tvtime/greedy.c: Fix the C implementation to produce correct results and optimize the MMXEXT implementation.
Original commit message from CVS:
* gst/deinterlace2/tvtime/greedy.c: (deinterlace_greedy_packed422_scanline_c), (deinterlace_greedy_packed422_scanline_mmxext), (deinterlace_greedy_packed422_scanline):
Fix the C implementation to produce correct results and optimize the MMXEXT implementation. Handle odd widths and don't read over array boundaries in the MMXEXT implementation.
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line_c), (deinterlace_line_mmx), (deinterlace_scanline_vfir):
Fix a small rounding bug in the MMX implementation. The MMX implementation doesn't actually need MMXEXT instructions, so don't mark it as such. Handle odd widths in both implementations.
Diffstat (limited to 'gst/deinterlace2/tvtime/vfir.c')
-rw-r--r-- gst/deinterlace2/tvtime/vfir.c | 77
1 file changed, 41 insertions, 36 deletions
diff --git a/gst/deinterlace2/tvtime/vfir.c b/gst/deinterlace2/tvtime/vfir.c
index f32be654..479ee440 100644
--- a/gst/deinterlace2/tvtime/vfir.c
+++ b/gst/deinterlace2/tvtime/vfir.c
@@ -49,10 +49,36 @@
* filter taps here are: [-1 4 2 4 -1].
*/
+/**
+ * C implementation.
+ */
+static inline void
+deinterlace_line_c (uint8_t * dst, uint8_t * lum_m4,
+ uint8_t * lum_m3, uint8_t * lum_m2,
+ uint8_t * lum_m1, uint8_t * lum, int size)
+{
+ int sum;
+
+ for (; size > 0; size--) {
+ sum = -lum_m4[0];
+ sum += lum_m3[0] << 2;
+ sum += lum_m2[0] << 1;
+ sum += lum_m1[0] << 2;
+ sum += -lum[0];
+ dst[0] = (sum + 4) >> 3; // This needs to be clipped at 0 and 255: cm[(sum + 4) >> 3];
+ lum_m4++;
+ lum_m3++;
+ lum_m2++;
+ lum_m1++;
+ lum++;
+ dst++;
+ }
+}
+
#ifdef HAVE_CPU_I386
#include "mmx.h"
static void
-deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4,
+deinterlace_line_mmx (uint8_t * dst, uint8_t * lum_m4,
uint8_t * lum_m3, uint8_t * lum_m2,
uint8_t * lum_m1, uint8_t * lum, int size)
{
@@ -63,14 +89,15 @@ deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4,
rounder.uw[2] = 4;
rounder.uw[3] = 4;
pxor_r2r (mm7, mm7);
- movq_m2r (rounder, mm6);
+ movd_m2r (rounder, mm6);
+ punpcklbw_r2r (mm7, mm6);
for (; size > 3; size -= 4) {
- movd_m2r (lum_m4[0], mm0);
- movd_m2r (lum_m3[0], mm1);
- movd_m2r (lum_m2[0], mm2);
- movd_m2r (lum_m1[0], mm3);
- movd_m2r (lum[0], mm4);
+ movd_m2r (*lum_m4, mm0);
+ movd_m2r (*lum_m3, mm1);
+ movd_m2r (*lum_m2, mm2);
+ movd_m2r (*lum_m1, mm3);
+ movd_m2r (*lum, mm4);
punpcklbw_r2r (mm7, mm0);
punpcklbw_r2r (mm7, mm1);
punpcklbw_r2r (mm7, mm2);
@@ -85,7 +112,7 @@ deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4,
psubusw_r2r (mm0, mm1);
psrlw_i2r (3, mm1); // 3
packuswb_r2r (mm7, mm1);
- movd_r2m (mm1, dst[0]);
+ movd_r2m (mm1, *dst);
lum_m4 += 4;
lum_m3 += 4;
lum_m2 += 4;
@@ -94,34 +121,12 @@ deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4,
dst += 4;
}
emms ();
-}
-#endif
-/**
- * C implementation.
- */
-static void
-deinterlace_line_c (uint8_t * dst, uint8_t * lum_m4,
- uint8_t * lum_m3, uint8_t * lum_m2,
- uint8_t * lum_m1, uint8_t * lum, int size)
-{
- int sum;
-
- for (; size > 0; size--) {
- sum = -lum_m4[0];
- sum += lum_m3[0] << 2;
- sum += lum_m2[0] << 1;
- sum += lum_m1[0] << 2;
- sum += -lum[0];
- dst[0] = (sum + 4) >> 3; // This needs to be clipped at 0 and 255: cm[(sum + 4) >> 3];
- lum_m4++;
- lum_m3++;
- lum_m2++;
- lum_m1++;
- lum++;
- dst++;
- }
+ /* Handle odd widths */
+ if (size > 0)
+ deinterlace_line_c (dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size);
}
+#endif
/*
* The commented-out method below that uses the bottom_field member is more
@@ -134,8 +139,8 @@ deinterlace_scanline_vfir (GstDeinterlace2 * object,
deinterlace_scanline_data_t * data, uint8_t * output)
{
#ifdef HAVE_CPU_I386
- if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) {
- deinterlace_line_mmxext (output, data->tt1, data->t0, data->m1, data->b0,
+ if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMX) {
+ deinterlace_line_mmx (output, data->tt1, data->t0, data->m1, data->b0,
data->bb1, object->frame_width * 2);
} else {
deinterlace_line_c (output, data->tt1, data->t0, data->m1, data->b0,