diff options
author | Sebastian Dröge <slomo@circular-chaos.org> | 2008-06-24 09:10:46 +0000 |
---|---|---|
committer | Sebastian Dröge <slomo@circular-chaos.org> | 2008-06-24 09:10:46 +0000 |
commit | 6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7 (patch) | |
tree | b8831236c5d74c11990e649ea2c09c74107ade7f /gst/deinterlace2/tvtime/vfir.c | |
parent | d7cca015530caeb2411db65d7a4ed283e60c36a6 (diff) | |
download | gst-plugins-bad-6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7.tar.gz gst-plugins-bad-6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7.tar.bz2 gst-plugins-bad-6fd4ed3965ecfebdcba954ccb97d9464f3bd12d7.zip |
gst/deinterlace2/tvtime/greedy.c: Fix the C implementation to produce correct results and optimize the MMXEXT implementation.
Original commit message from CVS:
* gst/deinterlace2/tvtime/greedy.c:
(deinterlace_greedy_packed422_scanline_c),
(deinterlace_greedy_packed422_scanline_mmxext),
(deinterlace_greedy_packed422_scanline):
Fix the C implementation to produce correct results and optimize the
MMXEXT implementation.
Handle odd widths and don't read over array boundaries in the MMXEXT
implementation.
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line_c),
(deinterlace_line_mmx), (deinterlace_scanline_vfir):
Fix a small rounding bug in the MMX implementation, the MMX
implementation doesn't actually need MMXEXT instructions so don't mark
it as such.
Handle odd widths in both implementations.
Diffstat (limited to 'gst/deinterlace2/tvtime/vfir.c')
-rw-r--r-- | gst/deinterlace2/tvtime/vfir.c | 77 |
1 file changed, 41 insertions, 36 deletions
diff --git a/gst/deinterlace2/tvtime/vfir.c b/gst/deinterlace2/tvtime/vfir.c index f32be654..479ee440 100644 --- a/gst/deinterlace2/tvtime/vfir.c +++ b/gst/deinterlace2/tvtime/vfir.c @@ -49,10 +49,36 @@ * filter taps here are: [-1 4 2 4 -1]. */ +/** + * C implementation. + */ +static inline void +deinterlace_line_c (uint8_t * dst, uint8_t * lum_m4, + uint8_t * lum_m3, uint8_t * lum_m2, + uint8_t * lum_m1, uint8_t * lum, int size) +{ + int sum; + + for (; size > 0; size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + sum += lum_m1[0] << 2; + sum += -lum[0]; + dst[0] = (sum + 4) >> 3; // This needs to be clipped at 0 and 255: cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + dst++; + } +} + #ifdef HAVE_CPU_I386 #include "mmx.h" static void -deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4, +deinterlace_line_mmx (uint8_t * dst, uint8_t * lum_m4, uint8_t * lum_m3, uint8_t * lum_m2, uint8_t * lum_m1, uint8_t * lum, int size) { @@ -63,14 +89,15 @@ deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4, rounder.uw[2] = 4; rounder.uw[3] = 4; pxor_r2r (mm7, mm7); - movq_m2r (rounder, mm6); + movd_m2r (rounder, mm6); + punpcklbw_r2r (mm7, mm6); for (; size > 3; size -= 4) { - movd_m2r (lum_m4[0], mm0); - movd_m2r (lum_m3[0], mm1); - movd_m2r (lum_m2[0], mm2); - movd_m2r (lum_m1[0], mm3); - movd_m2r (lum[0], mm4); + movd_m2r (*lum_m4, mm0); + movd_m2r (*lum_m3, mm1); + movd_m2r (*lum_m2, mm2); + movd_m2r (*lum_m1, mm3); + movd_m2r (*lum, mm4); punpcklbw_r2r (mm7, mm0); punpcklbw_r2r (mm7, mm1); punpcklbw_r2r (mm7, mm2); @@ -85,7 +112,7 @@ deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4, psubusw_r2r (mm0, mm1); psrlw_i2r (3, mm1); // 3 packuswb_r2r (mm7, mm1); - movd_r2m (mm1, dst[0]); + movd_r2m (mm1, *dst); lum_m4 += 4; lum_m3 += 4; lum_m2 += 4; @@ -94,34 +121,12 @@ deinterlace_line_mmxext (uint8_t * dst, uint8_t * lum_m4, dst += 4; } emms (); -} -#endif -/** - * C implementation. 
- */ -static void -deinterlace_line_c (uint8_t * dst, uint8_t * lum_m4, - uint8_t * lum_m3, uint8_t * lum_m2, - uint8_t * lum_m1, uint8_t * lum, int size) -{ - int sum; - - for (; size > 0; size--) { - sum = -lum_m4[0]; - sum += lum_m3[0] << 2; - sum += lum_m2[0] << 1; - sum += lum_m1[0] << 2; - sum += -lum[0]; - dst[0] = (sum + 4) >> 3; // This needs to be clipped at 0 and 255: cm[(sum + 4) >> 3]; - lum_m4++; - lum_m3++; - lum_m2++; - lum_m1++; - lum++; - dst++; - } + /* Handle odd widths */ + if (size > 0) + deinterlace_line_c (dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size); } +#endif /* * The commented-out method below that uses the bottom_field member is more @@ -134,8 +139,8 @@ deinterlace_scanline_vfir (GstDeinterlace2 * object, deinterlace_scanline_data_t * data, uint8_t * output) { #ifdef HAVE_CPU_I386 - if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) { - deinterlace_line_mmxext (output, data->tt1, data->t0, data->m1, data->b0, + if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMX) { + deinterlace_line_mmx (output, data->tt1, data->t0, data->m1, data->b0, data->bb1, object->frame_width * 2); } else { deinterlace_line_c (output, data->tt1, data->t0, data->m1, data->b0, |