diff options
Diffstat (limited to 'gst/deinterlace2/tvtime/greedy.c')
-rw-r--r-- | gst/deinterlace2/tvtime/greedy.c | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/gst/deinterlace2/tvtime/greedy.c b/gst/deinterlace2/tvtime/greedy.c new file mode 100644 index 00000000..578eb711 --- /dev/null +++ b/gst/deinterlace2/tvtime/greedy.c @@ -0,0 +1,207 @@ +/* + * + * GStreamer + * Copyright (c) 2000 Tom Barry All rights reserved. + * mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry + * and Billy Biggs. + * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + +#include <stdio.h> +#if defined (__SVR4) && defined (__sun) +# include <sys/int_types.h> +#else +# include <stdint.h> +#endif + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "mmx.h" +#include "sse.h" +#include "gstdeinterlace2.h" +#include "speedtools.h" +#include "speedy.h" + +// This is a simple lightweight DeInterlace method that uses little CPU time +// but gives very good results for low or intermedite motion. +// It defers frames by one field, but that does not seem to produce noticeable +// lip sync problems. +// +// The method used is to take either the older or newer weave pixel depending +// upon which give the smaller comb factor, and then clip to avoid large damage +// when wrong. +// +// I'd intended this to be part of a larger more elaborate method added to +// Blended Clip but this give too good results for the CPU to ignore here. + +static void +copy_scanline (GstDeinterlace2 * object, + deinterlace_scanline_data_t * data, uint8_t * output) +{ + blit_packed422_scanline (output, data->m1, object->frame_width); +} + +static int GreedyMaxComb = 15; + +static void +deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object, + deinterlace_scanline_data_t * data, uint8_t * output) +{ +#ifdef HAVE_CPU_I386 + mmx_t MaxComb; + + uint8_t *m0 = data->m0; + + uint8_t *t1 = data->t1; + + uint8_t *b1 = data->b1; + + uint8_t *m2 = data->m2; + + int width = object->frame_width; + + // How badly do we let it weave? 0-255 + MaxComb.ub[0] = GreedyMaxComb; + MaxComb.ub[1] = GreedyMaxComb; + MaxComb.ub[2] = GreedyMaxComb; + MaxComb.ub[3] = GreedyMaxComb; + MaxComb.ub[4] = GreedyMaxComb; + MaxComb.ub[5] = GreedyMaxComb; + MaxComb.ub[6] = GreedyMaxComb; + MaxComb.ub[7] = GreedyMaxComb; + + // L2 == m0 + // L1 == t1 + // L3 == b1 + // LP2 == m2 + + width /= 4; + while (width--) { + movq_m2r (*t1, mm1); // L1 + movq_m2r (*m0, mm2); // L2 + movq_m2r (*b1, mm3); // L3 + movq_m2r (*m2, mm0); // LP2 + + // average L1 and L3 leave result in mm4 + movq_r2r (mm1, mm4); // L1 + pavgb_r2r (mm3, mm4); // (L1 + L3)/2 + + + // get abs value of possible L2 comb + movq_r2r (mm2, mm7); // L2 + psubusb_r2r (mm4, mm7); // L2 - avg + movq_r2r (mm4, mm5); // avg + psubusb_r2r (mm2, mm5); // avg - L2 + por_r2r (mm7, mm5); // abs(avg-L2) + movq_r2r (mm4, mm6); // copy of avg for later + + + // get abs value of possible LP2 comb + movq_r2r (mm0, mm7); // LP2 + psubusb_r2r (mm4, mm7); // LP2 - avg + psubusb_r2r (mm0, mm4); // avg - LP2 + por_r2r (mm7, mm4); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r (mm5, mm4); // see if it goes to zero + psubusb_r2r (mm5, mm5); // 0 + pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0 + pcmpeqb_r2r (mm4, mm5); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0 + pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0 + por_r2r (mm5, mm4); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. + + movq_r2r (mm1, mm2); // copy L1 + psubusb_r2r (mm3, mm2); // - L3, with saturation + paddusb_r2r (mm3, mm2); // now = Max(L1,L3) + + pcmpeqb_r2r (mm7, mm7); // all ffffffff + psubusb_r2r (mm1, mm7); // - L1 + paddusb_r2r (mm7, mm3); // add, may sat at fff.. + psubusb_r2r (mm7, mm3); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_m2r (MaxComb, mm2); // increase max by diff + psubusb_m2r (MaxComb, mm3); // lower min by diff + + psubusb_r2r (mm3, mm4); // best - Min + paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3) + + pcmpeqb_r2r (mm7, mm7); // all ffffffff + psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3) + paddusb_r2r (mm7, mm2); // add may sat at FFF.. + psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + + movntq_r2m (mm2, *output); // move in our clipped best + + // Advance to the next set of pixels. + output += 8; + m0 += 8; + t1 += 8; + b1 += 8; + m2 += 8; + } + sfence (); + emms (); +#endif +} + + +static deinterlace_method_t greedyl_method = { + 0, //DEINTERLACE_PLUGIN_API_VERSION, + "Motion Adaptive: Simple Detection", + "AdaptiveSimple", + 3, + OIL_IMPL_FLAG_MMXEXT, + 0, + 0, + 0, + 1, + copy_scanline, + deinterlace_greedy_packed422_scanline_mmxext, + 0, + {"Uses heuristics to detect motion in the input", + "frames and reconstruct image detail where", + "possible. Use this for high quality output", + "even on monitors set to an arbitrary refresh", + "rate.", + "", + "Simple detection uses linear interpolation", + "where motion is detected, using a two-field", + "buffer. This is the Greedy: Low Motion", + "deinterlacer from DScaler."} +}; + +deinterlace_method_t * +dscaler_greedyl_get_method (void) +{ + return &greedyl_method; +} |