summaryrefslogtreecommitdiffstats
path: root/gst/deinterlace2/tvtime
diff options
context:
space:
mode:
Diffstat (limited to 'gst/deinterlace2/tvtime')
-rw-r--r--gst/deinterlace2/tvtime/greedy.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/gst/deinterlace2/tvtime/greedy.c b/gst/deinterlace2/tvtime/greedy.c
index 66b8799d..e1a9b1cf 100644
--- a/gst/deinterlace2/tvtime/greedy.c
+++ b/gst/deinterlace2/tvtime/greedy.c
@@ -111,6 +111,107 @@ deinterlace_greedy_packed422_scanline_c (GstDeinterlace2 * object,
#ifdef HAVE_CPU_I386
#include "mmx.h"
+/*
+ * MMX implementation of the greedy deinterlacer for one packed 4:2:2
+ * scanline.
+ *
+ * Register/line naming used in the comments below:
+ *   L1  == t1 (line above), L3 == b1 (line below),
+ *   L2  == m0,              LP2 == m2.
+ *
+ * For every byte the candidate is either L2 or LP2, whichever is closer to
+ * the average of L1 and L3 (LP2 wins ties).  The candidate is then clamped
+ * to [min(L1,L3) - GreedyMaxComb, max(L1,L3) + GreedyMaxComb], with both
+ * bounds saturating at 0 and 255.
+ *
+ * width is counted in bytes.  Groups of 8 bytes are processed with MMX;
+ * any remaining bytes are handed to the C implementation.
+ */
+static void
+deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
+    uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2, uint8_t * output,
+    int width)
+{
+  mmx_t MaxComb;
+  mmx_t ShiftMask;
+  int i;
+
+  // MaxComb: how badly do we let it weave? 0-255, replicated in every byte.
+  // ShiftMask: 0x7f in every byte, used to turn a 16 bit word shift into
+  // eight independent byte shifts (see the averaging step below).
+  for (i = 0; i < 8; i++) {
+    MaxComb.ub[i] = GreedyMaxComb;
+    ShiftMask.ub[i] = 0x7f;
+  }
+
+  for (; width > 7; width -= 8) {
+    movq_m2r (*t1, mm1);        // L1
+    movq_m2r (*m0, mm2);        // L2
+    movq_m2r (*b1, mm3);        // L3
+    movq_m2r (*m2, mm0);        // LP2
+
+    // average L1 and L3 leave result in mm4
+    // MMX has no byte shift: psrlw shifts whole 16 bit words, so bit 0 of
+    // each odd byte ends up in bit 7 of the even byte next to it.  Masking
+    // with 0x7f removes that stray bit and leaves a true per-byte L/2.
+    movq_r2r (mm1, mm4);        // L1
+    movq_r2r (mm3, mm5);        // L3
+    psrlw_i2r (1, mm4);         // L1/2 (with bits leaked between bytes)
+    pand_m2r (ShiftMask, mm4);  // L1/2 per byte
+    psrlw_i2r (1, mm5);         // L3/2 (with bits leaked between bytes)
+    pand_m2r (ShiftMask, mm5);  // L3/2 per byte
+    paddusb_r2r (mm5, mm4);     // (L1 + L3) / 2, at most 254 so no saturation
+
+    // get abs value of possible L2 comb
+    // (one of the two saturating differences is always 0, so OR gives abs)
+    movq_r2r (mm2, mm7);        // L2
+    psubusb_r2r (mm4, mm7);     // L2 - avg
+    movq_r2r (mm4, mm5);        // avg
+    psubusb_r2r (mm2, mm5);     // avg - L2
+    por_r2r (mm7, mm5);         // abs(avg-L2)
+
+    // get abs value of possible LP2 comb
+    movq_r2r (mm0, mm7);        // LP2
+    psubusb_r2r (mm4, mm7);     // LP2 - avg
+    psubusb_r2r (mm0, mm4);     // avg - LP2
+    por_r2r (mm7, mm4);         // abs(avg-LP2)
+
+    // use L2 or LP2 depending upon which makes smaller comb
+    psubusb_r2r (mm5, mm4);     // see if it goes to zero
+    psubusb_r2r (mm5, mm5);     // 0
+    pcmpeqb_r2r (mm5, mm4);     // if (mm4=0) then FF else 0
+    pcmpeqb_r2r (mm4, mm5);     // opposite of mm4
+
+    // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
+    pand_r2r (mm2, mm5);        // use L2 if mm5 == ff, else 0
+    pand_r2r (mm0, mm4);        // use LP2 if mm4 = ff, else 0
+    por_r2r (mm5, mm4);         // may the best win
+
+    // Now lets clip our chosen value to be not outside of the range
+    // of the high/low range L1-L3 by more than MaxComb.
+    // This allows some comb but limits the damages and also allows more
+    // detail than a boring oversmoothed clip.
+
+    movq_r2r (mm1, mm2);        // copy L1
+    psubusb_r2r (mm3, mm2);     // - L3, with saturation
+    paddusb_r2r (mm3, mm2);     // now = Max(L1,L3)
+
+    pcmpeqb_r2r (mm7, mm7);     // all ffffffff
+    psubusb_r2r (mm1, mm7);     // - L1
+    paddusb_r2r (mm7, mm3);     // add, may sat at fff..
+    psubusb_r2r (mm7, mm3);     // now = Min(L1,L3)
+
+    // allow the value to be above the high or below the low by amt of MaxComb
+    paddusb_m2r (MaxComb, mm2); // increase max by diff
+    psubusb_m2r (MaxComb, mm3); // lower min by diff
+
+    psubusb_r2r (mm3, mm4);     // best - Min
+    paddusb_r2r (mm3, mm4);     // now = Max(best, Min(L1,L3))
+
+    pcmpeqb_r2r (mm7, mm7);     // all ffffffff
+    psubusb_r2r (mm4, mm7);     // - Max(best, Min(L1,L3))
+    paddusb_r2r (mm7, mm2);     // add may sat at FFF..
+    psubusb_r2r (mm7, mm2);     // now = Min(Max(best, Min(L1,L3)), Max(L1,L3))
+
+    movq_r2m (mm2, *output);    // move in our clipped best
+
+    // Advance to the next set of pixels.
+    output += 8;
+    m0 += 8;
+    t1 += 8;
+    b1 += 8;
+    m2 += 8;
+  }
+  // Leave MMX state before any further C code runs.
+  emms ();
+
+  // Fewer than 8 bytes left: let the C version finish the line.
+  if (width > 0)
+    deinterlace_greedy_packed422_scanline_c (object, m0, t1, b1, m2, output,
+        width);
+}
+
#include "sse.h"
static void
@@ -214,6 +315,9 @@ deinterlace_greedy_packed422_scanline (GstDeinterlace2 * object,
if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) {
deinterlace_greedy_packed422_scanline_mmxext (object, data->m0, data->t1,
data->b1, data->m2, output, 2 * object->frame_width);
+ } else if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMX) {
+ deinterlace_greedy_packed422_scanline_mmx (object, data->m0, data->t1,
+ data->b1, data->m2, output, 2 * object->frame_width);
} else {
deinterlace_greedy_packed422_scanline_c (object, data->m0, data->t1,
data->b1, data->m2, output, 2 * object->frame_width);