summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSebastian Dröge <slomo@circular-chaos.org>2008-06-24 10:12:08 +0000
committerSebastian Dröge <slomo@circular-chaos.org>2008-06-24 10:12:08 +0000
commit501437acf936ca3f3f5dd4fa2210d2d272d18709 (patch)
tree6abbce5f3814076a4e396f9725b27d99dbfd8a4a
parente02d4969cb8e4f6a1ec178ebdb8a9708b857c883 (diff)
downloadgst-plugins-bad-501437acf936ca3f3f5dd4fa2210d2d272d18709.tar.gz
gst-plugins-bad-501437acf936ca3f3f5dd4fa2210d2d272d18709.tar.bz2
gst-plugins-bad-501437acf936ca3f3f5dd4fa2210d2d272d18709.zip
gst/deinterlace2/tvtime/greedy.c: Add plain MMX implementation for the greedyl method.
Original commit message from CVS: * gst/deinterlace2/tvtime/greedy.c: (deinterlace_greedy_packed422_scanline_mmx), (deinterlace_greedy_packed422_scanline): Add plain MMX implementation for the greedyl method.
-rw-r--r--ChangeLog7
-rw-r--r--gst/deinterlace2/tvtime/greedy.c104
2 files changed, 111 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 81c7d765..eda3bad3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2008-06-24 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ * gst/deinterlace2/tvtime/greedy.c:
+ (deinterlace_greedy_packed422_scanline_mmx),
+ (deinterlace_greedy_packed422_scanline):
+ Add plain MMX implementation for the greedyl method.
+
+2008-06-24 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+
* gst/deinterlace2/Makefile.am:
Move the assembly includes to noinst_HEADERS where they belong.
diff --git a/gst/deinterlace2/tvtime/greedy.c b/gst/deinterlace2/tvtime/greedy.c
index 66b8799d..e1a9b1cf 100644
--- a/gst/deinterlace2/tvtime/greedy.c
+++ b/gst/deinterlace2/tvtime/greedy.c
@@ -111,6 +111,107 @@ deinterlace_greedy_packed422_scanline_c (GstDeinterlace2 * object,
#ifdef HAVE_CPU_I386
#include "mmx.h"
+/* Plain MMX implementation of the greedy scanline deinterlacer.
+ *
+ * Per byte: take whichever of m0 (L2) and m2 (LP2) is closer to the average
+ * of t1 (L1) and b1 (L3), LP2 winning ties, then clamp it to
+ * [min(L1,L3) - GreedyMaxComb, max(L1,L3) + GreedyMaxComb] and store it.
+ * width is a byte count: 8 bytes are handled per iteration and whatever is
+ * left is passed on to the C implementation, the only user of object.
+ */
+static void
+deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
+    uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2, uint8_t * output,
+    int width)
+{
+  mmx_t MaxComb;
+  mmx_t ShiftMask;
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    MaxComb.ub[i] = GreedyMaxComb;      // How badly do we let it weave? 0-255
+    ShiftMask.ub[i] = 0x7f;             // keeps bits 0-6 of every byte
+  }
+
+  for (; width > 7; width -= 8) {
+    movq_m2r (*t1, mm1);        // L1
+    movq_m2r (*m0, mm2);        // L2
+    movq_m2r (*b1, mm3);        // L3
+    movq_m2r (*m2, mm0);        // LP2
+    movq_m2r (ShiftMask, mm6);  // 0x7f in every byte
+
+    // average L1 and L3 into mm4; psrlw shifts whole 16-bit words, so the bit
+    // that crosses into bit 7 of each even byte is masked off before adding
+    movq_r2r (mm1, mm4);        // L1
+    movq_r2r (mm3, mm5);        // L3
+    psrlw_i2r (1, mm4);
+    psrlw_i2r (1, mm5);
+    pand_r2r (mm6, mm4);        // L1/2
+    pand_r2r (mm6, mm5);        // L3/2
+    paddusb_r2r (mm5, mm4);     // L1/2 + L3/2
+
+    // get abs value of possible L2 comb
+    movq_r2r (mm2, mm7);        // L2
+    psubusb_r2r (mm4, mm7);     // L2 - avg
+    movq_r2r (mm4, mm5);        // avg
+    psubusb_r2r (mm2, mm5);     // avg - L2
+    por_r2r (mm7, mm5);         // abs(avg-L2)
+
+    // get abs value of possible LP2 comb
+    movq_r2r (mm0, mm7);        // LP2
+    psubusb_r2r (mm4, mm7);     // LP2 - avg
+    psubusb_r2r (mm0, mm4);     // avg - LP2
+    por_r2r (mm7, mm4);         // abs(avg-LP2)
+
+    // use L2 or LP2 depending upon which makes smaller comb
+    psubusb_r2r (mm5, mm4);     // see if it goes to zero
+    psubusb_r2r (mm5, mm5);     // 0
+    pcmpeqb_r2r (mm5, mm4);     // if (mm4=0) then FF else 0
+    pcmpeqb_r2r (mm4, mm5);     // opposite of mm4
+
+    // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
+    pand_r2r (mm2, mm5);        // use L2 if mm5 == ff, else 0
+    pand_r2r (mm0, mm4);        // use LP2 if mm4 = ff, else 0
+    por_r2r (mm5, mm4);         // may the best win
+
+    // Now lets clip our chosen value to be not outside of the range
+    // spanned by L1 and L3 by more than MaxComb.
+    // This allows some comb but limits the damages and also allows more
+    // detail than a boring oversmoothed clip.
+    movq_r2r (mm1, mm2);        // copy L1
+    psubusb_r2r (mm3, mm2);     // - L3, with saturation
+    paddusb_r2r (mm3, mm2);     // now = Max(L1,L3)
+
+    pcmpeqb_r2r (mm7, mm7);     // all ffffffff
+    psubusb_r2r (mm1, mm7);     // - L1
+    paddusb_r2r (mm7, mm3);     // add, may sat at fff..
+    psubusb_r2r (mm7, mm3);     // now = Min(L1,L3)
+
+    // allow the value to be above the high or below the low by amt of MaxComb
+    paddusb_m2r (MaxComb, mm2); // raise max by MaxComb, saturating
+    psubusb_m2r (MaxComb, mm3); // lower min by MaxComb, saturating
+
+    psubusb_r2r (mm3, mm4);     // best - lowered Min
+    paddusb_r2r (mm3, mm4);     // now = Max(best, lowered Min)
+    pcmpeqb_r2r (mm7, mm7);     // all ffffffff
+    psubusb_r2r (mm4, mm7);     // - Max(best, lowered Min)
+    paddusb_r2r (mm7, mm2);     // add may sat at FFF..
+    psubusb_r2r (mm7, mm2);     // now = Min(raised Max, mm4) = clipped best
+    movq_r2m (mm2, *output);    // move in our clipped best
+
+    // Advance to the next set of pixels.
+    output += 8;
+    m0 += 8;
+    t1 += 8;
+    b1 += 8;
+    m2 += 8;
+  }
+  emms ();
+  if (width > 0)
+    deinterlace_greedy_packed422_scanline_c (object, m0, t1, b1, m2, output,
+        width);
+}
+
#include "sse.h"
static void
@@ -214,6 +315,9 @@ deinterlace_greedy_packed422_scanline (GstDeinterlace2 * object,
if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) {
deinterlace_greedy_packed422_scanline_mmxext (object, data->m0, data->t1,
data->b1, data->m2, output, 2 * object->frame_width);
+ } else if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMX) {
+ deinterlace_greedy_packed422_scanline_mmx (object, data->m0, data->t1,
+ data->b1, data->m2, output, 2 * object->frame_width);
} else {
deinterlace_greedy_packed422_scanline_c (object, data->m0, data->t1,
data->b1, data->m2, output, 2 * object->frame_width);