summary refs log tree commit diff stats
path: root/gst/deinterlace2/tvtime/greedy.c
diff options
context:
space:
mode:
Diffstat (limited to 'gst/deinterlace2/tvtime/greedy.c')
-rw-r--r-- gst/deinterlace2/tvtime/greedy.c 158
1 files changed, 71 insertions, 87 deletions
diff --git a/gst/deinterlace2/tvtime/greedy.c b/gst/deinterlace2/tvtime/greedy.c
index c25af036..66b8799d 100644
--- a/gst/deinterlace2/tvtime/greedy.c
+++ b/gst/deinterlace2/tvtime/greedy.c
@@ -60,26 +60,65 @@ copy_scanline (GstDeinterlace2 * object,
blit_packed422_scanline (output, data->m1, object->frame_width);
}
-static int GreedyMaxComb = 15;
+static const int GreedyMaxComb = 15;
-#ifdef HAVE_CPU_I386
-#include "mmx.h"
-#include "sse.h"
-static void
-deinterlace_greedy_packed422_scanline_sse (GstDeinterlace2 * object,
- deinterlace_scanline_data_t * data, uint8_t * output)
+static inline void
+deinterlace_greedy_packed422_scanline_c (GstDeinterlace2 * object,
+ uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2, uint8_t * output,
+ int width)
{
- mmx_t MaxComb;
+ int avg, l2_diff, lp2_diff, max, min, best;
+
+ // L2 == m0
+ // L1 == t1
+ // L3 == b1
+ // LP2 == m2
+
+ while (width--) {
+ avg = (*t1 + *b1) / 2;
+
+ l2_diff = ABS (*m0 - avg);
+ lp2_diff = ABS (*m2 - avg);
+
+ if (l2_diff > lp2_diff)
+ best = *m2;
+ else
+ best = *m0;
+
+ max = MAX (*t1, *b1);
+ min = MIN (*t1, *b1);
+
+ if (max < 256 - GreedyMaxComb)
+ max += GreedyMaxComb;
+ else
+ max = 255;
- uint8_t *m0 = data->m0;
+ if (min > GreedyMaxComb)
+ min -= GreedyMaxComb;
+ else
+ min = 0;
- uint8_t *t1 = data->t1;
+ *output = CLAMP (best, min, max);
- uint8_t *b1 = data->b1;
+ // Advance to the next set of pixels.
+ output += 1;
+ m0 += 1;
+ t1 += 1;
+ b1 += 1;
+ m2 += 1;
+ }
+}
- uint8_t *m2 = data->m2;
+#ifdef HAVE_CPU_I386
+#include "mmx.h"
+#include "sse.h"
- int width = object->frame_width;
+static void
+deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
+ uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2, uint8_t * output,
+ int width)
+{
+ mmx_t MaxComb;
// How badly do we let it weave? 0-255
MaxComb.ub[0] = GreedyMaxComb;
@@ -96,8 +135,7 @@ deinterlace_greedy_packed422_scanline_sse (GstDeinterlace2 * object,
// L3 == b1
// LP2 == m2
- width /= 4;
- while (width--) {
+ for (; width > 7; width -= 8) {
movq_m2r (*t1, mm1); // L1
movq_m2r (*m0, mm2); // L2
movq_m2r (*b1, mm3); // L3
@@ -107,15 +145,12 @@ deinterlace_greedy_packed422_scanline_sse (GstDeinterlace2 * object,
movq_r2r (mm1, mm4); // L1
pavgb_r2r (mm3, mm4); // (L1 + L3)/2
-
// get abs value of possible L2 comb
movq_r2r (mm2, mm7); // L2
psubusb_r2r (mm4, mm7); // L2 - avg
movq_r2r (mm4, mm5); // avg
psubusb_r2r (mm2, mm5); // avg - L2
por_r2r (mm7, mm5); // abs(avg-L2)
- movq_r2r (mm4, mm6); // copy of avg for later
-
// get abs value of possible LP2 comb
movq_r2r (mm0, mm7); // LP2
@@ -125,7 +160,7 @@ deinterlace_greedy_packed422_scanline_sse (GstDeinterlace2 * object,
// use L2 or LP2 depending upon which makes smaller comb
psubusb_r2r (mm5, mm4); // see if it goes to zero
- psubusb_r2r (mm5, mm5); // 0
+ pxor_r2r (mm5, mm5); // 0
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
@@ -140,27 +175,19 @@ deinterlace_greedy_packed422_scanline_sse (GstDeinterlace2 * object,
// detail than a boring oversmoothed clip.
movq_r2r (mm1, mm2); // copy L1
- psubusb_r2r (mm3, mm2); // - L3, with saturation
- paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
+ pmaxub_r2r (mm3, mm2); // now = Max(L1,L3)
- pcmpeqb_r2r (mm7, mm7); // all ffffffff
- psubusb_r2r (mm1, mm7); // - L1
- paddusb_r2r (mm7, mm3); // add, may sat at fff..
- psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
+ pminub_r2r (mm1, mm3); // now = Min(L1,L3)
// allow the value to be above the high or below the low by amt of MaxComb
paddusb_m2r (MaxComb, mm2); // increase max by diff
psubusb_m2r (MaxComb, mm3); // lower min by diff
- psubusb_r2r (mm3, mm4); // best - Min
- paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
- pcmpeqb_r2r (mm7, mm7); // all ffffffff
- psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3)
- paddusb_r2r (mm7, mm2); // add may sat at FFF..
- psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
+ pmaxub_r2r (mm3, mm4); // now = Max(best, Min(L1,L3) - MaxComb)
+ pminub_r2r (mm4, mm2); // now = Min(Max(best, Min(L1,L3) - MaxComb), Max(L1,L3) + MaxComb) = clipped best
- movntq_r2m (mm2, *output); // move in our clipped best
+ movq_r2m (mm2, *output); // move in our clipped best
// Advance to the next set of pixels.
output += 8;
@@ -171,72 +198,29 @@ deinterlace_greedy_packed422_scanline_sse (GstDeinterlace2 * object,
}
sfence ();
emms ();
-}
-#endif
-
-static void
-deinterlace_greedy_packed422_scanline_c (GstDeinterlace2 * object,
- deinterlace_scanline_data_t * data, uint8_t * output)
-{
- uint8_t *m0 = data->m0;
-
- uint8_t *t1 = data->t1;
-
- uint8_t *b1 = data->b1;
-
- uint8_t *m2 = data->m2;
-
- int width = 2 * object->frame_width;
-
- uint16_t avg, l2_diff, lp2_diff, max, min, best;
-
- // L2 == m0
- // L1 == t1
- // L3 == b1
- // LP2 == m2
-
- while (width--) {
- avg = (*t1 + *b1) / 2;
-
- l2_diff = ABS (*m0 - avg);
- lp2_diff = ABS (*m2 - avg);
-
- if (l2_diff > lp2_diff)
- best = *m2;
- else
- best = *m0;
-
- max = MAX (*t1, *b1);
- min = MIN (*t1, *b1);
-
- if (max < 256 - GreedyMaxComb)
- max += GreedyMaxComb;
- if (min > GreedyMaxComb)
- min -= GreedyMaxComb;
-
- *output = MIN (MAX (best, min), max);
- // Advance to the next set of pixels.
- output += 1;
- m0 += 1;
- t1 += 1;
- b1 += 1;
- m2 += 1;
- }
+ if (width > 0)
+ deinterlace_greedy_packed422_scanline_c (object, m0, t1, b1, m2, output,
+ width);
}
+#endif
+
static void
deinterlace_greedy_packed422_scanline (GstDeinterlace2 * object,
deinterlace_scanline_data_t * data, uint8_t * output)
{
#ifdef HAVE_CPU_I386
- if (object->cpu_feature_flags & OIL_IMPL_FLAG_SSE) {
- deinterlace_greedy_packed422_scanline_sse (object, data, output);
+ if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) {
+ deinterlace_greedy_packed422_scanline_mmxext (object, data->m0, data->t1,
+ data->b1, data->m2, output, 2 * object->frame_width);
} else {
- deinterlace_greedy_packed422_scanline_c (object, data, output);
+ deinterlace_greedy_packed422_scanline_c (object, data->m0, data->t1,
+ data->b1, data->m2, output, 2 * object->frame_width);
}
#else
- deinterlace_greedy_packed422_scanline_c (object, data, output);
+ deinterlace_greedy_packed422_scanline_c (object, data->m0, data->t1, data->b1,
+ data->m2, output, 2 * object->frame_width);
#endif
}