From 4f836b7732ccaf0421081cbe4198b97dabd60513 Mon Sep 17 00:00:00 2001
From: Martin Eikermann <meiker@upb.de>
Date: Wed, 11 Jun 2008 11:12:49 +0000
Subject: gst/deinterlace2/: Add a deinterlacer plugin based on the
 tvtime/DScaler deinterlacer, which was relicensed to LGPL f...

Original commit message from CVS:
Based on a patch by: Martin Eikermann <meiker at upb dot de>
* gst/deinterlace2/Makefile.am:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace2_method_get_type),
(gst_deinterlace2_fields_get_type),
(gst_deinterlace2_field_layout_get_type),
(gst_deinterlace2_base_init), (gst_deinterlace2_class_init),
(gst_deinterlace2_init), (gst_deinterlace2_set_method),
(gst_deinterlace2_set_property), (gst_deinterlace2_get_property),
(gst_deinterlace2_finalize), (gst_deinterlace2_pop_history),
(gst_deinterlace2_head_history), (gst_deinterlace2_push_history),
(gst_deinterlace2_deinterlace_scanlines), (gst_deinterlace2_chain),
(gst_deinterlace2_setcaps), (gst_deinterlace2_sink_event),
(gst_deinterlace2_change_state), (gst_deinterlace2_src_event),
(gst_deinterlace2_src_query), (gst_deinterlace2_src_query_types),
(plugin_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/greedy.c: (copy_scanline),
(deinterlace_greedy_packed422_scanline_mmxext),
(dscaler_greedyl_get_method):
* gst/deinterlace2/tvtime/greedyh.asm:
* gst/deinterlace2/tvtime/greedyh.c:
(deinterlace_frame_di_greedyh), (dscaler_greedyh_get_method),
(greedyh_init), (greedyh_filter_mmx), (greedyh_filter_3dnow),
(greedyh_filter_sse):
* gst/deinterlace2/tvtime/greedyh.h:
* gst/deinterlace2/tvtime/greedyhmacros.h:
* gst/deinterlace2/tvtime/mmx.h:
* gst/deinterlace2/tvtime/plugins.h:
* gst/deinterlace2/tvtime/speedtools.h:
* gst/deinterlace2/tvtime/speedy.c: (multiply_alpha), (clip255),
(comb_factor_packed422_scanline_mmx),
(diff_factor_packed422_scanline_c),
(diff_factor_packed422_scanline_mmx),
(diff_packed422_block8x8_mmx), (diff_packed422_block8x8_c),
(packed444_to_packed422_scanline_c),
(packed422_to_packed444_scanline_c),
(packed422_to_packed444_rec601_scanline_c),
(vfilter_chroma_121_packed422_scanline_mmx),
(vfilter_chroma_121_packed422_scanline_c),
(vfilter_chroma_332_packed422_scanline_mmx),
(vfilter_chroma_332_packed422_scanline_c),
(kill_chroma_packed422_inplace_scanline_mmx),
(kill_chroma_packed422_inplace_scanline_c),
(invert_colour_packed422_inplace_scanline_mmx),
(invert_colour_packed422_inplace_scanline_c),
(mirror_packed422_inplace_scanline_c),
(interpolate_packed422_scanline_c),
(convert_uyvy_to_yuyv_scanline_mmx),
(convert_uyvy_to_yuyv_scanline_c),
(interpolate_packed422_scanline_mmx),
(interpolate_packed422_scanline_mmxext),
(blit_colour_packed422_scanline_c),
(blit_colour_packed422_scanline_mmx),
(blit_colour_packed422_scanline_mmxext),
(blit_colour_packed4444_scanline_c),
(blit_colour_packed4444_scanline_mmx),
(blit_colour_packed4444_scanline_mmxext), (small_memcpy),
(speedy_memcpy_c), (speedy_memcpy_mmx), (speedy_memcpy_mmxext),
(blit_packed422_scanline_c), (blit_packed422_scanline_mmx),
(blit_packed422_scanline_mmxext),
(composite_colour4444_alpha_to_packed422_scanline_c),
(composite_colour4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_alpha_to_packed422_scanline_c),
(composite_packed4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_to_packed422_scanline_c),
(composite_packed4444_to_packed422_scanline_mmxext),
(composite_alphamask_to_packed4444_scanline_c),
(composite_alphamask_to_packed4444_scanline_mmxext),
(composite_alphamask_alpha_to_packed4444_scanline_c),
(premultiply_packed4444_scanline_c),
(premultiply_packed4444_scanline_mmxext),
(blend_packed422_scanline_c), (blend_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_c),
(subpix_blit_vertical_packed422_scanline_c),
(a8_subpix_blit_scanline_c), (myround), (init_RGB_to_YCbCr_tables),
(init_YCbCr_to_RGB_tables), (rgb24_to_packed444_rec601_scanline_c),
(rgba32_to_packed4444_rec601_scanline_c),
(packed444_to_rgb24_rec601_scanline_c),
(packed444_to_nonpremultiplied_packed4444_scanline_c),
(aspect_adjust_packed4444_scanline_c), (setup_speedy_calls),
(speedy_get_accel):
* gst/deinterlace2/tvtime/speedy.h:
* gst/deinterlace2/tvtime/sse.h:
* gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy),
(deinterlace_frame_di_tomsmocomp), (dscaler_tomsmocomp_get_method),
(tomsmocomp_init), (tomsmocomp_filter_mmx),
(tomsmocomp_filter_3dnow), (tomsmocomp_filter_sse):
* gst/deinterlace2/tvtime/tomsmocomp.h:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line),
(deinterlace_scanline_vfir), (copy_scanline),
(dscaler_vfir_get_method):
* gst/deinterlace2/tvtime/x86-64_macros.inc:
Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer,
which was relicensed to LGPL for GStreamer and in theory provides
better and faster results than the simple deinterlace element.
Fixes bug #163578.
Ported to GStreamer 0.10 but still not enabled or included in the
build system by default because of bad artefacts caused by a bug
somewhere and as it can only be built on x86/amd64 ATM and requires
special CFLAGS. Will be fixed soon.
---
 .../tvtime/tomsmocomp/SearchLoopTop.inc            | 193 +++++++++++++++++++++
 1 file changed, 193 insertions(+)
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc

(limited to 'gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc')
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
new file mode 100644
index 00000000..7560f404
--- /dev/null
+++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
@@ -0,0 +1,193 @@
+// -*- c++ -*-
+
+unsigned char* pDest;			// output cursor; derived from pWeaveDest below
+const unsigned char* pSrcP;		// assigned from pWeaveSrcP below
+const unsigned char* pSrc;		// assigned from pWeaveSrc below
+const unsigned char* pBob;		// assigned from pCopySrc below (offset depends on TopFirst)
+const unsigned char* pBobP;		// assigned from pCopySrcP below (offset depends on TopFirst)
+
+int64_t Max_Mov   = 0x0404040404040404ull; // 0x04 replicated in all 8 bytes
+int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; // 0x0f replicated in all 8 bytes
+int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
+int64_t UVMask    = 0xff00ff00ff00ff00ull; // keeps only chroma
+int64_t TENS      = 0x0a0a0a0a0a0a0a0aull; // 10 replicated in all 8 bytes
+int64_t FOURS     = 0x0404040404040404ull; // 4 replicated in all 8 bytes
+int64_t ONES      = 0x0101010101010101ull; // 1 replicated in all 8 bytes
+int64_t Min_Vals  = 0x0000000000000000ull; // starts at zero; presumably the _Min_Vals asm operand - confirm
+int64_t Max_Vals  = 0x0000000000000000ull; // starts at zero; presumably the _Max_Vals asm operand - confirm
+int64_t ShiftMask = 0xfefffefffefffeffull; // presumably what _ShiftMask hands to V_PAVGB - confirm
+
+// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
+// saves a lot of xor's to delete 64bit garbage.
+
+#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
+long	    src_pitch2 = src_pitch;			// even & odd lines are not interleaved in DScaler
+#else // !(DBL_RESIZE || USE_FOR_DSCALER)
+long	    src_pitch2 = 2 * src_pitch;		// even & odd lines are interleaved in Avisynth
+#endif // DBL_RESIZE || USE_FOR_DSCALER
+
+
+long	    dst_pitch2 = 2 * dst_pitch;		// distance of two destination lines
+long     y;					// row counter of the for loop below
+
+#ifdef IS_SSE2
+long     Last8 = (rowsize-16);			// ofs to last 16 bytes in row for SSE2
+#else // !IS_SSE2
+long     Last8 = (rowsize-8);			// ofs to last 8 bytes in row
+#endif // IS_SSE2
+
+long		dst_pitchw = dst_pitch; // local copy of dst_pitch so the inline asm can reference it
+	pSrc  = pWeaveSrc;			// points 1 weave line above
+	pSrcP = pWeaveSrcP;			// likewise, taken from pWeaveSrcP
+
+#ifdef DBL_RESIZE
+	        
+#ifdef USE_VERTICAL_FILTER
+	pDest = pWeaveDest + dst_pitch2;	// start 2 destination lines in
+#else // !USE_VERTICAL_FILTER
+	pDest = pWeaveDest + 3*dst_pitch;	// start 3 destination lines in
+#endif // USE_VERTICAL_FILTER
+
+#else // !DBL_RESIZE
+
+#ifdef USE_VERTICAL_FILTER
+	pDest = pWeaveDest + dst_pitch;		// start 1 destination line in
+#else // !USE_VERTICAL_FILTER
+	pDest = pWeaveDest + dst_pitch2;	// start 2 destination lines in
+#endif // USE_VERTICAL_FILTER
+
+#endif // DBL_RESIZE
+
+	if (TopFirst)
+	{
+		pBob = pCopySrc + src_pitch2;      // remember one weave line just copied previously
+		pBobP = pCopySrcP + src_pitch2;    // same offset applied to pCopySrcP
+	}
+	else
+	{
+		pBob =  pCopySrc;                  // no offset when TopFirst is false
+		pBobP =  pCopySrcP;
+	}
+
+#ifndef _pBob // asm operand placeholders, defined once; NOTE(review): numbers presumably must match the operand list that closes the asm statement (not in this file) - confirm
+#define _pBob       "%0"
+#define _src_pitch2 "%1"
+#define _ShiftMask  "%2"
+#define _pDest      "%3"
+#define _dst_pitchw "%4"
+#define _Last8      "%5"
+#define _pSrc       "%6"
+#define _pSrcP      "%7"
+#define _pBobP      "%8"
+#define _DiffThres  "%9"
+#define _Min_Vals   "%10"
+#define _Max_Vals   "%11"
+#define _FOURS      "%12"
+#define _TENS       "%13"
+#define _ONES       "%14"
+#define _UVMask     "%15"
+#define _Max_Mov    "%16"
+#define _YMask      "%17"
+#define _oldbx      "%18"
+#endif // _pBob
+
+        long oldbx; // presumably the slot behind _oldbx, where the asm parks XBX - confirm
+
+	for (y=1; y < FldHeight-1; y++)	// y runs from 1 to FldHeight-2
+	{
+		// the asm below is not indented to the loop's level; pretend it is -->>
+        __asm__ __volatile__
+            (
+             // Loop general reg usage
+             //
+             // XAX - pBobP, then pDest 
+             // XBX - pBob
+             // XCX - src_pitch2
+             // XDX - current offset
+             // XDI - prev weave pixels, 1 line up
+             // XSI - next weave pixels, 1 line up
+
+             // Save "XBX" (-fPIC) before it is reused as the pBob pointer
+	     MOVX" %%"XBX", "_oldbx"\n\t"
+             
+#ifdef IS_SSE2
+             
+             // sse2 code deleted for now
+
+#else // !IS_SSE2
+             // simple bob first 8 bytes
+             MOVX"	"_pBob",        %%"XBX"\n\t"	// XBX = pBob
+             MOVX"	"_src_pitch2",  %%"XCX"\n\t"	// XCX = src_pitch2
+
+#ifdef USE_VERTICAL_FILTER
+             "movq	    (%%"XBX"),        %%mm0\n\t"		// mm0 = qword ptr[XBX]
+             "movq	    (%%"XBX", %%"XCX"), %%mm1\n\t"	// mm1 = qword ptr[XBX+XCX]
+             "movq	    %%mm0,          %%mm2\n\t"		// mm2 = copy of mm0
+             V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)		// halfway between
+             V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)		// 1/4 way
+             V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)		// 3/4 way
+             MOVX"		"_pDest",       %%"XDI"\n\t"		// XDI = pDest
+             MOVX"		"_dst_pitchw",  %%"XAX"\n\t"		// XAX = dst_pitchw
+             V_MOVNTQ	("(%%"XDI")", "%%mm0")			// qword ptr[XDI], mm0
+             V_MOVNTQ	("(%%"XDI", %%"XAX")", "%%mm1")	// qword ptr[XDI+XAX], mm1
+
+             // simple bob last 8 bytes
+             MOVX"		"_Last8", %%"XDX"\n\t"			// XDX = Last8
+             LEAX"		(%%"XBX", %%"XDX"), %%"XSI"\n\t"  // XSI = XBX+XDX
+             "movq	    (%%"XSI"), %%mm0\n\t"			// mm0 = qword ptr[XSI]
+             "movq	    (%%"XSI", %%"XCX"), %%mm1\n\t"    // mm1 = qword ptr[XSI+XCX]
+             "movq	    %%mm0, %%mm2\n\t"			// mm2 = copy of mm0
+             V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)		// halfway between
+             V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)		// 1/4 way
+             V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)		// 3/4 way
+             ADDX"		%%"XDX", %%"XDI"\n\t"						// last 8 bytes of dest
+             V_MOVNTQ	("(%%"XDI")", "%%mm0")			// qword ptr[XDI], mm0 - destination must be the memory operand, not the XDI register itself
+             V_MOVNTQ	("(%%"XDI", %%"XAX")", "%%mm1")	// qword ptr[XDI+XAX], mm1
+
+#else // !USE_VERTICAL_FILTER
+             "movq	(%%"XBX"), %%mm0\n\t"			// mm0 = qword ptr[XBX]
+             //		Intel-syntax equivalent: pavgb	mm0, qword ptr[XBX+XCX]
+             V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // mm0 averaged with qword ptr[XBX+XCX]
+             MOVX"		"_pDest", %%"XDI"\n\t"			// XDI = pDest
+             V_MOVNTQ	("(%%"XDI")", "%%mm0")			// qword ptr[XDI], mm0
+
+             // simple bob last 8 bytes
+             MOVX"		"_Last8", %%"XDX"\n\t"			// XDX = Last8
+             LEAX"		(%%"XBX", %%"XDX"), %%"XSI"\n\t" // XSI = XBX+XDX
+             "movq	    (%%"XSI"), %%mm0\n\t"			// mm0 = qword ptr[XSI]
+             //		Intel-syntax equivalent: pavgb	mm0, qword ptr[XSI+XCX]
+             V_PAVGB	("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // mm0 averaged with qword ptr[XSI+XCX]
+             V_MOVNTQ	("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr[XDI+XDX], mm0
+#endif // USE_VERTICAL_FILTER
+             // now loop and get the middle qwords
+             MOVX"		"_pSrc", %%"XSI"\n\t"			// XSI = pSrc
+             MOVX"		"_pSrcP", %%"XDI"\n\t"			// XDI = pSrcP
+             MOVX"		$8, %%"XDX"\n\t"				// curr offset into all lines
+
+             "1:\n\t"	// top of the per-qword loop
+             MOVX"		"_pBobP", %%"XAX"\n\t"			// XAX = pBobP (reloaded each pass)
+             ADDX"		$8, %%"XDI"\n\t"			// advance XDI by 8 bytes
+             ADDX"		$8, %%"XSI"\n\t"			// advance XSI by 8 bytes
+             ADDX"		$8, %%"XBX"\n\t"			// advance XBX by 8 bytes
+             ADDX"		%%"XDX", %%"XAX"\n\t"			// XAX = pBobP + current offset
+
+#ifdef USE_STRANGE_BOB
+#include "StrangeBob.inc"
+#else
+#include "WierdBob.inc"
+#endif
+
+             // For non-SSE2:
+             // throughout most of the rest of this loop we will maintain
+             //	mm4		our min bob value
+             //	mm5		best weave pixels so far
+             //	mm6		our max Bob value 
+             //	mm7		best weighted pixel ratings so far
+             
+             // We will keep a slight bias to using the weave pixels
+             // from the current location, by rating them by the min distance
+             // from the Bob value instead of the avg distance from that value.
+             // our best and only rating so far
+             "pcmpeqb	%%mm7, %%mm7\n\t"			// mm7 = all ones (ffff): say we didn't find anything good yet
+
+#endif // !IS_SSE2
-- 
cgit v1.2.1