diff options
author | Martin Eikermann <meiker@upb.de> | 2008-06-11 11:12:49 +0000 |
---|---|---|
committer | Sebastian Dröge <slomo@circular-chaos.org> | 2008-06-11 11:12:49 +0000 |
commit | 4f836b7732ccaf0421081cbe4198b97dabd60513 (patch) | |
tree | 4053d5d22ae3a5788c85d39649ff313bb14b7b2b /gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc | |
parent | 70ff21342117866ee939f9f7597ee487bae31757 (diff) | |
download | gst-plugins-bad-4f836b7732ccaf0421081cbe4198b97dabd60513.tar.gz gst-plugins-bad-4f836b7732ccaf0421081cbe4198b97dabd60513.tar.bz2 gst-plugins-bad-4f836b7732ccaf0421081cbe4198b97dabd60513.zip |
gst/deinterlace2/: Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer, which was relicensed to LGPL f...
Original commit message from CVS:
Based on a patch by: Martin Eikermann <meiker at upb dot de>
* gst/deinterlace2/Makefile.am:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace2_method_get_type),
(gst_deinterlace2_fields_get_type),
(gst_deinterlace2_field_layout_get_type),
(gst_deinterlace2_base_init), (gst_deinterlace2_class_init),
(gst_deinterlace2_init), (gst_deinterlace2_set_method),
(gst_deinterlace2_set_property), (gst_deinterlace2_get_property),
(gst_deinterlace2_finalize), (gst_deinterlace2_pop_history),
(gst_deinterlace2_head_history), (gst_deinterlace2_push_history),
(gst_deinterlace2_deinterlace_scanlines), (gst_deinterlace2_chain),
(gst_deinterlace2_setcaps), (gst_deinterlace2_sink_event),
(gst_deinterlace2_change_state), (gst_deinterlace2_src_event),
(gst_deinterlace2_src_query), (gst_deinterlace2_src_query_types),
(plugin_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/greedy.c: (copy_scanline),
(deinterlace_greedy_packed422_scanline_mmxext),
(dscaler_greedyl_get_method):
* gst/deinterlace2/tvtime/greedyh.asm:
* gst/deinterlace2/tvtime/greedyh.c:
(deinterlace_frame_di_greedyh), (dscaler_greedyh_get_method),
(greedyh_init), (greedyh_filter_mmx), (greedyh_filter_3dnow),
(greedyh_filter_sse):
* gst/deinterlace2/tvtime/greedyh.h:
* gst/deinterlace2/tvtime/greedyhmacros.h:
* gst/deinterlace2/tvtime/mmx.h:
* gst/deinterlace2/tvtime/plugins.h:
* gst/deinterlace2/tvtime/speedtools.h:
* gst/deinterlace2/tvtime/speedy.c: (multiply_alpha), (clip255),
(comb_factor_packed422_scanline_mmx),
(diff_factor_packed422_scanline_c),
(diff_factor_packed422_scanline_mmx),
(diff_packed422_block8x8_mmx), (diff_packed422_block8x8_c),
(packed444_to_packed422_scanline_c),
(packed422_to_packed444_scanline_c),
(packed422_to_packed444_rec601_scanline_c),
(vfilter_chroma_121_packed422_scanline_mmx),
(vfilter_chroma_121_packed422_scanline_c),
(vfilter_chroma_332_packed422_scanline_mmx),
(vfilter_chroma_332_packed422_scanline_c),
(kill_chroma_packed422_inplace_scanline_mmx),
(kill_chroma_packed422_inplace_scanline_c),
(invert_colour_packed422_inplace_scanline_mmx),
(invert_colour_packed422_inplace_scanline_c),
(mirror_packed422_inplace_scanline_c),
(interpolate_packed422_scanline_c),
(convert_uyvy_to_yuyv_scanline_mmx),
(convert_uyvy_to_yuyv_scanline_c),
(interpolate_packed422_scanline_mmx),
(interpolate_packed422_scanline_mmxext),
(blit_colour_packed422_scanline_c),
(blit_colour_packed422_scanline_mmx),
(blit_colour_packed422_scanline_mmxext),
(blit_colour_packed4444_scanline_c),
(blit_colour_packed4444_scanline_mmx),
(blit_colour_packed4444_scanline_mmxext), (small_memcpy),
(speedy_memcpy_c), (speedy_memcpy_mmx), (speedy_memcpy_mmxext),
(blit_packed422_scanline_c), (blit_packed422_scanline_mmx),
(blit_packed422_scanline_mmxext),
(composite_colour4444_alpha_to_packed422_scanline_c),
(composite_colour4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_alpha_to_packed422_scanline_c),
(composite_packed4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_to_packed422_scanline_c),
(composite_packed4444_to_packed422_scanline_mmxext),
(composite_alphamask_to_packed4444_scanline_c),
(composite_alphamask_to_packed4444_scanline_mmxext),
(composite_alphamask_alpha_to_packed4444_scanline_c),
(premultiply_packed4444_scanline_c),
(premultiply_packed4444_scanline_mmxext),
(blend_packed422_scanline_c), (blend_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_c),
(subpix_blit_vertical_packed422_scanline_c),
(a8_subpix_blit_scanline_c), (myround), (init_RGB_to_YCbCr_tables),
(init_YCbCr_to_RGB_tables), (rgb24_to_packed444_rec601_scanline_c),
(rgba32_to_packed4444_rec601_scanline_c),
(packed444_to_rgb24_rec601_scanline_c),
(packed444_to_nonpremultiplied_packed4444_scanline_c),
(aspect_adjust_packed4444_scanline_c), (setup_speedy_calls),
(speedy_get_accel):
* gst/deinterlace2/tvtime/speedy.h:
* gst/deinterlace2/tvtime/sse.h:
* gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy),
(deinterlace_frame_di_tomsmocomp), (dscaler_tomsmocomp_get_method),
(tomsmocomp_init), (tomsmocomp_filter_mmx),
(tomsmocomp_filter_3dnow), (tomsmocomp_filter_sse):
* gst/deinterlace2/tvtime/tomsmocomp.h:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line),
(deinterlace_scanline_vfir), (copy_scanline),
(dscaler_vfir_get_method):
* gst/deinterlace2/tvtime/x86-64_macros.inc:
Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer,
which was relicensed to LGPL for GStreamer and in theory provides
better and faster results than the simple deinterlace element.
Fixes bug #163578.
Ported to GStreamer 0.10, but still not enabled or included in the
build system by default, because of bad artefacts caused by a bug
somewhere, and because it can only be built on x86/amd64 at the moment
and requires special CFLAGS. Will be fixed soon.
Diffstat (limited to 'gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc')
-rw-r--r-- | gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc new file mode 100644 index 00000000..7560f404 --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc @@ -0,0 +1,193 @@ +// -*- c++ -*- + +unsigned char* pDest; +const unsigned char* pSrcP; +const unsigned char* pSrc; +const unsigned char* pBob; +const unsigned char* pBobP; + +int64_t Max_Mov = 0x0404040404040404ull; +int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; +int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma +int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma +int64_t TENS = 0x0a0a0a0a0a0a0a0aull; +int64_t FOURS = 0x0404040404040404ull; +int64_t ONES = 0x0101010101010101ull; +int64_t Min_Vals = 0x0000000000000000ull; +int64_t Max_Vals = 0x0000000000000000ull; +int64_t ShiftMask = 0xfefffefffefffeffull; + +// long is int32 on ARCH_368, int64 on ARCH_AMD64. Declaring it this way +// saves a lot of xor's to delete 64bit garbage. + +#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER) +long src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler +#else +long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth +#endif + + +long dst_pitch2 = 2 * dst_pitch; +long y; + +#ifdef IS_SSE2 +long Last8 = (rowsize-16); // ofs to last 16 bytes in row for SSE2 +#else +long Last8 = (rowsize-8); // ofs to last 8 bytes in row +#endif + +long dst_pitchw = dst_pitch; // local stor so asm can ref + pSrc = pWeaveSrc; // points 1 weave line above + pSrcP = pWeaveSrcP; // " + +#ifdef DBL_RESIZE + +#ifdef USE_VERTICAL_FILTER + pDest = pWeaveDest + dst_pitch2; +#else + pDest = pWeaveDest + 3*dst_pitch; +#endif + +#else + +#ifdef USE_VERTICAL_FILTER + pDest = pWeaveDest + dst_pitch; +#else + pDest = pWeaveDest + dst_pitch2; +#endif + +#endif + + if (TopFirst) + { + pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously + pBobP = pCopySrcP + src_pitch2; + } + else + { + 
pBob = pCopySrc; + pBobP = pCopySrcP; + } + +#ifndef _pBob +#define _pBob "%0" +#define _src_pitch2 "%1" +#define _ShiftMask "%2" +#define _pDest "%3" +#define _dst_pitchw "%4" +#define _Last8 "%5" +#define _pSrc "%6" +#define _pSrcP "%7" +#define _pBobP "%8" +#define _DiffThres "%9" +#define _Min_Vals "%10" +#define _Max_Vals "%11" +#define _FOURS "%12" +#define _TENS "%13" +#define _ONES "%14" +#define _UVMask "%15" +#define _Max_Mov "%16" +#define _YMask "%17" +#define _oldbx "%18" +#endif + + long oldbx; + + for (y=1; y < FldHeight-1; y++) + { + // pretend it's indented -->> + __asm__ __volatile__ + ( + // Loop general reg usage + // + // XAX - pBobP, then pDest + // XBX - pBob + // XCX - src_pitch2 + // XDX - current offset + // XDI - prev weave pixels, 1 line up + // XSI - next weave pixels, 1 line up + + // Save "XBX" (-fPIC) + MOVX" %%"XBX", "_oldbx"\n\t" + +#ifdef IS_SSE2 + + // sse2 code deleted for now + +#else + // simple bob first 8 bytes + MOVX" "_pBob", %%"XBX"\n\t" + MOVX" "_src_pitch2", %%"XCX"\n\t" + +#ifdef USE_VERTICAL_FILTER + "movq (%%"XBX"), %%mm0\n\t" + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"] + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between + V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way + V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way + MOVX" "_pDest", %%"XDI"\n\t" + MOVX" "_dst_pitchw", %%"XAX"\n\t" + V_MOVNTQ ("(%%"XDI")", "%%mm0") + V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1 + + // simple bob last 8 bytes + MOVX" "_Last8", %%"XDX"\n\t" + LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"] + "movq (%%"XSI"), %%mm0\n\t" + "movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"] + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between + V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way + V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way + ADDX" %%"XDX", 
%%"XDI"\n\t" // last 8 bytes of dest + V_MOVNTQ ("%%"XDI"", "%%mm0") + V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1) + +#else + "movq (%%"XBX"), %%mm0\n\t" + // pavgb mm0, qword ptr["XBX"+"XCX"] + V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask) + MOVX" "_pDest", %%"XDI"\n\t" + V_MOVNTQ ("(%%"XDI")", "%%mm0") + + // simple bob last 8 bytes + MOVX" "_Last8", %%"XDX"\n\t" + LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"] + "movq (%%"XSI"), %%mm0\n\t" + // pavgb mm0, qword ptr["XSI"+"XCX"] + V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask) + V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0) +#endif + // now loop and get the middle qwords + MOVX" "_pSrc", %%"XSI"\n\t" + MOVX" "_pSrcP", %%"XDI"\n\t" + MOVX" $8, %%"XDX"\n\t" // curr offset longo all lines + + "1:\n\t" + MOVX" "_pBobP", %%"XAX"\n\t" + ADDX" $8, %%"XDI"\n\t" + ADDX" $8, %%"XSI"\n\t" + ADDX" $8, %%"XBX"\n\t" + ADDX" %%"XDX", %%"XAX"\n\t" + +#ifdef USE_STRANGE_BOB +#include "StrangeBob.inc" +#else +#include "WierdBob.inc" +#endif + + // For non-SSE2: + // through out most of the rest of this loop we will maintain + // mm4 our min bob value + // mm5 best weave pixels so far + // mm6 our max Bob value + // mm7 best weighted pixel ratings so far + + // We will keep a slight bias to using the weave pixels + // from the current location, by rating them by the min distance + // from the Bob value instead of the avg distance from that value. + // our best and only rating so far + "pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet + +#endif |