summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc38
-rw-r--r--gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc72
-rw-r--r--gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc101
-rw-r--r--gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc93
5 files changed, 206 insertions, 108 deletions
diff --git a/ChangeLog b/ChangeLog
index a2c53137..cdc3d14e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-08-26 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+
+ * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
+ * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
+ * gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
+ * gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
+ Unroll the loop to handle two bytes at once. This should give
+ a small speedup and makes it possible to handle chroma and luma
+ differently, which is needed later.
+
2008-08-26 Edward Hervey <edward.hervey@collabora.co.uk>
* gst/dccp/gstdccpserversink.c:
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
index ce6d2534..e1560353 100644
--- a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
+++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
@@ -114,27 +114,39 @@
return 0;
#else
#ifdef SKIP_SEARCH
- out = best; // just use the results of our wierd bob
+ out[0] = best[0]; // just use the results of our wierd bob
+ out[1] = best[1];
#else
- diff = diff - MIN (diff, 10) - 4;
- if (diff < 0)
- out = weave;
+ diff[0] = diff[0] - MIN (diff[0], 10) - 4; // take off up to 10, then 4 more; a negative result selects weave below
+ diff[1] = diff[1] - MIN (diff[1], 10) - 4; // second byte: same rule (MIN takes two arguments)
+ if (diff[0] < 0)
+ out[0] = weave[0];
else
- out = best;
+ out[0] = best[0];
+
+ if (diff[1] < 0)
+ out[1] = weave[1];
+ else
+ out[1] = best[1];
+
- out = CLAMP (out, MinVals, MaxVals);
+ out[0] = CLAMP (out[0], MinVals[0], MaxVals[0]);
+ out[1] = CLAMP (out[1], MinVals[1], MaxVals[1]);
#endif
#ifdef USE_VERTICAL_FILTER
- pDest[x] = (out + pBob[0]) / 2;
- pDest[x + dst_pitchw] = (pBob[src_pitch2] + out) / 2;
+ pDest[x] = (out[0] + pBob[0]) / 2;
+ pDest[x + dst_pitchw] = (pBob[src_pitch2] + out[0]) / 2;
+ pDest[x + 1] = (out[1] + pBob[1]) / 2;
+ pDest[x + 1 + dst_pitchw] = (pBob[src_pitch2 + 1] + out[1]) / 2;
#else
- pDest[x] = out;
+ pDest[x] = out[0];
+ pDest[x+1] = out[1];
#endif
- pBob += 1;
- pBobP += 1;
- pSrc += 1;
- pSrcP += 1;
+ pBob += 2;
+ pBobP += 2;
+ pSrc += 2;
+ pSrcP += 2;
}
// adjust for next line
pSrc = src_pitch2 * (y+1) + pWeaveSrc;
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
index 9f42650b..9d6a490f 100644
--- a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
+++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
@@ -6,29 +6,6 @@ const unsigned char* pSrc;
const unsigned char* pBob;
const unsigned char* pBobP;
-#ifndef IS_C
-
-int64_t Max_Mov = 0x0404040404040404ull;
-int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
-int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
-int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma
-int64_t TENS = 0x0a0a0a0a0a0a0a0aull;
-int64_t FOURS = 0x0404040404040404ull;
-int64_t ONES = 0x0101010101010101ull;
-int64_t Min_Vals = 0x0000000000000000ull;
-int64_t Max_Vals = 0x0000000000000000ull;
-int64_t ShiftMask = 0xfefffefffefffeffull;
-
-long oldbx;
-
-#else
-
-#ifdef USE_STRANGE_BOB
-int64_t DiffThres = 0x0f;
-#endif
-
-#endif
-
// long is int32 on ARCH_368, int64 on ARCH_AMD64. Declaring it this way
// saves a lot of xor's to delete 64bit garbage.
@@ -40,23 +17,10 @@ long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avi
long dst_pitch2 = 2 * dst_pitch;
-#ifdef IS_C
+long y;
-long x,best,diff,avg,diff2,out;
-#endif
-long y;
-
-#if defined(IS_SSE2)
-long Last8 = (rowsize-16); // ofs to last 16 bytes in row for SSE2
-#elif defined(IS_C)
-long Last8 = (rowsize-4); // ofs to last two pixel in row
-#else
-long Last8 = (rowsize-8); // ofs to last 8 bytes in row
-#endif
+long Last8;
-#ifndef IS_C
-long dst_pitchw = dst_pitch; // local stor so asm can ref
-#endif
pSrc = pWeaveSrc; // points 1 weave line above
pSrcP = pWeaveSrcP; // "
@@ -112,9 +76,24 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
#define _YMask "%17"
#define _oldbx "%18"
#endif
+ Last8 = (rowsize-8);
for (y=1; y < FldHeight-1; y++)
- {
+ {
+ long dst_pitchw = dst_pitch; // local stor so asm can ref
+ int64_t Max_Mov = 0x0404040404040404ull;
+ int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
+ int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
+ int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma
+ int64_t TENS = 0x0a0a0a0a0a0a0a0aull;
+ int64_t FOURS = 0x0404040404040404ull;
+ int64_t ONES = 0x0101010101010101ull;
+ int64_t Min_Vals = 0x0000000000000000ull;
+ int64_t Max_Vals = 0x0000000000000000ull;
+ int64_t ShiftMask = 0xfefffefffefffeffull;
+
+ long oldbx;
+
// pretend it's indented -->>
__asm__ __volatile__
(
@@ -206,9 +185,20 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
"pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet
#else
+ Last8 = (rowsize - 4);
for (y=1; y < FldHeight-1; y++)
{
+ #ifdef USE_STRANGE_BOB
+ long DiffThres = 0x0f;
+ #endif
+
+ #ifndef SKIP_SEARCH
+ long weave[2], MaxVals[2], MinVals[2];
+ #endif
+
+ long diff[2], best[2], avg[2], diff2[2], out[2], x;
+
#ifdef USE_VERTICAL_FILTER
pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;
pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
@@ -246,7 +236,7 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
pSrc += 4;
pSrcP += 4;
- for (x=4; x < Last8; x += 1) {
+ for (x=4; x < Last8; x += 2) {
#ifdef USE_STRANGE_BOB
#include "StrangeBob.inc"
@@ -258,7 +248,7 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
// from the current location, by rating them by the min distance
// from the Bob value instead of the avg distance from that value.
// our best and only rating so far
- diff = 255;
+ diff[0] = diff[1] = 255;
#endif
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
index 73ce706a..45b4c865 100644
--- a/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
+++ b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
@@ -324,65 +324,112 @@
#else
- diff = -1;
- best = 0;
+ diff[0] = -1;
+ diff[1] = -1;
+ best[0] = 0;
+ best[1] = 0;
// j, n
if (ABS (pBob[-2] - pBob[src_pitch2 - 4]) < DiffThres &&
ABS (pBob[-4] - pBob[src_pitch2 + 4]) > DiffThres) {
- best = (pBob[-2] + pBob[src_pitch2 - 4]) / 2;
- diff = ABS (pBob[-2] - pBob[src_pitch2 - 4]);
+ best[0] = (pBob[-2] + pBob[src_pitch2 - 4]) / 2;
+ diff[0] = ABS (pBob[-2] - pBob[src_pitch2 - 4]);
+ }
+ if (ABS (pBob[-1] - pBob[src_pitch2 - 3]) < DiffThres &&
+ ABS (pBob[-3] - pBob[src_pitch2 + 5]) > DiffThres) {
+ best[1] = (pBob[-1] + pBob[src_pitch2 - 3]) / 2;
+ diff[1] = ABS (pBob[-1] - pBob[src_pitch2 - 3]);
}
// k & m
if (ABS (pBob[2] - pBob[src_pitch2 + 4]) < DiffThres &&
ABS (pBob[4] - pBob[src_pitch2 - 4]) > DiffThres) {
- best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
- diff = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+ best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+ diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+ }
+
+ if (ABS (pBob[3] - pBob[src_pitch2 + 5]) < DiffThres &&
+ ABS (pBob[5] - pBob[src_pitch2 - 3]) > DiffThres) {
+ best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+ diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]);
}
// c & d
if (ABS (pBob[0] - pBob[src_pitch2 + 2]) < DiffThres &&
ABS (pBob[2] - pBob[src_pitch2 - 2]) > DiffThres) {
- best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
- diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+ best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+ diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+ }
+
+ if (ABS (pBob[1] - pBob[src_pitch2 + 3]) < DiffThres &&
+ ABS (pBob[3] - pBob[src_pitch2 - 1]) > DiffThres) {
+ best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+ diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
}
// a & f
if (ABS (pBob[0] - pBob[src_pitch2 - 2]) < DiffThres &&
ABS (pBob[-2] - pBob[src_pitch2 + 2]) > DiffThres) {
- best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
- diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+ best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+ diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+ }
+
+ if (ABS (pBob[1] - pBob[src_pitch2 - 1]) < DiffThres &&
+ ABS (pBob[-1] - pBob[src_pitch2 + 3]) > DiffThres) {
+ best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+ diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
}
// b,e
if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) {
- best = (pBob[0] + pBob[src_pitch2]) / 2;
- diff = ABS (pBob[0] - pBob[src_pitch2]);
+ best[0] = (pBob[0] + pBob[src_pitch2]) / 2;
+ diff[0] = ABS (pBob[0] - pBob[src_pitch2]);
}
+ if (ABS (pBob[1] - pBob[src_pitch2 + 1]) < DiffThres) {
+ best[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
+ diff[1] = ABS (pBob[1] - pBob[src_pitch2 + 1]);
+ }
+
+
// We will also calc here the max/min values to later limit comb
// so the max excursion will not exceed the Max_Comb constant
#ifdef SKIP_SEARCH
- best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
#else
- mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
-
- MinVals = 0;
- MaxVals = 255;
- if (mov > DiffThres) {
- MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
- MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
+ mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+ mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
+
+ MinVals[0] = 0;
+ MinVals[1] = 0;
+ MaxVals[0] = 255;
+ MaxVals[1] = 255;
+ if (mov[0] > DiffThres) {
+ MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+ MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+ }
+
+ if (mov[1] > DiffThres) {
+ MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2+1]), best[1]);
+ MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2+1]), best[1]);
}
- best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
#endif
+ avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+ avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+ diff2[0] = ABS (pBob[src_pitch2] - pBob[0]); // byte 0 is rated against its own column, matching avg[0]
+ diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+ if (diff[0] == -1 || diff2[0] < diff[0]) {
+ best[0] = avg[0];
+ diff[0] = diff2[0];
+ }
- avg = (pBob[src_pitch2] + pBob[0]) / 2;
- diff2 = ABS (pBob[src_pitch2] - pBob[0]);
-
- if (diff == -1 || diff2 < diff) {
- best = avg;
- diff = diff2;
+ if (diff[1] == -1 || diff2[1] < diff[1]) {
+ best[1] = avg[1];
+ diff[1] = diff2[1];
}
#endif
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc
index 6cbd1b8d..f4bbb830 100644
--- a/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc
+++ b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc
@@ -192,56 +192,95 @@
#else
// a,f
- best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
- diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+ best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+ diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+ best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+ diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
// c,d
- if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff) {
- best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
- diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+ if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff[0]) {
+ best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+ diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+ }
+
+ if (ABS (pBob[3] - pBob[src_pitch2 - 1]) < diff[1]) {
+ best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+ diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
}
// j,n
- if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff) {
- best = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
- diff = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+ if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff[0]) {
+ best[0] = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
+ diff[0] = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+ }
+
+ if (ABS (pBob[-3] - pBob[src_pitch2 + 5]) < diff[1]) {
+ best[1] = (pBob[-3] + pBob[src_pitch2 + 5]) / 2;
+ diff[1] = ABS (pBob[-3] - pBob[src_pitch2 + 5]);
}
// k,m
- if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
- best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
- diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+ if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
+ best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+ diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]); // store the difference of the same pair that was tested and averaged
}
+ if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
+ best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+ diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]); // second byte: same pair, shifted by one
+ }
// k,m
- if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
- best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
- diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+ if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
+ best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+ diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]); // repeated k,m check, kept consistent with the one above
+ }
+
+ if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
+ best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+ diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]);
}
// We will also calc here the max/min values to later limit comb
// so the max excursion will not exceed the Max_Comb constant
#ifdef SKIP_SEARCH
- best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
#else
- mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+ mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+ mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
- MinVals = 0;
- MaxVals = 255;
- if (mov > Max_Mov) {
- MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
- MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
+ MinVals[0] = 0;
+ MinVals[1] = 0;
+ MaxVals[0] = 255;
+ MaxVals[1] = 255;
+
+ if (mov[0] > Max_Mov[0]) {
+ MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+ MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+ }
+
+ if (mov[1] > Max_Mov[1]) {
+ MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2 + 1]), best[1]);
+ MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2 + 1]), best[1]);
}
- best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
#endif
- avg = (pBob[src_pitch2] + pBob[0]) / 2;
- diff2 = ABS (pBob[src_pitch2] - pBob[0]);
+ avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+ avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+ diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
+ diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+ if (diff2[0] < diff[0]) {
+ best[0] = avg[0];
+ diff[0] = diff2[0];
+ }
- if (diff2 < diff) {
- best = avg;
- diff = diff2;
+ if (diff2[1] < diff[1]) {
+ best[1] = avg[1];
+ diff[1] = diff2[1];
}
#endif