--- /dev/null
+ /*
+ * Copyright (C) 2001 the xine project
+ *
+ * This file is part of xine, a free video player.
+ *
+ * xine is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * xine is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Deinterlace routines by Miguel Freitas
+ * based on DScaler project sources (deinterlace.sourceforge.net)
+ *
+ * Currently only available for Xv driver and MMX extensions
+ *
+ * small todo list:
+ * - implement non-MMX versions for all methods
+ * - support MMX2 instructions
+ * - move some generic code from xv driver to this file
+ * - make it also work for yuy2 frames
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "deinterlace.h"
+#include "xineutils.h"
+
+#define xine_fast_memcpy memcpy /* every xine_fast_memcpy() call in this file expands to a plain memcpy() */
+
+/*
+ DeinterlaceFieldBob algorithm
+ Based on Virtual Dub plugin by Gunnar Thalin
+ MMX asm version from dscaler project (deinterlace.sourceforge.net)
+ Linux version for Xine player by Miguel Freitas
+
+ Builds the output frame in pdst from the single source frame psrc[0].
+ The source is treated as two interleaved fields: even lines start at
+ psrc[0], odd lines at psrc[0] + width, both with a pitch of 2 * width.
+ IsOdd is hard-wired to 1, so the odd lines are copied verbatim and each
+ even line between two odd lines is, per 16-bit lane, either kept or
+ replaced by the average of the odd lines above and below it, depending on
+ the edge/jaggie score computed in the inner loop.
+
+ pdst   - destination buffer, written in lines of width bytes
+ psrc   - source frames; only psrc[0] is read here
+ width  - bytes per line; the MMX loop handles width >> 3 quadwords, so a
+          remainder of width % 8 bytes is not written on interpolated lines
+ height - number of lines in the frame
+
+ The MMX macros are separate asm statements that pass values to each other
+ through mm0-mm7, so their order must not be changed.
+
+ On builds without ARCH_X86 / ARCH_X86_64 the body is compiled out and the
+ function does nothing.
+*/
+static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
+ int width, int height )
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+ int Line;
+ uint64_t *YVal1;
+ uint64_t *YVal2;
+ uint64_t *YVal3;
+ uint64_t *Dest;
+ uint8_t* pEvenLines = psrc[0]; // source lines 0, 2, 4, ...
+ uint8_t* pOddLines = psrc[0]+width; // source lines 1, 3, 5, ...
+ int LineLength = width;
+ int SourcePitch = width * 2; // step between two lines of the same field
+ int IsOdd = 1; // constant, so the !IsOdd branches below never run
+ long EdgeDetect = 625;
+ long JaggieThreshold = 73;
+
+ int n;
+
+ uint64_t qwEdgeDetect;
+ uint64_t qwThreshold;
+
+ static mmx_t YMask = {ub:{0xff,0,0xff,0,0xff,0,0xff,0}}; // keeps bytes 0, 2, 4 and 6 of a quadword
+ static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; // clears bit 0 of every byte
+
+ qwEdgeDetect = EdgeDetect;
+ qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16); // replicate into all four 16-bit lanes
+ qwThreshold = JaggieThreshold;
+ qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16); // same replication for the threshold
+
+
+ // copy first even line no matter what, and the first odd line if we're
+ // processing an odd field.
+ xine_fast_memcpy(pdst, pEvenLines, LineLength);
+ if (IsOdd)
+ xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength);
+
+ height = height / 2; // from here on: number of lines per field
+ for (Line = 0; Line < height - 1; ++Line)
+ {
+ if (IsOdd)
+ {
+ YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); // O1: odd line above
+ YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); // E: even line being rebuilt
+ YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); // O2: odd line below
+ Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); // output line 2 * Line + 2
+ }
+ else
+ {
+ YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch);
+ YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch);
+ YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+ Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength);
+ }
+
+ // For ease of reading, the comments below assume that we're operating on an odd
+ // field (i.e., that IsOdd is true). The exact same processing is done when we
+ // operate on an even field, but the roles of the odd and even fields are reversed.
+ // It's just too cumbersome to explain the algorithm in terms of "the next odd
+ // line if we're doing an odd field, or the next even line if we're doing an
+ // even field" etc. So wherever you see "odd" or "even" below, keep in mind that
+ // for the other field those words' meanings invert.
+
+ // Copy the odd line (O2) to the output line below Dest verbatim.
+ xine_fast_memcpy((char *)Dest + LineLength, YVal3, LineLength);
+
+ n = LineLength >> 3; // quadwords per line; a width % 8 tail is skipped
+ while( n-- )
+ {
+ movq_m2r (*YVal1++, mm0); // mm0 = O1
+ movq_m2r (*YVal2++, mm1); // mm1 = E
+ movq_m2r (*YVal3++, mm2); // mm2 = O2
+
+ /* mm3, mm4, mm5 = O1, E, O2 masked with YMask: one byte value per 16-bit lane.
+    NOTE(review): this byte layout looks like packed YUY2 luma, while the
+    file header lists yuy2 support as a todo; on planar input the test would
+    look at every other byte only -- confirm this is intended. */
+ movq_r2r ( mm0, mm3 );
+ pand_m2r ( YMask, mm3 );
+ movq_r2r ( mm1, mm4 );
+ pand_m2r ( YMask, mm4 );
+ movq_r2r ( mm2, mm5 );
+ pand_m2r ( YMask, mm5 );
+
+ /* get average in mm0: with bit 0 of every byte cleared, the word shift
+    halves each byte without leaking bits between bytes, and the sum of
+    two halved bytes cannot carry into the neighbouring byte */
+ pand_m2r ( Mask, mm0 );
+ pand_m2r ( Mask, mm2 );
+ psrlw_i2r ( 01, mm0 );
+ psrlw_i2r ( 01, mm2 );
+ paddw_r2r ( mm2, mm0 );
+
+ // work out (O1 - E) * (O2 - E) - EdgeDetect * ((O1 - O2) ^ 2 >> 12),
+ // using the halved values of O1, E and O2; result will be in mm6
+
+ psrlw_i2r ( 01, mm3 ); // halve the three masked values first
+ psrlw_i2r ( 01, mm4 );
+ psrlw_i2r ( 01, mm5 );
+
+ movq_r2r ( mm3, mm6 );
+ psubw_r2r ( mm4, mm6 ); //mm6 = O1 - E
+
+ movq_r2r ( mm5, mm7 );
+ psubw_r2r ( mm4, mm7 ); //mm7 = O2 - E
+
+ pmullw_r2r ( mm7, mm6 ); // mm6 = (O1 - E) * (O2 - E)
+
+ movq_r2r ( mm3, mm7 );
+ psubw_r2r ( mm5, mm7 ); // mm7 = (O1 - O2)
+ pmullw_r2r ( mm7, mm7 ); // mm7 = (O1 - O2) ^ 2
+ psrlw_i2r ( 12, mm7 ); // mm7 = (O1 - O2) ^ 2 >> 12
+ pmullw_m2r ( *&qwEdgeDetect, mm7 );// mm7 = EdgeDetect * (O1 - O2) ^ 2 >> 12
+
+ psubw_r2r ( mm7, mm6 ); // mm6 is what we want
+
+ pcmpgtw_m2r ( *&qwThreshold, mm6 ); // lanes of mm6 = 0xffff where the score exceeds JaggieThreshold, 0 elsewhere
+
+ movq_r2r ( mm6, mm7 ); // second copy of the lane mask
+
+ pand_r2r ( mm6, mm0 ); // keep the O1/O2 average in the flagged lanes
+
+ pandn_r2r ( mm1, mm7 ); // mm7 = ~mask & E: keep the original even pixels elsewhere
+
+ por_r2r ( mm0, mm7 ); // merge both selections
+
+ movq_r2m ( mm7, *Dest++ );
+ }
+ }
+
+ // Copy last odd line if we're processing an even field.
+ if (! IsOdd)
+ {
+ xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength,
+ pOddLines + (height - 1) * SourcePitch,
+ LineLength);
+ }
+
+ // clear out the MMX registers ready for doing floating point
+ // again
+ emms();
+#endif
+}
+
+/* Deinterlace the latest field, with a tendency to weave rather than bob.
+ Good for high detail on low-movement scenes.
+ Seems to produce bad output in general case, need to check if this
+ is normal or if the code is broken.
+
+ Reads the current frame psrc[0] and the previous frame psrc[1]. With
+ IsOdd fixed to 1 the even lines of psrc[0] are copied verbatim. Each odd
+ line is, per 16-bit lane, kept ("weaved") where its similarity score
+ against the even lines above and below and against the same odd line of
+ psrc[1] exceeds SimilarityThreshold, and is otherwise replaced by the
+ average of the two even neighbours ("bobbed"). The last odd line is
+ copied unchanged.
+
+ pdst   - destination buffer, written in lines of width bytes
+ psrc   - psrc[0] = current frame, psrc[1] = previous frame
+ width  - bytes per line; only width >> 3 quadwords per line are processed
+ height - number of lines in the frame
+
+ Returns 0 without touching pdst when psrc[0] or psrc[1] is NULL, and 1
+ otherwise. On builds without ARCH_X86 / ARCH_X86_64 the body is compiled
+ out and 1 is returned although nothing was written.
+*/
+static int deinterlace_weave_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
+ int width, int height )
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+
+ int Line;
+ uint64_t *YVal1;
+ uint64_t *YVal2;
+ uint64_t *YVal3;
+ uint64_t *YVal4;
+ uint64_t *Dest;
+ uint8_t* pEvenLines = psrc[0]; // source lines 0, 2, 4, ...
+ uint8_t* pOddLines = psrc[0]+width; // source lines 1, 3, 5, ...
+ uint8_t* pPrevLines;
+
+ int LineLength = width;
+ int SourcePitch = width * 2; // step between two lines of the same field
+ int IsOdd = 1; // constant, so the !IsOdd branches below never run
+
+ long TemporalTolerance = 300;
+ long SpatialTolerance = 600;
+ long SimilarityThreshold = 25;
+
+ int n;
+
+ uint64_t qwSpatialTolerance;
+ uint64_t qwTemporalTolerance;
+ uint64_t qwThreshold;
+
+ static mmx_t YMask = {ub:{0xff,0,0xff,0,0xff,0,0xff,0}}; // keeps bytes 0, 2, 4 and 6 of a quadword
+ static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; // clears bit 0 of every byte
+
+
+ // Make sure we have all the data we need.
+ if ( psrc[0] == NULL || psrc[1] == NULL )
+ return 0;
+
+ if (IsOdd)
+ pPrevLines = psrc[1] + width; // odd lines of the previous frame
+ else
+ pPrevLines = psrc[1];
+
+ // Since the code uses MMX to process 4 pixels at a time, we need our constants
+ // to be represented 4 times per quadword.
+ qwSpatialTolerance = SpatialTolerance;
+ qwSpatialTolerance += (qwSpatialTolerance << 48) + (qwSpatialTolerance << 32) + (qwSpatialTolerance << 16);
+ qwTemporalTolerance = TemporalTolerance;
+ qwTemporalTolerance += (qwTemporalTolerance << 48) + (qwTemporalTolerance << 32) + (qwTemporalTolerance << 16);
+ qwThreshold = SimilarityThreshold;
+ qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16);
+
+ // copy first even line no matter what, and the first odd line if we're
+ // processing an even field.
+ xine_fast_memcpy(pdst, pEvenLines, LineLength);
+ if (!IsOdd)
+ xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength);
+
+ height = height / 2; // from here on: number of lines per field
+ for (Line = 0; Line < height - 1; ++Line)
+ {
+ if (IsOdd)
+ {
+ YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); // E1: even line above
+ YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); // O: odd line being decided
+ YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); // E2: even line below
+ YVal4 = (uint64_t *)(pPrevLines + Line * SourcePitch); // Oold: same odd line, previous frame
+ Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); // output line 2 * Line + 1
+ }
+ else
+ {
+ YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch);
+ YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+ YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch);
+ YVal4 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch);
+ Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength);
+ }
+
+ // For ease of reading, the comments below assume that we're operating on an odd
+ // field (i.e., that IsOdd is true). The exact same processing is done when we
+ // operate on an even field, but the roles of the odd and even fields are reversed.
+ // It's just too cumbersome to explain the algorithm in terms of "the next odd
+ // line if we're doing an odd field, or the next even line if we're doing an
+ // even field" etc. So wherever you see "odd" or "even" below, keep in mind that
+ // for the other field those words' meanings invert.
+
+ // Copy the even scanline below this one to the overlay buffer, since we'll be
+ // adapting the current scanline to the even lines surrounding it. The scanline
+ // above has already been copied by the previous pass through the loop.
+ xine_fast_memcpy((char *)Dest + LineLength, YVal3, LineLength);
+
+ n = LineLength >> 3; // quadwords per line; a width % 8 tail is skipped
+ while( n-- )
+ {
+ movq_m2r ( *YVal1++, mm0 ); // mm0 = E1
+ movq_m2r ( *YVal2++, mm1 ); // mm1 = O
+ movq_m2r ( *YVal3++, mm2 ); // mm2 = E2
+
+ movq_r2r ( mm0, mm3 ); // mm3 = intensity(E1)
+ movq_r2r ( mm1, mm4 ); // mm4 = intensity(O)
+ movq_r2r ( mm2, mm6 ); // mm6 = intensity(E2)
+
+ pand_m2r ( YMask, mm3 ); // keep one byte value per 16-bit lane
+ pand_m2r ( YMask, mm4 );
+ pand_m2r ( YMask, mm6 );
+
+ // Average E1 and E2 for interpolated bobbing.
+ // leave result in mm0
+ pand_m2r ( Mask, mm0 ); // mm0 = E1 with bit 0 of every byte stripped off
+ pand_m2r ( Mask, mm2 ); // mm2 = E2 with bit 0 of every byte stripped off
+ psrlw_i2r ( 01, mm0 ); // mm0 = E1 / 2
+ psrlw_i2r ( 01, mm2 ); // mm2 = E2 / 2
+ paddb_r2r ( mm2, mm0 ); // mm0 = E1 / 2 + E2 / 2, bytewise
+
+ // The meat of the work is done here. We want to see whether this pixel is
+ // close in luminosity to ANY of: its top neighbor, its bottom neighbor,
+ // or its predecessor. To do this without branching, we use MMX's
+ // saturation feature, which gives us Z(x) = x if x>=0, or 0 if x<0.
+ //
+ // The formula we're computing here is
+ // Z(ST - (E1 - O) ^ 2) + Z(ST - (E2 - O) ^ 2) + Z(TT - (Oold - O) ^ 2)
+ // where ST is spatial tolerance and TT is temporal tolerance. The idea
+ // is that if a pixel is similar to none of its neighbors, the resulting
+ // value will be pretty low, probably zero. A high value therefore indicates
+ // that the pixel had a similar neighbor. The pixel in the same position
+ // in the field before last (Oold) is considered a neighbor since we want
+ // to be able to display 1-pixel-high horizontal lines.
+
+ movq_m2r ( *&qwSpatialTolerance, mm7 );
+ movq_r2r ( mm3, mm5 ); // mm5 = E1
+ psubsw_r2r ( mm4, mm5 ); // mm5 = E1 - O
+ psraw_i2r ( 1, mm5 ); // halve the difference, so the square below is ((E1 - O) / 2) ^ 2
+ pmullw_r2r ( mm5, mm5 ); // mm5 = (E1 - O) ^ 2
+ psubusw_r2r ( mm5, mm7 ); // mm7 = ST - (E1 - O) ^ 2, or 0 if that's negative
+
+ movq_m2r ( *&qwSpatialTolerance, mm3 );
+ movq_r2r ( mm6, mm5 ); // mm5 = E2
+ psubsw_r2r ( mm4, mm5 ); // mm5 = E2 - O
+ psraw_i2r ( 1, mm5 ); // halved like the E1 term
+ pmullw_r2r ( mm5, mm5 ); // mm5 = (E2 - O) ^ 2
+ psubusw_r2r ( mm5, mm3 ); // mm3 = ST - (E2 - O) ^ 2, or 0 if that's negative
+ paddusw_r2r ( mm3, mm7 ); // mm7 = (ST - (E1 - O) ^ 2) + (ST - (E2 - O) ^ 2)
+
+ movq_m2r ( *&qwTemporalTolerance, mm3 );
+ movq_m2r ( *YVal4++, mm5 ); // mm5 = Oold
+ pand_m2r ( YMask, mm5 );
+ psubsw_r2r ( mm4, mm5 ); // mm5 = Oold - O
+ psraw_i2r ( 1, mm5 ); // XXX halved like the spatial terms
+ pmullw_r2r ( mm5, mm5 ); // mm5 = (Oold - O) ^ 2
+ psubusw_r2r ( mm5, mm3 ); /* mm3 = TT - (Oold - O) ^ 2, or 0 if that's negative */
+ paddusw_r2r ( mm3, mm7 ); // mm7 = our magic number
+
+ /*
+ * Now compare the similarity totals against our threshold. The pcmpgtw
+ * instruction will populate the target register with a bunch of mask bits,
+ * filling words where the comparison is true with 1s and ones where it's
+ * false with 0s. A few ANDs and NOTs and an OR later, we have bobbed
+ * values for pixels under the similarity threshold and weaved ones for
+ * pixels over the threshold.
+ */
+
+ pcmpgtw_m2r( *&qwThreshold, mm7 ); // mm7 = 0xffff where we're greater than the threshold, 0 elsewhere
+ movq_r2r ( mm7, mm6 ); // mm6 = 0xffff where we're greater than the threshold, 0 elsewhere
+ pand_r2r ( mm1, mm7 ); // mm7 = weaved data where we're greater than the threshold, 0 elsewhere
+ pandn_r2r ( mm0, mm6 ); // mm6 = bobbed data where we're not greater than the threshold, 0 elsewhere
+ por_r2r ( mm6, mm7 ); // mm7 = bobbed and weaved data
+
+ movq_r2m ( mm7, *Dest++ );
+ }
+ }
+
+ // Copy last odd line if we're processing an odd field.
+ if (IsOdd)
+ {
+ xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength,
+ pOddLines + (height - 1) * SourcePitch,
+ LineLength);
+ }
+
+ // clear out the MMX registers ready for doing floating point
+ // again
+ emms();
+
+#endif
+
+ return 1;
+}
+
+/* deinterlace_greedy_yuv_mmx
+
+ Reads the current frame psrc[0] and the previous frame psrc[1]. With
+ IsOdd fixed to 1 the even lines of psrc[0] are copied verbatim. For each
+ odd line every byte is taken either from the current odd line (L2) or
+ from the same line of psrc[1] (LP2), whichever is closer to the average
+ of the even lines above and below (L1, L3), and is then clipped to the
+ range [Min(L1,L3) - MaxComb, Max(L1,L3) + MaxComb]. The last odd line is
+ copied unchanged.
+
+ pdst   - destination buffer, written in lines of width bytes
+ psrc   - psrc[0] = current frame, psrc[1] = previous frame
+ width  - bytes per line; only width / 8 quadwords per line are processed
+ height - number of lines in the frame
+
+ Returns 0 without touching pdst when psrc[0] or psrc[1] is NULL, and 1
+ otherwise. On builds without ARCH_X86 / ARCH_X86_64 the body is compiled
+ out and 1 is returned although nothing was written.
+*/
+// This is a simple lightweight DeInterlace method that uses little CPU time
+// but gives very good results for low or intermediate motion. (MORE CPU THAN BOB)
+// It defers frames by one field, but that does not seem to produce noticeable
+// lip sync problems.
+//
+// The method used is to take either the older or newer weave pixel depending
+// upon which gives the smaller comb factor, and then clip to avoid large damage
+// when wrong.
+//
+// I'd intended this to be part of a larger more elaborate method added to
+// Blended Clip but this gives too good results for the CPU to ignore here.
+static int deinterlace_greedy_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
+ int width, int height )
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+ int Line;
+ int LoopCtr;
+ uint64_t *L1; // ptr to Line1, of 3
+ uint64_t *L2; // ptr to Line2, the weave line
+ uint64_t *L3; // ptr to Line3
+ uint64_t *LP2; // ptr to prev Line2
+ uint64_t *Dest;
+ uint8_t* pEvenLines = psrc[0]; // source lines 0, 2, 4, ...
+ uint8_t* pOddLines = psrc[0]+width; // source lines 1, 3, 5, ...
+ uint8_t* pPrevLines;
+
+ static mmx_t ShiftMask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; // clears bit 0 of every byte before the word shift
+
+ int LineLength = width;
+ int SourcePitch = width * 2; // step between two lines of the same field
+ int IsOdd = 1; // constant, so the !IsOdd branches below never run
+ long GreedyMaxComb = 15;
+ static mmx_t MaxComb; // static storage, refilled with the same constant on every call
+ int i;
+
+ if ( psrc[0] == NULL || psrc[1] == NULL )
+ return 0;
+
+ if (IsOdd)
+ pPrevLines = psrc[1] + width; // odd lines of the previous frame
+ else
+ pPrevLines = psrc[1];
+
+
+ for( i = 0; i < 8; i++ )
+ MaxComb.ub[i] = GreedyMaxComb; // How badly do we let it weave? 0-255
+
+
+ // copy first even line no matter what, and the first odd line if we're
+ // processing an EVEN field. (note diff from other deint rtns.)
+ xine_fast_memcpy(pdst, pEvenLines, LineLength); //DL0
+ if (!IsOdd)
+ xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength); //DL1
+
+ height = height / 2; // from here on: number of lines per field
+ for (Line = 0; Line < height - 1; ++Line)
+ {
+ LoopCtr = LineLength / 8; // there are LineLength / 8 qwords per line
+
+ if (IsOdd)
+ {
+ L1 = (uint64_t *)(pEvenLines + Line * SourcePitch);
+ L2 = (uint64_t *)(pOddLines + Line * SourcePitch);
+ L3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+ LP2 = (uint64_t *)(pPrevLines + Line * SourcePitch); // prev Odd lines
+ Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); // output line 2 * Line + 1
+ }
+ else
+ {
+ L1 = (uint64_t *)(pOddLines + Line * SourcePitch);
+ L2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+ L3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch);
+ LP2 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); //prev even lines
+ Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength);
+ }
+
+ xine_fast_memcpy((char *)Dest + LineLength, L3, LineLength); // the line below Dest is copied verbatim
+
+// For ease of reading, the comments below assume that we're operating on an odd
+// field (i.e., that IsOdd is true). Assume the obvious for even lines..
+
+ while( LoopCtr-- )
+ {
+ movq_m2r ( *L1++, mm1 ); // mm1 = L1
+ movq_m2r ( *L2++, mm2 ); // mm2 = L2
+ movq_m2r ( *L3++, mm3 ); // mm3 = L3
+ movq_m2r ( *LP2++, mm0 ); // mm0 = LP2
+
+ // average L1 and L3 leave result in mm4
+ movq_r2r ( mm1, mm4 ); // L1
+
+ pand_m2r ( ShiftMask, mm4 );
+ psrlw_i2r ( 01, mm4 );
+ movq_r2r ( mm3, mm5 ); // L3
+ pand_m2r ( ShiftMask, mm5 );
+ psrlw_i2r ( 01, mm5 );
+ paddb_r2r ( mm5, mm4 ); // the average, for computing comb
+
+ // get abs value of possible L2 comb
+ movq_r2r ( mm2, mm7 ); // L2
+ psubusb_r2r ( mm4, mm7 ); // L2 - avg
+ movq_r2r ( mm4, mm5 ); // avg
+ psubusb_r2r ( mm2, mm5 ); // avg - L2
+ por_r2r ( mm7, mm5 ); // abs(avg-L2)
+ movq_r2r ( mm4, mm6 ); // copy of avg for later
+
+ // get abs value of possible LP2 comb
+ movq_r2r ( mm0, mm7 ); // LP2
+ psubusb_r2r ( mm4, mm7 ); // LP2 - avg
+ psubusb_r2r ( mm0, mm4 ); // avg - LP2
+ por_r2r ( mm7, mm4 ); // abs(avg-LP2)
+
+ // use L2 or LP2 depending upon which makes smaller comb
+ psubusb_r2r ( mm5, mm4 ); // see if it goes to zero
+ psubusb_r2r ( mm5, mm5 ); // 0
+ pcmpeqb_r2r ( mm5, mm4 ); // if (mm4=0) then FF else 0
+ pcmpeqb_r2r ( mm4, mm5 ); // opposite of mm4
+
+ // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = ff
+ pand_r2r ( mm2, mm5 ); // use L2 if mm5 == ff, else 0
+ pand_r2r ( mm0, mm4 ); // use LP2 if mm4 = ff, else 0
+ por_r2r ( mm5, mm4 ); // may the best win
+
+ // Now lets clip our chosen value to be not outside of the range
+ // of the high/low range L1-L3 by more than MaxComb.
+ // This allows some comb but limits the damages and also allows more
+ // detail than a boring oversmoothed clip.
+
+ movq_r2r ( mm1, mm2 ); // copy L1
+ psubusb_r2r ( mm3, mm2 ); // - L3, with saturation
+ paddusb_r2r ( mm3, mm2 ); // now = Max(L1,L3)
+
+ pcmpeqb_r2r ( mm7, mm7 ); // all ffffffff
+ psubusb_r2r ( mm1, mm7 ); // - L1
+ paddusb_r2r ( mm7, mm3 ); // add, may sat at fff..
+ psubusb_r2r ( mm7, mm3 ); // now = Min(L1,L3)
+
+ // allow the value to be above the high or below the low by amt of MaxComb
+ paddusb_m2r ( MaxComb, mm2 ); // increase max by diff
+ psubusb_m2r ( MaxComb, mm3 ); // lower min by diff
+
+ psubusb_r2r ( mm3, mm4 ); // best - lowered min, saturating at 0
+ paddusb_r2r ( mm3, mm4 ); // now = Max(best, lowered min)
+
+ pcmpeqb_r2r ( mm7, mm7 ); // all ffffffff
+ psubusb_r2r ( mm4, mm7 ); // - Max(best, lowered min)
+ paddusb_r2r ( mm7, mm2 ); // add may sat at FFF..
+ psubusb_r2r ( mm7, mm2 ); // now = Min( raised max, Max(best, lowered min) ) = best clipped
+
+ movq_r2m ( mm2, *Dest++ ); // move in our clipped best
+
+ }
+ }
+
+ /* Copy last odd line if we're processing an Odd field. */
+ if (IsOdd)
+ {
+ xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength,
+ pOddLines + (height - 1) * SourcePitch,
+ LineLength);
+ }
+
+ /* clear out the MMX registers ready for doing floating point again */
+ emms();
+
+#endif
+
+ return 1;
+}
+
+/* Use one field to interpolate the other (low cpu utilization)
+ Will lose resolution but does not produce weaving effect
+ (good for fast moving scenes) also known as "linear interpolation"
+
+ Only psrc[0] is read. With IsOdd fixed to 1 the first even line and all
+ odd lines are copied verbatim, and every even line between two odd lines
+ is replaced by the bytewise average of the odd lines above and below it.
+
+ pdst   - destination buffer, written in lines of width bytes
+ psrc   - source frames; only psrc[0] is read here
+ width  - bytes per line; only width >> 3 quadwords per interpolated line
+          are written, a width % 8 tail is skipped
+ height - number of lines in the frame
+
+ On builds without ARCH_X86 / ARCH_X86_64 the body is compiled out and the
+ function does nothing.
+*/
+static void deinterlace_onefield_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
+ int width, int height )
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+ int Line;
+ uint64_t *YVal1;
+ uint64_t *YVal3;
+ uint64_t *Dest;
+ uint8_t* pEvenLines = psrc[0]; /* source lines 0, 2, 4, ... */
+ uint8_t* pOddLines = psrc[0]+width; /* source lines 1, 3, 5, ... */
+ int LineLength = width;
+ int SourcePitch = width * 2; /* step between two lines of the same field */
+ int IsOdd = 1; /* constant, so the !IsOdd branches below never run */
+
+ int n;
+
+ static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; /* clears bit 0 of every byte */
+
+ /*
+ * copy first even line no matter what, and the first odd line if we're
+ * processing an odd field.
+ */
+
+ xine_fast_memcpy(pdst, pEvenLines, LineLength);
+ if (IsOdd)
+ xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength);
+
+ height = height / 2; /* from here on: number of lines per field */
+ for (Line = 0; Line < height - 1; ++Line)
+ {
+ if (IsOdd)
+ {
+ YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); /* odd line above */
+ YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); /* odd line below */
+ Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); /* output line 2 * Line + 2 */
+ }
+ else
+ {
+ YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch);
+ YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
+ Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength);
+ }
+
+ // Copy the odd line below to the output line after Dest verbatim.
+ xine_fast_memcpy((char *)Dest + LineLength, YVal3, LineLength);
+
+ n = LineLength >> 3; /* quadwords per line */
+ while( n-- )
+ {
+ movq_m2r (*YVal1++, mm0);
+ movq_m2r (*YVal3++, mm2);
+
+ /* get average in mm0: with bit 0 of every byte cleared, the word shift
+    halves each byte without leaking bits between bytes, and the sum of
+    two halved bytes cannot carry into the neighbouring byte */
+ pand_m2r ( Mask, mm0 );
+ pand_m2r ( Mask, mm2 );
+ psrlw_i2r ( 01, mm0 );
+ psrlw_i2r ( 01, mm2 );
+ paddw_r2r ( mm2, mm0 );
+
+ movq_r2m ( mm0, *Dest++ );
+ }
+ }
+
+ /* Copy last odd line if we're processing an even field. */
+ if (! IsOdd)
+ {
+ xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength,
+ pOddLines + (height - 1) * SourcePitch,
+ LineLength);
+ }
+
+ /* clear out the MMX registers ready for doing floating point
+ * again
+ */
+ emms();
+#endif
+}
+
+/* Linear Blend filter - does a kind of vertical blurring on the image.
+ (idea borrowed from mplayer's sources)
+
+ Only psrc[0] is read. The first and the last line are copied unchanged;
+ every line in between becomes roughly (above + 2 * current + below) / 4,
+ computed 8 bytes at a time.
+
+ pdst   - destination buffer, height lines of width bytes
+ psrc   - source frames; only psrc[0] is read here
+ width  - bytes per line; only width >> 3 quadwords per blended line are
+          written, a width % 8 tail is skipped
+ height - number of lines. For height <= 2 the loop does not run and Line
+          stays 1, so the "last line" copy addresses line 1 even when the
+          frame has a single line.
+
+ On builds without ARCH_X86 / ARCH_X86_64 the body is compiled out and the
+ function does nothing.
+*/
+static void deinterlace_linearblend_yuv_mmx( uint8_t *pdst, uint8_t *psrc[],
+ int width, int height )
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+ int Line;
+ uint64_t *YVal1;
+ uint64_t *YVal2;
+ uint64_t *YVal3;
+ uint64_t *Dest;
+ int LineLength = width;
+
+ int n;
+
+ /* Copy first line */
+ xine_fast_memcpy(pdst, psrc[0], LineLength);
+
+ for (Line = 1; Line < height - 1; ++Line)
+ {
+ YVal1 = (uint64_t *)(psrc[0] + (Line - 1) * LineLength); /* line above */
+ YVal2 = (uint64_t *)(psrc[0] + (Line) * LineLength); /* current line */
+ YVal3 = (uint64_t *)(psrc[0] + (Line + 1) * LineLength); /* line below */
+ Dest = (uint64_t *)(pdst + Line * LineLength);
+
+ n = LineLength >> 3; /* quadwords per line */
+ while( n-- )
+ {
+ /* load data from 3 lines */
+ movq_m2r (*YVal1++, mm0);
+ movq_m2r (*YVal2++, mm1);
+ movq_m2r (*YVal3++, mm2);
+
+ /* expand bytes to words: every source byte ends up in the HIGH byte of a
+    16-bit lane. For mm3/mm4/mm5 (upper four bytes) the low byte of the
+    lane is whatever the register held before; for mm0/mm1/mm2 (lower
+    four bytes) it is a copy of the same source byte. The shifts below
+    drop all but the top one or two bits of that low byte. */
+ punpckhbw_r2r (mm0, mm3);
+ punpckhbw_r2r (mm1, mm4);
+ punpckhbw_r2r (mm2, mm5);
+ punpcklbw_r2r (mm0, mm0);
+ punpcklbw_r2r (mm1, mm1);
+ punpcklbw_r2r (mm2, mm2);
+
+ /*
+ * deinterlacing:
+ * deint_line = (line0 + 2*line1 + line2) / 4
+ *
+ * A shift by 7 leaves 2 * byte, a shift by 6 leaves 4 * byte (plus the
+ * surviving low-byte bits), so the sums hold 2*line0 + 4*line1 + 2*line2
+ * and the final shift by 3 divides by 8. Because of the leftover bits
+ * the result can be one higher than the exact quotient.
+ */
+ psrlw_i2r (07, mm0);
+ psrlw_i2r (06, mm1);
+ psrlw_i2r (07, mm2);
+ psrlw_i2r (07, mm3);
+ psrlw_i2r (06, mm4);
+ psrlw_i2r (07, mm5);
+ paddw_r2r (mm1, mm0);
+ paddw_r2r (mm2, mm0);
+ paddw_r2r (mm4, mm3);
+ paddw_r2r (mm5, mm3);
+ psrlw_i2r (03, mm0);
+ psrlw_i2r (03, mm3);
+
+ /* pack 8 words to 8 bytes in mm0 */
+ packuswb_r2r (mm3, mm0);
+
+ movq_r2m ( mm0, *Dest++ );
+ }
+ }
+
+ /* Copy last line */
+ xine_fast_memcpy(pdst + Line * LineLength,
+ psrc[0] + Line * LineLength, LineLength);
+
+ /* clear out the MMX registers ready for doing floating point
+ * again
+ */
+ emms();
+#endif
+}
+
+/* Linear Blend filter - C version contributed by Rogerio Brito.
+   This algorithm has the same interface as the other functions.
+
+   The destination "screen" (pdst) is constructed from the source
+   screen (psrc[0]) line by line.
+
+   The i-th line of the destination screen is the average of 3 lines
+   from the source screen: the (i-1)-th, i-th and (i+1)-th lines, with
+   the i-th line having weight 2 in the computation.
+
+   Remarks:
+   * each line on pdst doesn't depend on previous lines;
+   * due to the way the algorithm is defined, the first & last lines of the
+     screen aren't deinterlaced: each is copied from the same source line;
+   * a frame with width <= 0 or height <= 0 is ignored, and a frame with a
+     single line is simply copied.
+
+   pdst   - destination buffer, height lines of width bytes
+   psrc   - source frames; only psrc[0] is read here
+   width  - bytes per line
+   height - number of lines
+*/
+static void deinterlace_linearblend_yuv( uint8_t *pdst, uint8_t *psrc[],
+                                         int width, int height )
+{
+  const uint8_t *above;   /* source line i-1 */
+  const uint8_t *cur;     /* source line i   */
+  const uint8_t *below;   /* source line i+1 */
+  uint8_t *out = pdst;    /* destination line i */
+  int x, y;
+
+  /* Nothing sensible can be done with an empty frame. */
+  if (width <= 0 || height <= 0)
+    return;
+
+  /* Copy the first line */
+  xine_fast_memcpy(out, psrc[0], width);
+
+  /* A one-line frame has no second line to write. */
+  if (height == 1)
+    return;
+  out += width;
+
+  above = psrc[0];
+  cur   = above + width;
+  below = cur + width;
+
+  for (y = 1; y < height - 1; ++y) {
+    /* computes avg of: above + 2*cur + below */
+    for (x = 0; x < width; ++x) {
+      out[x] = (above[x] + (cur[x] << 1) + below[x]) >> 2;
+    }
+
+    /* updates the line pointers */
+    above = cur;
+    cur = below;
+    below += width;
+    out += width;
+  }
+
+  /* Copy the last line. After the loop "cur" is source line height-1,
+     which matches the destination line "out" points at; "above" would be
+     the line before it. */
+  xine_fast_memcpy(out, cur, width);
+}
+
+/* Reports whether the MMX code paths may be used: 1 when the flags from
+   xine_mm_accel() contain MM_ACCEL_X86_MMX, 0 otherwise. The flags are
+   queried once and cached in a function-local static. On builds without
+   ARCH_X86 / ARCH_X86_64 the answer is always 0.
+*/
+static int check_for_mmx(void)
+{
+#if !defined(ARCH_X86) && !defined(ARCH_X86_64)
+  return 0;
+#else
+  static int accel_cache = -1;   /* -1 = not queried yet */
+
+  if (accel_cache == -1)
+    accel_cache = xine_mm_accel();
+
+  return (accel_cache & MM_ACCEL_X86_MMX) ? 1 : 0;
+#endif
+}
+
+/* generic YUV deinterlacer
+   pdst -> pointer to destination bitmap
+   psrc -> array of pointers to source bitmaps ([0] = most recent)
+   width,height -> dimension for bitmaps
+   method -> DEINTERLACE_xxx
+
+   Whenever the requested method cannot run (no MMX, or the MMX routine
+   returned 0), psrc[0] is copied to pdst unchanged. DEINTERLACE_ONEFIELDXV
+   and unknown methods only log a message and leave pdst untouched.
+*/
+
+void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[],
+                      int width, int height, int method )
+{
+  int filled = 0;   /* becomes non-zero once a deinterlacer has written pdst */
+
+  switch( method ) {
+    case DEINTERLACE_ONEFIELDXV:
+      lprintf("ONEFIELDXV must be handled by the video driver.\n");
+      return;
+
+    case DEINTERLACE_NONE:
+      /* plain copy, done below */
+      break;
+
+    case DEINTERLACE_BOB:
+      if( check_for_mmx() ) {
+        deinterlace_bob_yuv_mmx(pdst,psrc,width,height);
+        filled = 1;
+      }
+      /* FIXME: provide an alternative? */
+      break;
+
+    case DEINTERLACE_WEAVE:
+      /* FIXME: provide an alternative? */
+      filled = check_for_mmx() &&
+               deinterlace_weave_yuv_mmx(pdst,psrc,width,height);
+      break;
+
+    case DEINTERLACE_GREEDY:
+      /* FIXME: provide an alternative? */
+      filled = check_for_mmx() &&
+               deinterlace_greedy_yuv_mmx(pdst,psrc,width,height);
+      break;
+
+    case DEINTERLACE_ONEFIELD:
+      if( check_for_mmx() ) {
+        deinterlace_onefield_yuv_mmx(pdst,psrc,width,height);
+        filled = 1;
+      }
+      /* FIXME: provide an alternative? */
+      break;
+
+    case DEINTERLACE_LINEARBLEND:
+      /* the only method with a plain C implementation */
+      if( check_for_mmx() )
+        deinterlace_linearblend_yuv_mmx(pdst,psrc,width,height);
+      else
+        deinterlace_linearblend_yuv(pdst,psrc,width,height);
+      filled = 1;
+      break;
+
+    default:
+      lprintf("unknow method %d.\n",method);
+      return;
+  }
+
+  /* shared fallback: hand the source frame through unmodified */
+  if( !filled )
+    xine_fast_memcpy(pdst,psrc[0],width*height);
+}
+
+/* Tells whether deinterlace_yuv() has a real implementation for method.
+   Returns 1 for DEINTERLACE_NONE and DEINTERLACE_LINEARBLEND, the result of
+   check_for_mmx() for the MMX-only methods, and 0 for everything else
+   (DEINTERLACE_ONEFIELDXV additionally logs a message).
+*/
+int deinterlace_yuv_supported ( int method )
+{
+  int supported = 0;
+
+  switch( method ) {
+    case DEINTERLACE_NONE:
+    case DEINTERLACE_LINEARBLEND:
+      supported = 1;
+      break;
+
+    case DEINTERLACE_BOB:
+    case DEINTERLACE_WEAVE:
+    case DEINTERLACE_GREEDY:
+    case DEINTERLACE_ONEFIELD:
+      supported = check_for_mmx();
+      break;
+
+    case DEINTERLACE_ONEFIELDXV:
+      lprintf ("ONEFIELDXV must be handled by the video driver.\n");
+      break;
+
+    default:
+      break;
+  }
+
+  return supported;
+}
+
+char *deinterlace_methods[] = { /* NULL-terminated list of method names.
+   NOTE(review): the order presumably mirrors the DEINTERLACE_* values
+   declared in deinterlace.h, which is not visible here -- confirm before
+   reordering or inserting entries. */
+ "none",
+ "bob",
+ "weave",
+ "greedy",
+ "onefield",
+ "onefield_xv",
+ "linearblend",
+ NULL
+};
+
+
--- /dev/null
+/*
+ * Copyright (C) 2000-2004 the xine project
+ *
+ * This file is part of xine, a free video player.
+ *
+ * xine is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * xine is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * $Id$
+ *
+ */
+#ifndef XINEUTILS_H
+#define XINEUTILS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <pthread.h>
+#if HAVE_LIBGEN_H
+# include <libgen.h>
+#endif
+
+#ifdef XINE_COMPILE
+# include "attributes.h"
+# include "compat.h"
+# include "xmlparser.h"
+# include "xine_buffer.h"
+# include "configfile.h"
+#else
+# include <xine/attributes.h>
+# include <xine/compat.h>
+# include <xine/xmlparser.h>
+# include <xine/xine_buffer.h>
+# include <xine/configfile.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+ /*
+ * debuggable mutexes
+ *
+ * xine_mutex_t bundles a pthread mutex with a fixed 80-byte id buffer and
+ * a locked_by string pointer. Only the declarations are visible in this
+ * header; presumably init stores the id, and lock/unlock record `who` in
+ * locked_by for diagnostics -- confirm against the implementation.
+ */
+
+ typedef struct {
+ pthread_mutex_t mutex; /* the underlying pthread mutex */
+ char id[80]; /* identifier buffer, 80 bytes including any terminator */
+ char *locked_by; /* not owned storage as far as this header shows -- TODO confirm */
+ } xine_mutex_t;
+
+ int xine_mutex_init (xine_mutex_t *mutex, const pthread_mutexattr_t *mutexattr,
+ char *id);
+
+ int xine_mutex_lock (xine_mutex_t *mutex, char *who);
+ int xine_mutex_unlock (xine_mutex_t *mutex, char *who);
+ int xine_mutex_destroy (xine_mutex_t *mutex);
+
+
+
+ /* CPU Acceleration */
+
+/*
+ * The type of a value that fits in an MMX register is the mmx_t union
+ * defined further below. (Note that long long constant values MUST be
+ * suffixed by LL and unsigned long long values by ULL, lest they be
+ * truncated by the compiler.)
+ */
+
+/* generic accelerations
+ *
+ * Every MM_ACCEL_* value below has exactly one bit set, so the values can
+ * be OR-ed together and tested with `&` against the flag word returned by
+ * xine_mm_accel().
+ */
+#define MM_ACCEL_MLIB 0x00000001
+
+/* x86 accelerations */
+#define MM_ACCEL_X86_MMX 0x80000000 /* does not fit in a signed 32-bit int; the constant has an unsigned type */
+#define MM_ACCEL_X86_3DNOW 0x40000000
+#define MM_ACCEL_X86_MMXEXT 0x20000000
+#define MM_ACCEL_X86_SSE 0x10000000
+#define MM_ACCEL_X86_SSE2 0x08000000
+/* powerpc accelerations */
+#define MM_ACCEL_PPC_ALTIVEC 0x04000000
+/* x86 compat defines: shorter aliases for the x86 flags above */
+#define MM_MMX MM_ACCEL_X86_MMX
+#define MM_3DNOW MM_ACCEL_X86_3DNOW
+#define MM_MMXEXT MM_ACCEL_X86_MMXEXT
+#define MM_SSE MM_ACCEL_X86_SSE
+#define MM_SSE2 MM_ACCEL_X86_SSE2
+
+uint32_t xine_mm_accel (void); /* returns a combination of the MM_ACCEL_* flags; implementation not visible here */
+
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+
+typedef union { /* one 64-bit quantity, viewable as any of the lane layouts below */
+ int64_t q; /* Quadword (64-bit) value */
+ uint64_t uq; /* Unsigned Quadword */
+ int d[2]; /* 2 Doubleword (32-bit) values */
+ unsigned int ud[2]; /* 2 Unsigned Doubleword */
+ short w[4]; /* 4 Word (16-bit) values */
+ unsigned short uw[4]; /* 4 Unsigned Word */
+ char b[8]; /* 8 Byte (8-bit) values */
+ unsigned char ub[8]; /* 8 Unsigned Byte */
+ float s[2]; /* 2 Single-precision (32-bit) values */
+} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */
+
+
+/* Building blocks for the per-instruction wrappers that follow. Each one
+ * emits a single inline-asm statement; `op` is pasted in as the mnemonic
+ * and the register arguments are pasted in by name (mm0 ... mm7).
+ *
+ *   mmx_i2r  immediate operand -> register
+ *   mmx_m2r  memory operand    -> register
+ *   mmx_r2m  register          -> memory operand
+ *   mmx_r2r  register          -> register
+ *
+ * Operands are written source first, destination last. None of the
+ * statements lists the MMX registers as outputs or clobbers, so the
+ * compiler is not told that they change; code using these macros relies
+ * on the asm statements being emitted in source order.
+ */
+#define mmx_i2r(op,imm,reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "i" (imm) )
+
+#define mmx_m2r(op,mem,reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "m" (mem))
+
+#define mmx_r2m(op,reg,mem) \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=m" (mem) \
+ : /* nothing */ )
+
+#define mmx_r2r(op,regs,regd) \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd) /* no operand list, so a single % is enough here */
+
+
+#define emms() __asm__ __volatile__ ("emms") /* issue after MMX code, before floating point is used again */
+
+#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
+#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
+#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
+
+#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
+#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
+#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
+
+/*
+ * Pack, add, and/andn, compare, multiply and or wrappers.
+ * Every line forwards to mmx_m2r (var, reg) or mmx_r2r (regs, regd) with
+ * the instruction mnemonic as first argument; those helpers are defined
+ * outside this part of the file.
+ */
+
+/* pack with signed / unsigned saturation */
+#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
+#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
+#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
+#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
+
+#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
+#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
+
+/* add: plain (paddb/d/w), signed saturating (paddsb/w), unsigned saturating (paddusb/w) */
+#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
+#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
+#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
+#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
+#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
+#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
+
+#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
+#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
+#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
+#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
+
+#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
+#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
+#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
+#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
+
+/* bitwise and / and-not */
+#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
+#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
+
+#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
+#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
+
+/* compare for equality / greater-than, per byte, word or dword */
+#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
+#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
+#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
+#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
+#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
+#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
+
+#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
+#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
+#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
+#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
+#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
+#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
+
+/* multiply-add and multiply (high / low word of the product) */
+#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
+#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
+
+#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
+#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
+
+#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
+#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
+
+/* bitwise or */
+#define por_m2r(var,reg) mmx_m2r (por, var, reg)
+#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
+
+/*
+ * Shift wrappers. Besides the _m2r (var, reg) and _r2r (regs, regd)
+ * forms, each shift also has an _i2r (imm, reg) form that goes through
+ * mmx_i2r. The three helpers are defined outside this part of the file.
+ */
+
+/* shift left logical: dword, qword, word */
+#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
+#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
+#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
+#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
+#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
+#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
+#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
+#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
+#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
+
+/* shift right arithmetic: dword, word */
+#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
+#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
+#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
+#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
+#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
+#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
+
+/* shift right logical: dword, qword, word */
+#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
+#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
+#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
+#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
+#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
+#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
+#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
+#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
+#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
+
+/*
+ * Subtract, unpack and xor wrappers. Every line forwards to
+ * mmx_m2r (var, reg) or mmx_r2r (regs, regd), which are defined outside
+ * this part of the file.
+ */
+
+/* subtract: plain (psubb/d/w), signed saturating (psubsb/w), unsigned saturating (psubusb/w) */
+#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
+#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
+#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
+#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
+#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
+#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
+
+#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
+#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
+#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
+#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
+
+#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
+#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
+#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
+#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
+
+/* unpack (interleave) the high halves */
+#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
+#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
+#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
+#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
+#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
+#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
+
+/* unpack (interleave) the low halves */
+#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
+#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
+#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
+#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
+#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
+#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
+
+/* bitwise xor */
+#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
+#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
+
+
+/*
+ * 3DNOW extensions: pavgusb wrappers, forwarding to the same
+ * mmx_m2r / mmx_r2r helpers as the plain MMX wrappers.
+ */
+
+#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
+#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
+
+
+/* AMD MMX extensions - also available in intel SSE */
+
+
+/*
+ * mmx_m2ri: emit "op imm, mem, %reg". mem (%0) and imm (%1) are passed as
+ * input operands with the "X" constraint; reg is pasted in by name.
+ */
+#define mmx_m2ri(op,mem,reg,imm) \
+ __asm__ __volatile__ (#op " %1, %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem), "X" (imm))
+/*
+ * mmx_r2ri: emit "op imm, %regs, %regd". Only imm is an asm operand; both
+ * register names are pasted in textually.
+ */
+#define mmx_r2ri(op,regs,regd,imm) \
+ __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+ : /* nothing */ \
+ : "X" (imm) )
+
+/*
+ * mmx_fetch: emit "prefetch<hint> mem", where hint is appended to the
+ * mnemonic (see the prefetcht0/t1/t2/nta wrappers).
+ */
+#define mmx_fetch(mem,hint) \
+ __asm__ __volatile__ ("prefetch" #hint " %0" \
+ : /* nothing */ \
+ : "X" (mem))
+
+
+/*
+ * NOTE(review): maskmovq passes three arguments to mmx_r2ri, which is
+ * declared with four parameters (op, regs, regd, imm), so this wrapper
+ * cannot be expanded as written. It presumably wants a two-register
+ * helper instead - confirm the operand order before changing it.
+ */
+#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
+
+/* movntq store, register to memory */
+#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
+
+/* byte / word average */
+#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
+#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
+#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
+#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
+
+/* extract / insert one word; imm is forwarded as the immediate of mmx_r2ri */
+#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
+
+#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
+
+/* max / min: signed words (sw) and unsigned bytes (ub) */
+#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
+#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
+
+#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
+#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
+
+#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
+#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
+
+#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
+#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
+
+/*
+ * pmovmskb: emit "pmovmskb %mmreg, %reg".
+ * This is basic asm (no operand list), so a single '%' introduces each
+ * register name, which is pasted in textually. The mnemonic must be
+ * pmovmskb itself; movmskps is the separate SSE instruction wrapped by
+ * the movmskps() macro.
+ */
+#define pmovmskb(mmreg,reg) \
+ __asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
+
+/* unsigned multiply, high word of the product */
+#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
+#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
+
+/* prefetch wrappers: the second argument of mmx_fetch becomes the mnemonic suffix */
+#define prefetcht0(mem) mmx_fetch (mem, t0)
+#define prefetcht1(mem) mmx_fetch (mem, t1)
+#define prefetcht2(mem) mmx_fetch (mem, t2)
+#define prefetchnta(mem) mmx_fetch (mem, nta)
+
+/* sum of absolute byte differences */
+#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
+#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
+
+/* word shuffle; imm is forwarded as the immediate of mmx_m2ri / mmx_r2ri */
+#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
+#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
+
+/* sfence has no operands and is emitted directly as inline asm. */
+#define sfence() __asm__ __volatile__ ("sfence\n\t")
+
+/*
+ * sse_t: storage for four floats, declared with ATTR_ALIGN(16).
+ * ATTR_ALIGN is defined outside this part of the file.
+ */
+typedef union {
+ float sf[4]; /* Single-precision (32-bit) value */
+} ATTR_ALIGN(16) sse_t; /* On a 16 byte (128-bit) boundary */
+
+
+/*
+ * Generic SSE inline-asm helpers. op is the mnemonic and register names
+ * are pasted in textually; memory and immediate arguments are passed as
+ * asm operands with the "X" constraint.
+ */
+
+/* sse_i2r: emit "op imm, %reg". */
+#define sse_i2r(op, imm, reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (imm) )
+
+/* sse_m2r: emit "op mem, %reg"; mem is an input operand. */
+#define sse_m2r(op, mem, reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem))
+
+/* sse_r2m: emit "op %reg, mem"; mem is an output operand. */
+#define sse_r2m(op, reg, mem) \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=X" (mem) \
+ : /* nothing */ )
+
+/*
+ * sse_r2r: emit "op %regs, %regd". This is basic asm without an operand
+ * list, hence the single '%' in front of each register name.
+ */
+#define sse_r2r(op, regs, regd) \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+/* sse_r2ri: emit "op imm, %regs, %regd". */
+#define sse_r2ri(op, regs, regd, imm) \
+ __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+ : /* nothing */ \
+ : "X" (imm) )
+
+/*
+ * sse_m2ri: emit "op $subop, mem, %reg" - a memory source, a register
+ * destination and an immediate selector (the cmpps/cmpss predicate or
+ * the shufps index).
+ * AT&T syntax puts the immediate first and prefixes it with '$', in the
+ * same position where mmx_m2ri and sse_r2ri place theirs. subop is pasted
+ * in textually, so it has to be a literal constant.
+ */
+#define sse_m2ri(op, mem, reg, subop) \
+ __asm__ __volatile__ (#op " $" #subop ", %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem))
+
+
+/*
+ * SSE move and shuffle wrappers. _m2r forwards (var, reg) to sse_m2r,
+ * _r2m forwards (reg, var) to sse_r2m and _r2r forwards (regs, regd) to
+ * sse_r2r. The shufps wrappers pass their index on to sse_m2ri / sse_r2ri.
+ */
+
+#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg)
+#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var)
+#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd)
+
+#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var)
+
+#define movups_m2r(var, reg) sse_m2r(movups, var, reg)
+#define movups_r2m(reg, var) sse_r2m(movups, reg, var)
+#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd)
+
+#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd)
+
+#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd)
+
+#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg)
+#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var)
+
+#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg)
+#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var)
+
+#define movss_m2r(var, reg) sse_m2r(movss, var, reg)
+#define movss_r2m(reg, var) sse_r2m(movss, reg, var)
+#define movss_r2r(regs, regd) sse_r2r(movss, regs, regd)
+
+#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index)
+#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index)
+
+/*
+ * SSE conversion wrappers. In every _r2r form the first macro parameter
+ * is the source register and the second the destination, and they are
+ * handed to sse_r2r(op, regs, regd) in that same order.
+ */
+
+/* packed int (MMX register or memory) -> packed float (XMM register) */
+#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg)
+#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg)
+
+/* packed float (XMM register or memory) -> packed int (MMX register) */
+#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg)
+#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, xmmreg, mmreg)
+
+/* same, truncating */
+#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg)
+#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, xmmreg, mmreg)
+
+/* scalar int (general register or memory) -> scalar float */
+#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg)
+#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg)
+
+/* scalar float -> scalar int */
+#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg)
+#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg)
+
+/* same, truncating: the mnemonic is cvttss2si, not cvtss2si */
+#define cvttss2si_m2r(var, reg) sse_m2r(cvttss2si, var, reg)
+#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvttss2si, xmmreg, reg)
+
+/*
+ * movmskps: emit "movmskps %xmmreg, %reg" as basic asm, with both
+ * register names pasted in textually.
+ */
+#define movmskps(xmmreg, reg) \
+ __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
+
+/*
+ * SSE arithmetic and logic wrappers. The ...ps name is the packed form
+ * and the ...ss name the scalar form of each instruction; _m2r forwards
+ * to sse_m2r and _r2r to sse_r2r.
+ */
+
+#define addps_m2r(var, reg) sse_m2r(addps, var, reg)
+#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd)
+
+#define addss_m2r(var, reg) sse_m2r(addss, var, reg)
+#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd)
+
+#define subps_m2r(var, reg) sse_m2r(subps, var, reg)
+#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd)
+
+#define subss_m2r(var, reg) sse_m2r(subss, var, reg)
+#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd)
+
+#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg)
+#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd)
+
+#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg)
+#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd)
+
+#define divps_m2r(var, reg) sse_m2r(divps, var, reg)
+#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd)
+
+#define divss_m2r(var, reg) sse_m2r(divss, var, reg)
+#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd)
+
+#define rcpps_m2r(var, reg) sse_m2r(rcpps, var, reg)
+#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd)
+
+#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg)
+#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd)
+
+#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg)
+#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd)
+
+#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg)
+#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd)
+
+#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg)
+#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd)
+
+#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg)
+#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd)
+
+#define andps_m2r(var, reg) sse_m2r(andps, var, reg)
+#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd)
+
+#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg)
+#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd)
+
+#define orps_m2r(var, reg) sse_m2r(orps, var, reg)
+#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd)
+
+#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg)
+#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd)
+
+#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg)
+#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd)
+
+#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg)
+#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd)
+
+#define minps_m2r(var, reg) sse_m2r(minps, var, reg)
+#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd)
+
+#define minss_m2r(var, reg) sse_m2r(minss, var, reg)
+#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd)
+
+/*
+ * SSE compare wrappers. cmpps / cmpss take the comparison predicate as
+ * their last argument and pass it to sse_m2ri / sse_r2ri. The named
+ * variants below hard-code that argument:
+ * 0 eq, 1 lt, 2 le, 3 unord, 4 neq, 5 nlt, 6 nle, 7 ord.
+ */
+
+/* packed compares */
+#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op)
+#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op)
+
+#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0)
+#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0)
+
+#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1)
+#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1)
+
+#define cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2)
+#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2)
+
+#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3)
+#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3)
+
+#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4)
+#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4)
+
+#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5)
+#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5)
+
+#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6)
+#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6)
+
+#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7)
+#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7)
+
+/* scalar compares */
+#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op)
+#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op)
+
+#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0)
+#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0)
+
+#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1)
+#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1)
+
+#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2)
+#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2)
+
+#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3)
+#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3)
+
+#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4)
+#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4)
+
+#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5)
+#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5)
+
+#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6)
+#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6)
+
+#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7)
+#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7)
+
+/* comiss / ucomiss scalar compares */
+#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg)
+#define comiss_r2r(regs, regd) sse_r2r(comiss, regs, regd)
+
+#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg)
+#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd)
+
+/* unpack (interleave) low / high floats */
+#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg)
+#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd)
+
+#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg)
+#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd)
+
+/*
+ * FPU/SSE state and control-register access.
+ * fxrstor and ldmxcsr read from mem, so mem is an input operand.
+ * fxsave and stmxcsr write to mem, so mem is declared as an output
+ * operand (as sse_r2m does for its stores); otherwise the compiler may
+ * assume the object is unchanged by the asm statement. mem must
+ * therefore be an lvalue for those two.
+ */
+
+/* fxrstor: load the saved state from mem. */
+#define fxrstor(mem) \
+ __asm__ __volatile__ ("fxrstor %0" \
+ : /* nothing */ \
+ : "X" (mem))
+
+/* fxsave: store the state into mem. */
+#define fxsave(mem) \
+ __asm__ __volatile__ ("fxsave %0" \
+ : "=X" (mem) \
+ : /* nothing */ )
+
+/* stmxcsr: store the MXCSR register into mem. */
+#define stmxcsr(mem) \
+ __asm__ __volatile__ ("stmxcsr %0" \
+ : "=X" (mem) \
+ : /* nothing */ )
+
+/* ldmxcsr: load the MXCSR register from mem. */
+#define ldmxcsr(mem) \
+ __asm__ __volatile__ ("ldmxcsr %0" \
+ : /* nothing */ \
+ : "X" (mem))
+#endif /*ARCH_X86 */
+
+
+
+ /* Optimized/fast memcpy */
+
+/*
+ TODO : fix dll linkage problem for xine_fast_memcpy on win32
+
+ xine_fast_memcpy dll linkage is screwy here.
+ declaring as dllimport seems to fix the problem
+ but causes compiler warning with libxineutils
+*/
+/*
+ * xine_fast_memcpy is a function pointer with the memcpy signature
+ * (to, from, len); it is only declared here. Under MSVC the declaration
+ * additionally carries __declspec(dllimport).
+ */
+#ifdef _MSC_VER
+__declspec( dllimport ) extern void *(* xine_fast_memcpy)(void *to, const void *from, size_t len);
+#else
+extern void *(* xine_fast_memcpy)(void *to, const void *from, size_t len);
+#endif
+
+/* Only declared when HAVE_XINE_INTERNAL_H is defined, since it needs xine_t. */
+#ifdef HAVE_XINE_INTERNAL_H
+/* Benchmark available memcpy methods */
+void xine_probe_fast_memcpy(xine_t *xine);
+#endif
+
+
+/*
+ * Debug stuff
+ */
+/*
+ * Profiling (not functional unless DEBUG is defined).
+ * NOTE(review): the int taken by start_count/stop_count is presumably the
+ * value returned by xine_profiler_allocate_slot - confirm in the
+ * implementation.
+ */
+void xine_profiler_init (void);
+int xine_profiler_allocate_slot (char *label);
+void xine_profiler_start_count (int id);
+void xine_profiler_stop_count (int id);
+void xine_profiler_print_results (void);
+
+/*
+ * Allocate and clean 'size' bytes of memory, then return the pointer
+ * to the allocated memory.
+ * With GCC 3 or newer the prototype also carries the __malloc__
+ * attribute; other compilers get the plain declaration.
+ */
+#if !defined(__GNUC__) || __GNUC__ < 3
+void *xine_xmalloc(size_t size);
+#else
+void *xine_xmalloc(size_t size) __attribute__ ((__malloc__));
+#endif
+
+/*
+ * Same as above, but the memory is aligned to 'alignment'.
+ * *base is used to return the pointer to the un-aligned memory; use
+ * that pointer to free the memory chunk.
+ */
+void *xine_xmalloc_aligned(size_t alignment, size_t size, void **base);
+
+/*
+ * Get user home directory.
+ */
+const char *xine_get_homedir(void);
+
+/*
+ * Clean a string (remove spaces and '=' at the beginning,
+ * and '\n', '\r' and spaces at the end).
+ */
+char *xine_chomp (char *str);
+
+/*
+ * A thread-safe usecond sleep
+ */
+void xine_usec_sleep(unsigned usec);
+
+
+ /*
+ * Some string functions
+ */
+
+
+/*
+ * xine_strdupa(d, s): set d to a copy of the string s, or to NULL when s
+ * is NULL. The copy lives in memory obtained from alloca(), so it must
+ * not be passed to free().
+ * The prototype only documents the intended argument types; a call
+ * written as xine_strdupa(d, s) expands the macro defined right after it.
+ * Both d and s are evaluated more than once, so avoid arguments with
+ * side effects.
+ */
+void xine_strdupa(char *dest, char *src);
+#define xine_strdupa(d, s) do { \
+ (d) = NULL; \
+ if((s) != NULL) { \
+ (d) = (char *) alloca(strlen((s)) + 1); \
+ strcpy((d), (s)); \
+ } \
+ } while(0)
+
+/*
+ * xine_strpbrk: the C library's strpbrk() when HAVE_STRPBRK is defined,
+ * otherwise the local fallback below (taken from glibc 2.2.3).
+ */
+#ifdef HAVE_STRPBRK
+#define xine_strpbrk strpbrk
+#else
+/*
+ * Return a pointer to the first character of s that also occurs in
+ * accept, or NULL when s contains none of them.
+ */
+static inline char *_private_strpbrk(const char *s, const char *accept) {
+  const char *pos;
+
+  for (pos = s; *pos != '\0'; pos++) {
+    const char *candidate;
+
+    for (candidate = accept; *candidate != '\0'; candidate++) {
+      if (*candidate == *pos)
+        return (char *) pos;
+    }
+  }
+
+  return NULL;
+}
+#define xine_strpbrk _private_strpbrk
+#endif
+
+/*
+ * xine_strsep: the C library's strsep() when HAVE_STRSEP is defined and
+ * the compiler is not MSVC, otherwise the local fallback below.
+ */
+#if defined HAVE_STRSEP && !defined(_MSC_VER)
+#define xine_strsep strsep
+#else
+/*
+ * Cut the next token off *stringp.
+ * Returns the old *stringp (NULL if it already was NULL). When one of the
+ * characters in delim is found, it is overwritten with '\0' and *stringp
+ * is moved just past it; when none is found, *stringp becomes NULL.
+ */
+static inline char *_private_strsep(char **stringp, const char *delim) {
+  char *token = *stringp;
+  char *stop;
+
+  if (token == NULL)
+    return NULL;
+
+  if (delim[0] != '\0' && delim[1] != '\0') {
+    /* two or more delimiter characters: general search */
+    stop = xine_strpbrk(token, delim);
+  } else if (delim[0] == '\0') {
+    /* empty delimiter set: nothing can match */
+    stop = NULL;
+  } else if (*token == delim[0]) {
+    /* single delimiter found right at the start */
+    stop = token;
+  } else if (*token == '\0') {
+    stop = NULL;
+  } else {
+    stop = strchr(token + 1, delim[0]);
+  }
+
+  if (stop != NULL) {
+    *stop = '\0';
+    *stringp = stop + 1;
+  } else {
+    *stringp = NULL;
+  }
+
+  return token;
+}
+#define xine_strsep _private_strsep
+#endif
+
+
+/*
+ * xine_setenv: the C library's setenv() when HAVE_SETENV is defined,
+ * otherwise the putenv()-based fallback below.
+ */
+#ifdef HAVE_SETENV
+#define xine_setenv setenv
+#else
+/*
+ * Set environment variable name to val.
+ * _xx plays the role of setenv()'s overwrite flag: when it is zero and
+ * the variable already exists, nothing is changed.
+ * Returns nothing; if the buffer cannot be allocated or putenv() fails
+ * the environment is left untouched.
+ */
+static inline void _private_setenv(const char *name, const char *val, int _xx) {
+  size_t len;
+  char *env;
+
+  if (!_xx && getenv(name) != NULL)
+    return;
+
+  /* name + '=' + val + terminating NUL */
+  len = strlen(name) + strlen(val) + 2;
+
+  /*
+   * putenv() keeps the pointer it is given instead of copying the string,
+   * so the buffer has to outlive this call: it is heap-allocated and
+   * deliberately not freed on success.
+   */
+  env = malloc(len);
+  if (env == NULL)
+    return;
+
+  sprintf(env, "%s%c%s", name, '=', val);
+  if (putenv(env) != 0)
+    free(env);
+}
+#define xine_setenv _private_setenv
+#endif
+
+/*
+ * Color Conversion Utility Functions
+ * The following data structures and functions facilitate the conversion
+ * of RGB images to packed YUV (YUY2) images. There are also functions to
+ * convert from YUV9 -> YV12. All of the meaty details are written in
+ * color.c.
+ */
+
+/*
+ * yuv_planes_t: one pointer per Y, U and V plane plus the frame
+ * dimensions.
+ * NOTE(review): the size and layout of each plane are not visible here -
+ * see init_yuv_planes() in the implementation.
+ */
+typedef struct yuv_planes_s {
+
+ unsigned char *y;
+ unsigned char *u;
+ unsigned char *v;
+ unsigned int row_width; /* frame width */
+ unsigned int row_count; /* frame height */
+
+} yuv_planes_t;
+
+/*
+ * Set-up and tear-down entry points for the conversion code.
+ * NOTE(review): presumably init_yuv_planes() allocates the three planes
+ * for a width x height frame and free_yuv_planes() releases them -
+ * confirm in the implementation.
+ */
+void init_yuv_conversion(void);
+void init_yuv_planes(yuv_planes_t *yuv_planes, int width, int height);
+void free_yuv_planes(yuv_planes_t *yuv_planes);
+
+/*
+ * Conversion routines, exposed as function pointers that are defined in
+ * another translation unit.
+ * NOTE(review): they are presumably assigned by init_yuv_conversion() -
+ * confirm before calling them without it.
+ */
+
+/* planar YUV 4:4:4 (yuv_planes) -> packed YUY2 (yuy2_map, row pitch 'pitch') */
+extern void (*yuv444_to_yuy2)
+ (yuv_planes_t *yuv_planes, unsigned char *yuy2_map, int pitch);
+/* YUV9 -> YV12; every plane has its own source and destination pitch */
+extern void (*yuv9_to_yv12)
+ (unsigned char *y_src, int y_src_pitch, unsigned char *y_dest, int y_dest_pitch,
+ unsigned char *u_src, int u_src_pitch, unsigned char *u_dest, int u_dest_pitch,
+ unsigned char *v_src, int v_src_pitch, unsigned char *v_dest, int v_dest_pitch,
+ int width, int height);
+/* YUV 4:1:1 -> YV12, same parameter layout as yuv9_to_yv12 */
+extern void (*yuv411_to_yv12)
+ (unsigned char *y_src, int y_src_pitch, unsigned char *y_dest, int y_dest_pitch,
+ unsigned char *u_src, int u_src_pitch, unsigned char *u_dest, int u_dest_pitch,
+ unsigned char *v_src, int v_src_pitch, unsigned char *v_dest, int v_dest_pitch,
+ int width, int height);
+/* planar YV12 -> packed YUY2; 'progressive' is a flag (semantics defined by the implementation) */
+extern void (*yv12_to_yuy2)
+ (unsigned char *y_src, int y_src_pitch,
+ unsigned char *u_src, int u_src_pitch,
+ unsigned char *v_src, int v_src_pitch,
+ unsigned char *yuy2_map, int yuy2_pitch,
+ int width, int height, int progressive);
+/* packed YUY2 -> planar YV12 */
+extern void (*yuy2_to_yv12)
+ (unsigned char *yuy2_map, int yuy2_pitch,
+ unsigned char *y_dst, int y_dst_pitch,
+ unsigned char *u_dst, int u_dst_pitch,
+ unsigned char *v_dst, int v_dst_pitch,
+ int width, int height);
+
+/* Divisor applied to the summed table entries in COMPUTE_Y/U/V. */
+#define SCALEFACTOR 65536
+/* Offset added to the U and V results. */
+#define CENTERSAMPLE 128
+
+/*
+ * COMPUTE_Y / COMPUTE_U / COMPUTE_V: look r, g and b up in the matching
+ * *_r_table, *_g_table and *_b_table, add the three entries, divide by
+ * SCALEFACTOR and cast the result to unsigned char. U and V additionally
+ * add CENTERSAMPLE before the cast.
+ * r, g and b are used directly as array indices, so each must be a valid
+ * index into a 256-entry table.
+ */
+#define COMPUTE_Y(r, g, b) \
+ (unsigned char) \
+ ((y_r_table[r] + y_g_table[g] + y_b_table[b]) / SCALEFACTOR)
+#define COMPUTE_U(r, g, b) \
+ (unsigned char) \
+ ((u_r_table[r] + u_g_table[g] + u_b_table[b]) / SCALEFACTOR + CENTERSAMPLE)
+#define COMPUTE_V(r, g, b) \
+ (unsigned char) \
+ ((v_r_table[r] + v_g_table[g] + v_b_table[b]) / SCALEFACTOR + CENTERSAMPLE)
+
+/*
+ * UNPACK_{BGR,RGB}{15,16}: split a packed 15- or 16-bit pixel into its
+ * three channels and store them in r, g and b. Each channel is masked
+ * out and shifted so that its bits end up left-aligned within the low
+ * 8 bits (5 bits per channel, 6 for green in the 16-bit layouts).
+ * The BGR variants take blue from the high bits and red from the low
+ * bits; the RGB variants the other way round.
+ *
+ * Every macro argument is parenthesized, so an expression such as
+ * (hi << 8 | lo) can be passed as packed_pixel. packed_pixel is evaluated
+ * three times. Each macro expands to three complete statements, including
+ * the final semicolon, so it must not be used as the unbraced body of an
+ * if, else or loop.
+ */
+#define UNPACK_BGR15(packed_pixel, r, g, b) \
+ (b) = ((packed_pixel) & 0x7C00) >> 7; \
+ (g) = ((packed_pixel) & 0x03E0) >> 2; \
+ (r) = ((packed_pixel) & 0x001F) << 3;
+
+#define UNPACK_BGR16(packed_pixel, r, g, b) \
+ (b) = ((packed_pixel) & 0xF800) >> 8; \
+ (g) = ((packed_pixel) & 0x07E0) >> 3; \
+ (r) = ((packed_pixel) & 0x001F) << 3;
+
+#define UNPACK_RGB15(packed_pixel, r, g, b) \
+ (r) = ((packed_pixel) & 0x7C00) >> 7; \
+ (g) = ((packed_pixel) & 0x03E0) >> 2; \
+ (b) = ((packed_pixel) & 0x001F) << 3;
+
+#define UNPACK_RGB16(packed_pixel, r, g, b) \
+ (r) = ((packed_pixel) & 0xF800) >> 8; \
+ (g) = ((packed_pixel) & 0x07E0) >> 3; \
+ (b) = ((packed_pixel) & 0x001F) << 3;
+
+/*
+ * 256-entry lookup tables read by COMPUTE_Y, COMPUTE_U and COMPUTE_V:
+ * one table per output component (y, u, v) and input channel (r, g, b).
+ * They are defined in another translation unit.
+ */
+extern int y_r_table[256];
+extern int y_g_table[256];
+extern int y_b_table[256];
+
+extern int u_r_table[256];
+extern int u_g_table[256];
+extern int u_b_table[256];
+
+extern int v_r_table[256];
+extern int v_g_table[256];
+extern int v_b_table[256];
+
+/*
+ * frame copying functions
+ * Unlike the conversion routines these are ordinary functions, not
+ * function pointers. Source and destination each have their own pitch.
+ */
+extern void yv12_to_yv12
+ (unsigned char *y_src, int y_src_pitch, unsigned char *y_dst, int y_dst_pitch,
+ unsigned char *u_src, int u_src_pitch, unsigned char *u_dst, int u_dst_pitch,
+ unsigned char *v_src, int v_src_pitch, unsigned char *v_dst, int v_dst_pitch,
+ int width, int height);
+extern void yuy2_to_yuy2
+ (unsigned char *src, int src_pitch,
+ unsigned char *dst, int dst_pitch,
+ int width, int height);
+
+/* print a hexdump of the given data */
+void xine_hexdump (const char *buf, int length);
+
+/*
+ * Optimization macros for conditions
+ * Taken from the FIASCO L4 microkernel sources
+ *
+ * With GCC 3 or newer, EXPECT_TRUE(x) / EXPECT_FALSE(x) wrap x in
+ * __builtin_expect with an expected value of 1 / 0. With any other
+ * compiler both macros expand to (x) alone.
+ */
+#if !defined(__GNUC__) || __GNUC__ < 3
+# define EXPECT_TRUE(x) (x)
+# define EXPECT_FALSE(x) (x)
+#else
+# define EXPECT_TRUE(x) __builtin_expect((x),1)
+# define EXPECT_FALSE(x) __builtin_expect((x),0)
+#endif
+
+/*
+ * _x_assert(exp): when exp is false, print file, line, function and the
+ * text of exp to stderr. With NDEBUG defined that message is all that
+ * happens; without NDEBUG the macro then calls abort().
+ * Note that, unlike assert(), exp is still evaluated when NDEBUG is set.
+ * __XINE_FUNCTION__ is defined outside this part of the file.
+ */
+#ifdef NDEBUG
+#define _x_assert(exp) \
+ do { \
+ if (!(exp)) \
+ fprintf(stderr, "assert: %s:%d: %s: Assertion `%s' failed.\n", \
+ __FILE__, __LINE__, __XINE_FUNCTION__, #exp); \
+ } while(0)
+#else
+#define _x_assert(exp) \
+ do { \
+ if (!(exp)) { \
+ fprintf(stderr, "assert: %s:%d: %s: Assertion `%s' failed.\n", \
+ __FILE__, __LINE__, __XINE_FUNCTION__, #exp); \
+ abort(); \
+ } \
+ } while(0)
+#endif
+
+/* _x_abort(): print file, line and function to stderr, then call abort(). */
+#define _x_abort() \
+ do { \
+ fprintf(stderr, "abort: %s:%d: %s: Aborting.\n", \
+ __FILE__, __LINE__, __XINE_FUNCTION__); \
+ abort(); \
+ } while(0)
+
+
+/****** logging with xine **********************************/
+
+/* LOG_MODULE names the module in log output; it defaults to __FILE__. */
+#ifndef LOG_MODULE
+ #define LOG_MODULE __FILE__
+#endif /* LOG_MODULE */
+
+/*
+ * Prints the "<module>: " prefix. The expansion already ends in a
+ * semicolon, which is why the logging macros use it without one.
+ */
+#define LOG_MODULE_STRING printf("%s: ", LOG_MODULE );
+
+/*
+ * With LOG_VERBOSE the prefix also carries function name and line number;
+ * otherwise it is the same as LOG_MODULE_STRING.
+ */
+#ifdef LOG_VERBOSE
+ #define LONG_LOG_MODULE_STRING \
+ printf("%s: (%s:%d) ", LOG_MODULE, __XINE_FUNCTION__, __LINE__ );
+#else
+ #define LONG_LOG_MODULE_STRING LOG_MODULE_STRING
+#endif /* LOG_VERBOSE */
+
+/*
+ * lprintf: printf-style debug output, prefixed with
+ * LONG_LOG_MODULE_STRING. It only prints when LOG is defined; otherwise
+ * it expands to an empty statement (or, for MSVC, to nothing at all).
+ *
+ * Three spellings are needed because the compilers differ in how they
+ * support variadic macros:
+ * - GCC: named variadic parameter (args...)
+ * - MSVC: a single argument, printed through "%s"
+ * - others: C99 __VA_ARGS__. The format string is part of __VA_ARGS__
+ * here; naming it as a separate parameter would leave it out of the
+ * printf() call.
+ */
+#ifdef LOG
+ #ifdef __GNUC__
+ #define lprintf(fmt, args...) \
+ do { \
+ LONG_LOG_MODULE_STRING \
+ printf(fmt, ##args); \
+ } while(0)
+ #else /* __GNUC__ */
+ #ifdef _MSC_VER
+ #define lprintf(fmtargs) \
+ do { \
+ LONG_LOG_MODULE_STRING \
+ printf("%s", fmtargs); \
+ } while(0)
+ #else /* _MSC_VER */
+ #define lprintf(...) \
+ do { \
+ LONG_LOG_MODULE_STRING \
+ printf(__VA_ARGS__); \
+ } while(0)
+ #endif /* _MSC_VER */
+ #endif /* __GNUC__ */
+#else /* LOG */
+ #ifdef __GNUC__
+ #define lprintf(fmt, args...) do {} while(0)
+ #else
+ #ifdef _MSC_VER
+ #define lprintf
+ #else
+ #define lprintf(...) do {} while(0)
+ #endif /* _MSC_VER */
+ #endif /* __GNUC__ */
+#endif /* LOG */
+
+/*
+ * llprintf(cat, ...): like lprintf, but the output is produced only when
+ * cat evaluates to non-zero. It does not depend on LOG being defined.
+ * The three definitions cover GCC (named variadic parameter), MSVC
+ * (single argument printed through "%s") and C99 __VA_ARGS__.
+ */
+#ifdef __GNUC__
+ #define llprintf(cat, fmt, args...) \
+ do{ \
+ if(cat){ \
+ LONG_LOG_MODULE_STRING \
+ printf( fmt, ##args ); \
+ } \
+ }while(0)
+#else
+#ifdef _MSC_VER
+ #define llprintf(cat, fmtargs) \
+ do{ \
+ if(cat){ \
+ LONG_LOG_MODULE_STRING \
+ printf( "%s", fmtargs ); \
+ } \
+ }while(0)
+#else
+ #define llprintf(cat, ...) \
+ do{ \
+ if(cat){ \
+ LONG_LOG_MODULE_STRING \
+ printf( __VA_ARGS__ ); \
+ } \
+ }while(0)
+#endif /* _MSC_VER */
+#endif /* __GNUC__ */
+
+/*
+ * xprintf(xine, verbose, ...): forward the message to
+ * xine_log(xine, XINE_LOG_TRACE, ...) when xine is non-NULL and its
+ * verbosity field is at least verbose.
+ * xine is evaluated more than once and verbose is used without
+ * parentheses, so pass simple expressions for both.
+ * The three definitions cover GCC, MSVC and C99 variadic macros.
+ */
+#ifdef __GNUC__
+ #define xprintf(xine, verbose, fmt, args...) \
+ do { \
+ if((xine) && (xine)->verbosity >= verbose){ \
+ xine_log(xine, XINE_LOG_TRACE, fmt, ##args); \
+ } \
+ } while(0)
+#else
+#ifdef _MSC_VER
+ #define xprintf(xine, verbose, fmtargs) \
+ do { \
+ if((xine) && (xine)->verbosity >= verbose){ \
+ xine_log(xine, XINE_LOG_TRACE, fmtargs); \
+ } \
+ } while(0)
+#else
+ #define xprintf(xine, verbose, ...) \
+ do { \
+ if((xine) && (xine)->verbosity >= verbose){ \
+ xine_log(xine, XINE_LOG_TRACE, __VA_ARGS__); \
+ } \
+ } while(0)
+#endif /* _MSC_VER */
+#endif /* __GNUC__ */
+
+/* time measuring macros for profiling tasks */
+
+/*
+ * XINE_PROFILE(function): with DEBUG defined, execute `function` between
+ * two gettimeofday() calls and print the elapsed time in seconds,
+ * prefixed with module, function name and line.
+ * XINE_PROFILE_ACCUMULATE(function): the same, except that the elapsed
+ * time is added to a static counter, so the printed value is the running
+ * total for that particular expansion site.
+ * Without DEBUG both macros expand to `function` alone.
+ *
+ * The timeval local is called profile_tv so that it is unlikely to shadow
+ * a name used inside `function`. The caller has to provide the
+ * declarations of gettimeofday() and struct timeval.
+ */
+#ifdef DEBUG
+# define XINE_PROFILE(function) \
+ do { \
+ struct timeval profile_tv; \
+ double dtime; \
+ gettimeofday(&profile_tv, NULL); \
+ dtime = -(profile_tv.tv_sec + (profile_tv.tv_usec / 1000000.0)); \
+ function; \
+ gettimeofday(&profile_tv, NULL); \
+ dtime += profile_tv.tv_sec + (profile_tv.tv_usec / 1000000.0); \
+ printf("%s: (%s:%d) took %lf seconds\n", \
+ LOG_MODULE, __XINE_FUNCTION__, __LINE__, dtime); \
+ } while(0)
+# define XINE_PROFILE_ACCUMULATE(function) \
+ do { \
+ struct timeval profile_tv; \
+ static double dtime = 0; \
+ gettimeofday(&profile_tv, NULL); \
+ dtime -= profile_tv.tv_sec + (profile_tv.tv_usec / 1000000.0); \
+ function; \
+ gettimeofday(&profile_tv, NULL); \
+ dtime += profile_tv.tv_sec + (profile_tv.tv_usec / 1000000.0); \
+ printf("%s: (%s:%d) took %lf seconds\n", \
+ LOG_MODULE, __XINE_FUNCTION__, __LINE__, dtime); \
+ } while(0)
+#else
+# define XINE_PROFILE(function) function
+# define XINE_PROFILE_ACCUMULATE(function) function
+#endif /* DEBUG */
+
+
+/******** double chained lists with builtin iterator *******/
+
+/* One list entry: links to both neighbours, the payload and its priority. */
+typedef struct xine_node_s {
+
+ struct xine_node_s *next, *prev;
+
+ void *content;
+
+ int priority;
+
+} xine_node_t;
+
+
+/*
+ * The list itself: pointers to the first and last node plus 'cur'.
+ * NOTE(review): 'cur' is presumably the position of the built-in
+ * iterator used by the *_content functions - confirm in the
+ * implementation.
+ */
+typedef struct {
+
+ xine_node_t *first, *last, *cur;
+
+} xine_list_t;
+
+
+
+/**
+ * Create a new list.
+ */
+xine_list_t *xine_list_new (void);
+
+
+/**
+ * Dispose of the whole list.
+ * note: disposes _only_ of the list structure; content must be free()d elsewhere
+ */
+void xine_list_free(xine_list_t *l);
+
+
+/**
+ * returns: Boolean
+ */
+int xine_list_is_empty (xine_list_t *l);
+
+/**
+ * Return content of first entry in list.
+ */
+void *xine_list_first_content (xine_list_t *l);
+
+/**
+ * Return next content in list.
+ */
+void *xine_list_next_content (xine_list_t *l);
+
+/**
+ * Return last content of list.
+ */
+void *xine_list_last_content (xine_list_t *l);
+
+/**
+ * Return previous content of list.
+ */
+void *xine_list_prev_content (xine_list_t *l);
+
+/**
+ * Append content to list, sorted by decreasing priority.
+ */
+void xine_list_append_priority_content (xine_list_t *l, void *content, int priority);
+
+/**
+ * Append content to list.
+ */
+void xine_list_append_content (xine_list_t *l, void *content);
+
+/**
+ * Insert content in list.
+ */
+void xine_list_insert_content (xine_list_t *l, void *content);
+
+/**
+ * Remove current content in list.
+ * note: removes only the list entry; content must be free()d elsewhere.
+ */
+void xine_list_delete_current (xine_list_t *l);
+
+/* Declared here only when the platform provides no basename() (HAVE_BASENAME undefined). */
+#ifndef HAVE_BASENAME
+/*
+ * get base name
+ */
+char *basename (char const *name);
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif