From: ddennedy Date: Thu, 4 Mar 2004 02:31:30 +0000 (+0000) Subject: added xine-based accellerated deinterlace X-Git-Url: http://research.m1stereo.tv/gitweb?a=commitdiff_plain;h=2e4eb8e703760fb408f86152451aeaf425f6a579;p=melted added xine-based accellerated deinterlace git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@190 d19143bc-622f-0410-bfdd-b5b2a6649095 --- diff --git a/src/modules/Makefile b/src/modules/Makefile index f959a3c..18df650 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -1,4 +1,4 @@ -SUBDIRS = core gtk2 dv sdl mainconcept bluefish ffmpeg resample inigo avformat vorbis speex westley fezzik +SUBDIRS = core gtk2 dv sdl mainconcept bluefish ffmpeg resample inigo avformat vorbis speex westley fezzik xine all clean depend install: list='$(SUBDIRS)'; \ diff --git a/src/modules/core/Makefile b/src/modules/core/Makefile index 63eeace..4dd185c 100644 --- a/src/modules/core/Makefile +++ b/src/modules/core/Makefile @@ -4,7 +4,6 @@ TARGET = ../libmltcore.so OBJS = factory.o \ producer_ppm.o \ filter_brightness.o \ - filter_deinterlace.o \ filter_greyscale.o \ filter_gamma.o \ filter_luma.o \ diff --git a/src/modules/core/configure b/src/modules/core/configure index 735cece..4660916 100755 --- a/src/modules/core/configure +++ b/src/modules/core/configure @@ -4,13 +4,12 @@ if [ "$help" != "1" ] then cat << EOF >> ../producers.dat -ppm libmltcore.so colour libmltcore.so +ppm libmltcore.so EOF cat << EOF >> ../filters.dat brightness libmltcore.so -deinterlace libmltcore.so gamma libmltcore.so greyscale libmltcore.so luma libmltcore.so diff --git a/src/modules/core/factory.c b/src/modules/core/factory.c index 3f47b47..1d037a0 100644 --- a/src/modules/core/factory.c +++ b/src/modules/core/factory.c @@ -22,7 +22,6 @@ #include "producer_ppm.h" #include "filter_brightness.h" -#include "filter_deinterlace.h" #include "filter_gamma.h" #include "filter_luma.h" #include "filter_greyscale.h" @@ -50,8 +49,6 @@ void *mlt_create_filter( char 
*id, void *arg ) { if ( !strcmp( id, "brightness" ) ) return filter_brightness_init( arg ); - if ( !strcmp( id, "deinterlace" ) ) - return filter_deinterlace_init( arg ); if ( !strcmp( id, "gamma" ) ) return filter_gamma_init( arg ); if ( !strcmp( id, "greyscale" ) ) diff --git a/src/modules/core/producer_colour.c b/src/modules/core/producer_colour.c index 2290b9d..970cacb 100644 --- a/src/modules/core/producer_colour.c +++ b/src/modules/core/producer_colour.c @@ -48,9 +48,7 @@ mlt_producer producer_colour_init( char *colour ) producer->close = producer_close; // Set the default properties - if ( colour == NULL ) - colour = "0x000000ff"; - mlt_properties_set( properties, "resource", colour ); + mlt_properties_set( properties, "resource", colour == NULL ? "0x000000ff" : colour ); return producer; } diff --git a/src/modules/xine/Makefile b/src/modules/xine/Makefile new file mode 100644 index 0000000..a746708 --- /dev/null +++ b/src/modules/xine/Makefile @@ -0,0 +1,29 @@ + +TARGET = ../libmltxine.so + +OBJS = factory.o \ + deinterlace.o \ + cpu_accel.o \ + filter_deinterlace.o + +CFLAGS = -O3 -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread -DARCH_X86 + +SRCS := $(OBJS:.o=.c) + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(CC) -shared -o $@ $(OBJS) $(LDFLAGS) + +depend: $(SRCS) + $(CC) -MM $(CFLAGS) $^ 1>.depend + +dist-clean: clean + rm -f .depend + +clean: + rm -f $(OBJS) $(TARGET) + +ifneq ($(wildcard .depend),) +include .depend +endif diff --git a/src/modules/xine/configure b/src/modules/xine/configure new file mode 100755 index 0000000..022c9cd --- /dev/null +++ b/src/modules/xine/configure @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ "$help" != "1" ] +then + +cat << EOF >> ../filters.dat +deinterlace libmltxine.so +EOF + +fi + diff --git a/src/modules/xine/cpu_accel.c b/src/modules/xine/cpu_accel.c new file mode 100644 index 0000000..f8c0b7a --- /dev/null +++ b/src/modules/xine/cpu_accel.c @@ -0,0 +1,232 @@ +/* + * cpu_accel.c + * Copyright (C) 1999-2001 Aaron Holtzman + 
* + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//#include "config.h" + +#include +#include +#include +#include +#include +#include + +#define LOG_MODULE "cpu_accel" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include "xineutils.h" + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +#if defined __x86_64__ +static uint32_t arch_accel (void) +{ + uint32_t caps; + /* No need to test for this on AMD64, we know what the + platform has. 
*/ + caps = MM_ACCEL_X86_MMX | MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT | MM_ACCEL_X86_SSE2; + + return caps; +} +#else +static uint32_t arch_accel (void) +{ +#ifndef _MSC_VER + + uint32_t eax, ebx, ecx, edx; + int AMD; + uint32_t caps; + +#ifndef PIC +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("cpuid" \ + : "=a" (eax), \ + "=b" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#else /* PIC version : save ebx */ +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("pushl %%ebx\n\t" \ + "cpuid\n\t" \ + "movl %%ebx,%1\n\t" \ + "popl %%ebx" \ + : "=a" (eax), \ + "=r" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#endif + + __asm__ ("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl $0x200000,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl" + : "=r" (eax), + "=r" (ebx) + : + : "cc"); + + if (eax == ebx) /* no cpuid */ + return 0; + + cpuid (0x00000000, eax, ebx, ecx, edx); + if (!eax) /* vendor string only */ + return 0; + + AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65); + + cpuid (0x00000001, eax, ebx, ecx, edx); + if (! 
(edx & 0x00800000)) /* no MMX */ + return 0; + + caps = MM_ACCEL_X86_MMX; + if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ + caps |= MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT; + + if (edx & 0x04000000) /* SSE2 */ + caps |= MM_ACCEL_X86_SSE2; + + cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax < 0x80000001) /* no extended capabilities */ + return caps; + + cpuid (0x80000001, eax, ebx, ecx, edx); + + if (edx & 0x80000000) + caps |= MM_ACCEL_X86_3DNOW; + + if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */ + caps |= MM_ACCEL_X86_MMXEXT; + + return caps; +#else /* _MSC_VER */ + return 0; +#endif +} +#endif /* x86_64 */ + +static jmp_buf sigill_return; + +static void sigill_handler (int n) { + longjmp(sigill_return, 1); +} +#endif /* ARCH_X86 */ + +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) +static sigjmp_buf jmpbuf; +static volatile sig_atomic_t canjump = 0; + +static void sigill_handler (int sig) +{ + if (!canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + canjump = 0; + siglongjmp (jmpbuf, 1); +} + +static uint32_t arch_accel (void) +{ + signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, SIG_DFL); + return 0; + } + + canjump = 1; + + __asm__ volatile ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal (SIGILL, SIG_DFL); + return MM_ACCEL_PPC_ALTIVEC; +} +#endif /* ARCH_PPC */ + +uint32_t xine_mm_accel (void) +{ + static int initialized = 0; + static uint32_t accel; + + if (!initialized) { +#if defined (ARCH_X86) || (defined (ARCH_PPC) && defined (ENABLE_ALTIVEC)) + accel = arch_accel (); +#elif defined (HAVE_MLIB) +#ifdef MLIB_LAZYLOAD + void *hndl; + + if ((hndl = dlopen("libmlib.so.2", RTLD_LAZY | RTLD_GLOBAL | RTLD_NODELETE)) == NULL) { + accel = 0; + } + else { + dlclose(hndl); + accel = MM_ACCEL_MLIB; + } +#else + accel = MM_ACCEL_MLIB; +#endif +#else + accel = 0; +#endif + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +#ifndef _MSC_VER + /* test OS support for SSE */ 
+ if( accel & MM_ACCEL_X86_SSE ) { + void (*old_sigill_handler)(int); + + old_sigill_handler = signal (SIGILL, sigill_handler); + + if (setjmp(sigill_return)) { + lprintf ("OS doesn't support SSE instructions.\n"); + accel &= ~(MM_ACCEL_X86_SSE|MM_ACCEL_X86_SSE2); + } else { + __asm__ volatile ("xorps %xmm0, %xmm0"); + } + + signal (SIGILL, old_sigill_handler); + } +#endif /* _MSC_VER */ +#endif /* ARCH_X86 || ARCH_X86_64 */ + + if(getenv("XINE_NO_ACCEL")) { + accel = 0; + } + + initialized = 1; + } + + return accel; +} diff --git a/src/modules/xine/deinterlace.c b/src/modules/xine/deinterlace.c new file mode 100644 index 0000000..60139e0 --- /dev/null +++ b/src/modules/xine/deinterlace.c @@ -0,0 +1,859 @@ + /* + * Copyright (C) 2001 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Deinterlace routines by Miguel Freitas + * based of DScaler project sources (deinterlace.sourceforge.net) + * + * Currently only available for Xv driver and MMX extensions + * + * small todo list: + * - implement non-MMX versions for all methods + * - support MMX2 instructions + * - move some generic code from xv driver to this file + * - make it also work for yuy2 frames + * + */ + +#include +#include +#include "deinterlace.h" +#include "xineutils.h" + +#define xine_fast_memcpy memcpy + +/* + DeinterlaceFieldBob algorithm + Based on Virtual Dub plugin by Gunnar Thalin + MMX asm version from dscaler project (deinterlace.sourceforge.net) + Linux version for Xine player by Miguel Freitas +*/ +static void deinterlace_bob_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#if defined(ARCH_X86) || defined(ARCH_X86_64) + int Line; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + long EdgeDetect = 625; + long JaggieThreshold = 73; + + int n; + + uint64_t qwEdgeDetect; + uint64_t qwThreshold; + + static mmx_t YMask = {ub:{0xff,0,0xff,0,0xff,0,0xff,0}}; + static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; + + qwEdgeDetect = EdgeDetect; + qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16); + qwThreshold = JaggieThreshold; + qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16); + + + // copy first even line no matter what, and the first odd line if we're + // processing an odd field. 
+ xine_fast_memcpy(pdst, pEvenLines, LineLength); + if (IsOdd) + xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength); + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + if (IsOdd) + { + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + else + { + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + + // For ease of reading, the comments below assume that we're operating on an odd + // field (i.e., that bIsOdd is true). The exact same processing is done when we + // operate on an even field, but the roles of the odd and even fields are reversed. + // It's just too cumbersome to explain the algorithm in terms of "the next odd + // line if we're doing an odd field, or the next even line if we're doing an + // even field" etc. So wherever you see "odd" or "even" below, keep in mind that + // half the time this function is called, those words' meanings will invert. + + // Copy the odd line to the overlay verbatim. 
+ xine_fast_memcpy((char *)Dest + LineLength, YVal3, LineLength); + + n = LineLength >> 3; + while( n-- ) + { + movq_m2r (*YVal1++, mm0); + movq_m2r (*YVal2++, mm1); + movq_m2r (*YVal3++, mm2); + + // get intensities in mm3 - 4 + movq_r2r ( mm0, mm3 ); + pand_m2r ( YMask, mm3 ); + movq_r2r ( mm1, mm4 ); + pand_m2r ( YMask, mm4 ); + movq_r2r ( mm2, mm5 ); + pand_m2r ( YMask, mm5 ); + + // get average in mm0 + pand_m2r ( Mask, mm0 ); + pand_m2r ( Mask, mm2 ); + psrlw_i2r ( 01, mm0 ); + psrlw_i2r ( 01, mm2 ); + paddw_r2r ( mm2, mm0 ); + + // work out (O1 - E) * (O2 - E) / 2 - EdgeDetect * (O1 - O2) ^ 2 >> 12 + // result will be in mm6 + + psrlw_i2r ( 01, mm3 ); + psrlw_i2r ( 01, mm4 ); + psrlw_i2r ( 01, mm5 ); + + movq_r2r ( mm3, mm6 ); + psubw_r2r ( mm4, mm6 ); //mm6 = O1 - E + + movq_r2r ( mm5, mm7 ); + psubw_r2r ( mm4, mm7 ); //mm7 = O2 - E + + pmullw_r2r ( mm7, mm6 ); // mm6 = (O1 - E) * (O2 - E) + + movq_r2r ( mm3, mm7 ); + psubw_r2r ( mm5, mm7 ); // mm7 = (O1 - O2) + pmullw_r2r ( mm7, mm7 ); // mm7 = (O1 - O2) ^ 2 + psrlw_i2r ( 12, mm7 ); // mm7 = (O1 - O2) ^ 2 >> 12 + pmullw_m2r ( *&qwEdgeDetect, mm7 );// mm7 = EdgeDetect * (O1 - O2) ^ 2 >> 12 + + psubw_r2r ( mm7, mm6 ); // mm6 is what we want + + pcmpgtw_m2r ( *&qwThreshold, mm6 ); + + movq_r2r ( mm6, mm7 ); + + pand_r2r ( mm6, mm0 ); + + pandn_r2r ( mm1, mm7 ); + + por_r2r ( mm0, mm7 ); + + movq_r2m ( mm7, *Dest++ ); + } + } + + // Copy last odd line if we're processing an even field. + if (! IsOdd) + { + xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + // clear out the MMX registers ready for doing floating point + // again + emms(); +#endif +} + +/* Deinterlace the latest field, with a tendency to weave rather than bob. + Good for high detail on low-movement scenes. + Seems to produce bad output in general case, need to check if this + is normal or if the code is broken. 
+*/ +static int deinterlace_weave_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#if defined(ARCH_X86) || defined(ARCH_X86_64) + + int Line; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *YVal4; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + uint8_t* pPrevLines; + + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + + long TemporalTolerance = 300; + long SpatialTolerance = 600; + long SimilarityThreshold = 25; + + int n; + + uint64_t qwSpatialTolerance; + uint64_t qwTemporalTolerance; + uint64_t qwThreshold; + + static mmx_t YMask = {ub:{0xff,0,0xff,0,0xff,0,0xff,0}}; + static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; + + + // Make sure we have all the data we need. + if ( psrc[0] == NULL || psrc[1] == NULL ) + return 0; + + if (IsOdd) + pPrevLines = psrc[1] + width; + else + pPrevLines = psrc[1]; + + // Since the code uses MMX to process 4 pixels at a time, we need our constants + // to be represented 4 times per quadword. + qwSpatialTolerance = SpatialTolerance; + qwSpatialTolerance += (qwSpatialTolerance << 48) + (qwSpatialTolerance << 32) + (qwSpatialTolerance << 16); + qwTemporalTolerance = TemporalTolerance; + qwTemporalTolerance += (qwTemporalTolerance << 48) + (qwTemporalTolerance << 32) + (qwTemporalTolerance << 16); + qwThreshold = SimilarityThreshold; + qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16); + + // copy first even line no matter what, and the first odd line if we're + // processing an even field. 
+ xine_fast_memcpy(pdst, pEvenLines, LineLength); + if (!IsOdd) + xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength); + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + if (IsOdd) + { + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal4 = (uint64_t *)(pPrevLines + Line * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + else + { + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + YVal4 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + + // For ease of reading, the comments below assume that we're operating on an odd + // field (i.e., that bIsOdd is true). The exact same processing is done when we + // operate on an even field, but the roles of the odd and even fields are reversed. + // It's just too cumbersome to explain the algorithm in terms of "the next odd + // line if we're doing an odd field, or the next even line if we're doing an + // even field" etc. So wherever you see "odd" or "even" below, keep in mind that + // half the time this function is called, those words' meanings will invert. + + // Copy the even scanline below this one to the overlay buffer, since we'll be + // adapting the current scanline to the even lines surrounding it. The scanline + // above has already been copied by the previous pass through the loop. 
+ xine_fast_memcpy((char *)Dest + LineLength, YVal3, LineLength); + + n = LineLength >> 3; + while( n-- ) + { + movq_m2r ( *YVal1++, mm0 ); // mm0 = E1 + movq_m2r ( *YVal2++, mm1 ); // mm1 = O + movq_m2r ( *YVal3++, mm2 ); // mm2 = E2 + + movq_r2r ( mm0, mm3 ); // mm3 = intensity(E1) + movq_r2r ( mm1, mm4 ); // mm4 = intensity(O) + movq_r2r ( mm2, mm6 ); // mm6 = intensity(E2) + + pand_m2r ( YMask, mm3 ); + pand_m2r ( YMask, mm4 ); + pand_m2r ( YMask, mm6 ); + + // Average E1 and E2 for interpolated bobbing. + // leave result in mm0 + pand_m2r ( Mask, mm0 ); // mm0 = E1 with lower chroma bit stripped off + pand_m2r ( Mask, mm2 ); // mm2 = E2 with lower chroma bit stripped off + psrlw_i2r ( 01, mm0 ); // mm0 = E1 / 2 + psrlw_i2r ( 01, mm2 ); // mm2 = E2 / 2 + paddb_r2r ( mm2, mm0 ); + + // The meat of the work is done here. We want to see whether this pixel is + // close in luminosity to ANY of: its top neighbor, its bottom neighbor, + // or its predecessor. To do this without branching, we use MMX's + // saturation feature, which gives us Z(x) = x if x>=0, or 0 if x<0. + // + // The formula we're computing here is + // Z(ST - (E1 - O) ^ 2) + Z(ST - (E2 - O) ^ 2) + Z(TT - (Oold - O) ^ 2) + // where ST is spatial tolerance and TT is temporal tolerance. The idea + // is that if a pixel is similar to none of its neighbors, the resulting + // value will be pretty low, probably zero. A high value therefore indicates + // that the pixel had a similar neighbor. The pixel in the same position + // in the field before last (Oold) is considered a neighbor since we want + // to be able to display 1-pixel-high horizontal lines. 
+ + movq_m2r ( *&qwSpatialTolerance, mm7 ); + movq_r2r ( mm3, mm5 ); // mm5 = E1 + psubsw_r2r ( mm4, mm5 ); // mm5 = E1 - O + psraw_i2r ( 1, mm5 ); + pmullw_r2r ( mm5, mm5 ); // mm5 = (E1 - O) ^ 2 + psubusw_r2r ( mm5, mm7 ); // mm7 = ST - (E1 - O) ^ 2, or 0 if that's negative + + movq_m2r ( *&qwSpatialTolerance, mm3 ); + movq_r2r ( mm6, mm5 ); // mm5 = E2 + psubsw_r2r ( mm4, mm5 ); // mm5 = E2 - O + psraw_i2r ( 1, mm5 ); + pmullw_r2r ( mm5, mm5 ); // mm5 = (E2 - O) ^ 2 + psubusw_r2r ( mm5, mm3 ); // mm0 = ST - (E2 - O) ^ 2, or 0 if that's negative + paddusw_r2r ( mm3, mm7 ); // mm7 = (ST - (E1 - O) ^ 2) + (ST - (E2 - O) ^ 2) + + movq_m2r ( *&qwTemporalTolerance, mm3 ); + movq_m2r ( *YVal4++, mm5 ); // mm5 = Oold + pand_m2r ( YMask, mm5 ); + psubsw_r2r ( mm4, mm5 ); // mm5 = Oold - O + psraw_i2r ( 1, mm5 ); // XXX + pmullw_r2r ( mm5, mm5 ); // mm5 = (Oold - O) ^ 2 + psubusw_r2r ( mm5, mm3 ); /* mm0 = TT - (Oold - O) ^ 2, or 0 if that's negative */ + paddusw_r2r ( mm3, mm7 ); // mm7 = our magic number + + /* + * Now compare the similarity totals against our threshold. The pcmpgtw + * instruction will populate the target register with a bunch of mask bits, + * filling words where the comparison is true with 1s and ones where it's + * false with 0s. A few ANDs and NOTs and an OR later, we have bobbed + * values for pixels under the similarity threshold and weaved ones for + * pixels over the threshold. + */ + + pcmpgtw_m2r( *&qwThreshold, mm7 ); // mm7 = 0xffff where we're greater than the threshold, 0 elsewhere + movq_r2r ( mm7, mm6 ); // mm6 = 0xffff where we're greater than the threshold, 0 elsewhere + pand_r2r ( mm1, mm7 ); // mm7 = weaved data where we're greater than the threshold, 0 elsewhere + pandn_r2r ( mm0, mm6 ); // mm6 = bobbed data where we're not greater than the threshold, 0 elsewhere + por_r2r ( mm6, mm7 ); // mm7 = bobbed and weaved data + + movq_r2m ( mm7, *Dest++ ); + } + } + + // Copy last odd line if we're processing an odd field. 
+ if (IsOdd) + { + xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + // clear out the MMX registers ready for doing floating point + // again + emms(); + +#endif + + return 1; +} + + +// This is a simple lightweight DeInterlace method that uses little CPU time +// but gives very good results for low or intermedite motion. (MORE CPU THAN BOB) +// It defers frames by one field, but that does not seem to produce noticeable +// lip sync problems. +// +// The method used is to take either the older or newer weave pixel depending +// upon which give the smaller comb factor, and then clip to avoid large damage +// when wrong. +// +// I'd intended this to be part of a larger more elaborate method added to +// Blended Clip but this give too good results for the CPU to ignore here. +static int deinterlace_greedy_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#if defined(ARCH_X86) || defined(ARCH_X86_64) + int Line; + int LoopCtr; + uint64_t *L1; // ptr to Line1, of 3 + uint64_t *L2; // ptr to Line2, the weave line + uint64_t *L3; // ptr to Line3 + uint64_t *LP2; // ptr to prev Line2 + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + uint8_t* pPrevLines; + + static mmx_t ShiftMask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; + + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + long GreedyMaxComb = 15; + static mmx_t MaxComb; + int i; + + if ( psrc[0] == NULL || psrc[1] == NULL ) + return 0; + + if (IsOdd) + pPrevLines = psrc[1] + width; + else + pPrevLines = psrc[1]; + + + for( i = 0; i < 8; i++ ) + MaxComb.ub[i] = GreedyMaxComb; // How badly do we let it weave? 0-255 + + + // copy first even line no matter what, and the first odd line if we're + // processing an EVEN field. (note diff from other deint rtns.) 
+ xine_fast_memcpy(pdst, pEvenLines, LineLength); //DL0 + if (!IsOdd) + xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength); //DL1 + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + LoopCtr = LineLength / 8; // there are LineLength / 8 qwords per line + + if (IsOdd) + { + L1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + L2 = (uint64_t *)(pOddLines + Line * SourcePitch); + L3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + LP2 = (uint64_t *)(pPrevLines + Line * SourcePitch); // prev Odd lines + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + else + { + L1 = (uint64_t *)(pOddLines + Line * SourcePitch); + L2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + L3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + LP2 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); //prev even lines + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + + xine_fast_memcpy((char *)Dest + LineLength, L3, LineLength); + +// For ease of reading, the comments below assume that we're operating on an odd +// field (i.e., that info->IsOdd is true). Assume the obvious for even lines.. 
+ + while( LoopCtr-- ) + { + movq_m2r ( *L1++, mm1 ); + movq_m2r ( *L2++, mm2 ); + movq_m2r ( *L3++, mm3 ); + movq_m2r ( *LP2++, mm0 ); + + // average L1 and L3 leave result in mm4 + movq_r2r ( mm1, mm4 ); // L1 + + pand_m2r ( ShiftMask, mm4 ); + psrlw_i2r ( 01, mm4 ); + movq_r2r ( mm3, mm5 ); // L3 + pand_m2r ( ShiftMask, mm5 ); + psrlw_i2r ( 01, mm5 ); + paddb_r2r ( mm5, mm4 ); // the average, for computing comb + + // get abs value of possible L2 comb + movq_r2r ( mm2, mm7 ); // L2 + psubusb_r2r ( mm4, mm7 ); // L2 - avg + movq_r2r ( mm4, mm5 ); // avg + psubusb_r2r ( mm2, mm5 ); // avg - L2 + por_r2r ( mm7, mm5 ); // abs(avg-L2) + movq_r2r ( mm4, mm6 ); // copy of avg for later + + // get abs value of possible LP2 comb + movq_r2r ( mm0, mm7 ); // LP2 + psubusb_r2r ( mm4, mm7 ); // LP2 - avg + psubusb_r2r ( mm0, mm4 ); // avg - LP2 + por_r2r ( mm7, mm4 ); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r ( mm5, mm4 ); // see if it goes to zero + psubusb_r2r ( mm5, mm5 ); // 0 + pcmpeqb_r2r ( mm5, mm4 ); // if (mm4=0) then FF else 0 + pcmpeqb_r2r ( mm4, mm5 ); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r ( mm2, mm5 ); // use L2 if mm5 == ff, else 0 + pand_r2r ( mm0, mm4 ); // use LP2 if mm4 = ff, else 0 + por_r2r ( mm5, mm4 ); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. + + movq_r2r ( mm1, mm2 ); // copy L1 + psubusb_r2r ( mm3, mm2 ); // - L3, with saturation + paddusb_r2r ( mm3, mm2 ); // now = Max(L1,L3) + + pcmpeqb_r2r ( mm7, mm7 ); // all ffffffff + psubusb_r2r ( mm1, mm7 ); // - L1 + paddusb_r2r ( mm7, mm3 ); // add, may sat at fff.. 
+ psubusb_r2r ( mm7, mm3 ); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_m2r ( MaxComb, mm2 ); // increase max by diff + psubusb_m2r ( MaxComb, mm3 ); // lower min by diff + + psubusb_r2r ( mm3, mm4 ); // best - Min + paddusb_r2r ( mm3, mm4 ); // now = Max(best,Min(L1,L3) + + pcmpeqb_r2r ( mm7, mm7 ); // all ffffffff + psubusb_r2r ( mm4, mm7 ); // - Max(best,Min(best,L3) + paddusb_r2r ( mm7, mm2 ); // add may sat at FFF.. + psubusb_r2r ( mm7, mm2 ); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + + movq_r2m ( mm2, *Dest++ ); // move in our clipped best + + } + } + + /* Copy last odd line if we're processing an Odd field. */ + if (IsOdd) + { + xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + /* clear out the MMX registers ready for doing floating point again */ + emms(); + +#endif + + return 1; +} + +/* Use one field to interpolate the other (low cpu utilization) + Will lose resolution but does not produce weaving effect + (good for fast moving scenes) also know as "linear interpolation" +*/ +static void deinterlace_onefield_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#if defined(ARCH_X86) || defined(ARCH_X86_64) + int Line; + uint64_t *YVal1; + uint64_t *YVal3; + uint64_t *Dest; + uint8_t* pEvenLines = psrc[0]; + uint8_t* pOddLines = psrc[0]+width; + int LineLength = width; + int SourcePitch = width * 2; + int IsOdd = 1; + + int n; + + static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}}; + + /* + * copy first even line no matter what, and the first odd line if we're + * processing an odd field. 
+ */ + + xine_fast_memcpy(pdst, pEvenLines, LineLength); + if (IsOdd) + xine_fast_memcpy(pdst + LineLength, pOddLines, LineLength); + + height = height / 2; + for (Line = 0; Line < height - 1; ++Line) + { + if (IsOdd) + { + YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 2) * LineLength); + } + else + { + YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch); + YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch); + Dest = (uint64_t *)(pdst + (Line * 2 + 1) * LineLength); + } + + // Copy the odd line to the overlay verbatim. + xine_fast_memcpy((char *)Dest + LineLength, YVal3, LineLength); + + n = LineLength >> 3; + while( n-- ) + { + movq_m2r (*YVal1++, mm0); + movq_m2r (*YVal3++, mm2); + + // get average in mm0 + pand_m2r ( Mask, mm0 ); + pand_m2r ( Mask, mm2 ); + psrlw_i2r ( 01, mm0 ); + psrlw_i2r ( 01, mm2 ); + paddw_r2r ( mm2, mm0 ); + + movq_r2m ( mm0, *Dest++ ); + } + } + + /* Copy last odd line if we're processing an even field. */ + if (! IsOdd) + { + xine_fast_memcpy(pdst + (height * 2 - 1) * LineLength, + pOddLines + (height - 1) * SourcePitch, + LineLength); + } + + /* clear out the MMX registers ready for doing floating point + * again + */ + emms(); +#endif +} + +/* Linear Blend filter - does a kind of vertical blurring on the image. 
+ (idea borrowed from mplayer's sources) +*/ +static void deinterlace_linearblend_yuv_mmx( uint8_t *pdst, uint8_t *psrc[], + int width, int height ) +{ +#if defined(ARCH_X86) || defined(ARCH_X86_64) + int Line; + uint64_t *YVal1; + uint64_t *YVal2; + uint64_t *YVal3; + uint64_t *Dest; + int LineLength = width; + + int n; + + /* Copy first line */ + xine_fast_memcpy(pdst, psrc[0], LineLength); + + for (Line = 1; Line < height - 1; ++Line) + { + YVal1 = (uint64_t *)(psrc[0] + (Line - 1) * LineLength); + YVal2 = (uint64_t *)(psrc[0] + (Line) * LineLength); + YVal3 = (uint64_t *)(psrc[0] + (Line + 1) * LineLength); + Dest = (uint64_t *)(pdst + Line * LineLength); + + n = LineLength >> 3; + while( n-- ) + { + /* load data from 3 lines */ + movq_m2r (*YVal1++, mm0); + movq_m2r (*YVal2++, mm1); + movq_m2r (*YVal3++, mm2); + + /* expand bytes to words */ + punpckhbw_r2r (mm0, mm3); + punpckhbw_r2r (mm1, mm4); + punpckhbw_r2r (mm2, mm5); + punpcklbw_r2r (mm0, mm0); + punpcklbw_r2r (mm1, mm1); + punpcklbw_r2r (mm2, mm2); + + /* + * deinterlacing: + * deint_line = (line0 + 2*line1 + line2) / 4 + */ + psrlw_i2r (07, mm0); + psrlw_i2r (06, mm1); + psrlw_i2r (07, mm2); + psrlw_i2r (07, mm3); + psrlw_i2r (06, mm4); + psrlw_i2r (07, mm5); + paddw_r2r (mm1, mm0); + paddw_r2r (mm2, mm0); + paddw_r2r (mm4, mm3); + paddw_r2r (mm5, mm3); + psrlw_i2r (03, mm0); + psrlw_i2r (03, mm3); + + /* pack 8 words to 8 bytes in mm0 */ + packuswb_r2r (mm3, mm0); + + movq_r2m ( mm0, *Dest++ ); + } + } + + /* Copy last line */ + xine_fast_memcpy(pdst + Line * LineLength, + psrc[0] + Line * LineLength, LineLength); + + /* clear out the MMX registers ready for doing floating point + * again + */ + emms(); +#endif +} + +/* Linear Blend filter - C version contributed by Rogerio Brito. + This algorithm has the same interface as the other functions. + + The destination "screen" (pdst) is constructed from the source + screen (psrc[0]) line by line. 
+
+   The i-th line of the destination screen is the average of 3 lines
+   from the source screen: the (i-1)-th, i-th and (i+1)-th lines, with
+   the i-th line having weight 2 in the computation.
+
+   Remarks:
+   * each line on pdst doesn't depend on previous lines;
+   * due to the way the algorithm is defined, the first & last lines of the
+     screen aren't deinterlaced.
+   * NOTE(review): the first remark only holds when pdst and psrc[0] are
+     different buffers; if they are the same buffer, already blended lines
+     are read back as input - confirm whether in-place use is intended.
+
+*/
+static void deinterlace_linearblend_yuv( uint8_t *pdst, uint8_t *psrc[],
+    int width, int height )
+{
+  register int x, y;
+  register uint8_t *l0, *l1, *l2, *l3;
+
+  /* An empty frame has nothing to copy or blend. */
+  if (width <= 0 || height <= 0)
+    return;
+
+  l0 = pdst;       /* target line */
+  l1 = psrc[0];    /* 1st source line */
+
+  /* Copy the first line */
+  xine_fast_memcpy(l0, l1, width);
+
+  /* A single-line frame is complete now; going on would write a second
+     line past the end of pdst. */
+  if (height == 1)
+    return;
+
+  l0 += width;
+  l2 = l1 + width; /* 2nd source line = line that follows l1 */
+  l3 = l2 + width; /* 3rd source line = line that follows l2 */
+
+  for (y = 1; y < height-1; ++y) {
+    /* computes avg of: l1 + 2*l2 + l3 */
+
+    for (x = 0; x < width; ++x) {
+      l0[x] = (l1[x] + (l2[x]<<1) + l3[x]) >> 2;
+    }
+
+    /* updates the line pointers */
+    l1 = l2; l2 = l3; l3 += width;
+    l0 += width;
+  }
+
+  /* Copy the last line.  After the loop l0 addresses the last destination
+     line and l2 (not l1) the last source line.  Skip the copy when both
+     are the same memory (in-place use). */
+  if (l0 != l2)
+    xine_fast_memcpy(l0, l2, width);
+}
+
+/* Returns 1 when xine_mm_accel() reports MMX support, 0 otherwise
+   (always 0 on non-x86 builds).  The flags are queried once and cached
+   in a function-local static. */
+static int check_for_mmx(void)
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+  static int config_flags = -1;
+
+  if ( config_flags == -1 )
+    config_flags = xine_mm_accel();
+  if (config_flags & MM_ACCEL_X86_MMX)
+    return 1;
+  return 0;
+#else
+  return 0;
+#endif
+}
+
+/* generic YUV deinterlacer
+   pdst -> pointer to destination bitmap
+   psrc -> array of pointers to source bitmaps ([0] = most recent)
+   width,height -> dimension for bitmaps
+   method -> DEINTERLACE_xxx
+
+   Methods that have only an MMX implementation fall back to a plain copy
+   of psrc[0] when MMX is unavailable (or, for weave/greedy, when the MMX
+   routine returns 0).  DEINTERLACE_ONEFIELDXV and unknown methods leave
+   pdst untouched.
+*/
+
+void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[],
+    int width, int height, int method )
+{
+  switch( method ) {
+    case DEINTERLACE_NONE:
+      xine_fast_memcpy(pdst,psrc[0],width*height);
+      break;
+    case DEINTERLACE_BOB:
+      if( check_for_mmx() )
+        deinterlace_bob_yuv_mmx(pdst,psrc,width,height);
+      else /* FIXME: provide an alternative? */
+        xine_fast_memcpy(pdst,psrc[0],width*height);
+      break;
+    case DEINTERLACE_WEAVE:
+      if( check_for_mmx() )
+      {
+        if( !deinterlace_weave_yuv_mmx(pdst,psrc,width,height) )
+          xine_fast_memcpy(pdst,psrc[0],width*height);
+      }
+      else /* FIXME: provide an alternative? */
+        xine_fast_memcpy(pdst,psrc[0],width*height);
+      break;
+    case DEINTERLACE_GREEDY:
+      if( check_for_mmx() )
+      {
+        if( !deinterlace_greedy_yuv_mmx(pdst,psrc,width,height) )
+          xine_fast_memcpy(pdst,psrc[0],width*height);
+      }
+      else /* FIXME: provide an alternative? */
+        xine_fast_memcpy(pdst,psrc[0],width*height);
+      break;
+    case DEINTERLACE_ONEFIELD:
+      if( check_for_mmx() )
+        deinterlace_onefield_yuv_mmx(pdst,psrc,width,height);
+      else /* FIXME: provide an alternative? */
+        xine_fast_memcpy(pdst,psrc[0],width*height);
+      break;
+    case DEINTERLACE_ONEFIELDXV:
+      lprintf("ONEFIELDXV must be handled by the video driver.\n");
+      break;
+    case DEINTERLACE_LINEARBLEND:
+      if( check_for_mmx() )
+        deinterlace_linearblend_yuv_mmx(pdst,psrc,width,height);
+      else
+        deinterlace_linearblend_yuv(pdst,psrc,width,height);
+      break;
+    default:
+      lprintf("unknown method %d.\n",method);
+      break;
+  }
+}
+
+/* Returns non-zero when deinterlace_yuv() has a real implementation for
+   the given method on this machine: always for NONE and LINEARBLEND,
+   only with MMX for BOB/WEAVE/GREEDY/ONEFIELD, never for ONEFIELDXV or
+   unknown values. */
+int deinterlace_yuv_supported ( int method )
+{
+  switch( method ) {
+    case DEINTERLACE_NONE:
+      return 1;
+    case DEINTERLACE_BOB:
+    case DEINTERLACE_WEAVE:
+    case DEINTERLACE_GREEDY:
+    case DEINTERLACE_ONEFIELD:
+      return check_for_mmx();
+    case DEINTERLACE_ONEFIELDXV:
+      lprintf ("ONEFIELDXV must be handled by the video driver.\n");
+      return 0;
+    case DEINTERLACE_LINEARBLEND:
+      return 1;
+  }
+
+  return 0;
+}
+
+/* NULL-terminated list of method names, in DEINTERLACE_xxx order. */
+char *deinterlace_methods[] = {
+  "none",
+  "bob",
+  "weave",
+  "greedy",
+  "onefield",
+  "onefield_xv",
+  "linearblend",
+  NULL
+};
+
+
diff --git a/src/modules/xine/deinterlace.h b/src/modules/xine/deinterlace.h
new file mode 100644
index 0000000..5d9018c
--- /dev/null
+++ b/src/modules/xine/deinterlace.h
@@ -0,0 +1,47 @@
+ /*
+ * Copyright (C) 2001 the xine project
+ *
+ * This file is part of xine, a free video player.
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Deinterlace routines by Miguel Freitas + * based of DScaler project sources (deinterlace.sourceforge.net) + * + * Currently only available for Xv driver and MMX extensions + * + */ + +#ifndef __DEINTERLACE_H__ +#define __DEINTERLACE_H__ + +//#include "video_out.h" +#include + +int deinterlace_yuv_supported ( int method ); +void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc[], + int width, int height, int method ); + +#define DEINTERLACE_NONE 0 +#define DEINTERLACE_BOB 1 +#define DEINTERLACE_WEAVE 2 +#define DEINTERLACE_GREEDY 3 +#define DEINTERLACE_ONEFIELD 4 +#define DEINTERLACE_ONEFIELDXV 5 +#define DEINTERLACE_LINEARBLEND 6 + +extern char *deinterlace_methods[]; + +#endif diff --git a/src/modules/xine/factory.c b/src/modules/xine/factory.c new file mode 100644 index 0000000..5c19826 --- /dev/null +++ b/src/modules/xine/factory.c @@ -0,0 +1,46 @@ +/* + * factory.c -- the factory method interfaces + * Copyright (C) 2003-2004 Ushodaya Enterprises Limited + * Author: Charles Yates + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include + +#include "filter_deinterlace.h" + +void *mlt_create_producer( char *id, void *arg ) +{ + return NULL; +} + +void *mlt_create_filter( char *id, void *arg ) +{ + if ( !strcmp( id, "deinterlace" ) ) + return filter_deinterlace_init( arg ); + return NULL; +} + +void *mlt_create_transition( char *id, void *arg ) +{ + return NULL; +} + +void *mlt_create_consumer( char *id, void *arg ) +{ + return NULL; +} + diff --git a/src/modules/core/filter_deinterlace.c b/src/modules/xine/filter_deinterlace.c similarity index 79% rename from src/modules/core/filter_deinterlace.c rename to src/modules/xine/filter_deinterlace.c index 3a83842..e4b851c 100644 --- a/src/modules/core/filter_deinterlace.c +++ b/src/modules/xine/filter_deinterlace.c @@ -19,6 +19,7 @@ */ #include "filter_deinterlace.h" +#include "deinterlace.h" #include @@ -41,6 +42,7 @@ screen aren't deinterlaced. */ +#if 0 static void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc, int width, int height ) { register int x, y; @@ -69,6 +71,7 @@ static void deinterlace_yuv( uint8_t *pdst, uint8_t *psrc, int width, int height // Copy the last line memcpy(l0, l1, width); } +#endif /** Do it :-). 
*/ @@ -77,6 +80,9 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format * { int error = 0; + // Pop the service off the stack + mlt_filter filter = mlt_frame_pop_service( this ); + // Check that we want progressive and we aren't already progressive if ( *format == mlt_image_yuv422 && !mlt_properties_get_int( mlt_frame_properties( this ), "progressive" ) && @@ -85,8 +91,21 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format * // Get the input image error = mlt_frame_get_image( this, image, format, width, height, 1 ); + // Determine deinterlace method + char *method_str = mlt_properties_get( mlt_filter_properties( filter ), "method" ); + int method = DEINTERLACE_LINEARBLEND; + + if ( strcmp( method_str, "bob" ) == 0 ) + method = DEINTERLACE_BOB; + else if ( strcmp( method_str, "weave" ) == 0 ) + method = DEINTERLACE_BOB; + else if ( strcmp( method_str, "greedy" ) == 0 ) + method = DEINTERLACE_GREEDY; + else if ( strcmp( method_str, "onefield" ) == 0 ) + method = DEINTERLACE_ONEFIELD; + // Deinterlace the image - deinterlace_yuv( *image, *image, *width * 2, *height ); + deinterlace_yuv( *image, image, *width * 2, *height, method ); // Make sure that others know the frame is deinterlaced mlt_properties_set_int( mlt_frame_properties( this ), "progressive", 1 ); @@ -105,7 +124,12 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format * static mlt_frame deinterlace_process( mlt_filter this, mlt_frame frame ) { + // Push this on to the service stack + mlt_frame_push_service( frame, this ); + + // Push the get_image method on to the stack mlt_frame_push_get_image( frame, filter_get_image ); + return frame; } @@ -116,7 +140,10 @@ mlt_filter filter_deinterlace_init( void *arg ) { mlt_filter this = mlt_filter_new( ); if ( this != NULL ) + { this->process = deinterlace_process; + mlt_properties_set( mlt_filter_properties( this ), "method", arg == NULL ? 
"linearblend" : arg ); + } return this; } diff --git a/src/modules/core/filter_deinterlace.h b/src/modules/xine/filter_deinterlace.h similarity index 100% rename from src/modules/core/filter_deinterlace.h rename to src/modules/xine/filter_deinterlace.h diff --git a/src/modules/xine/xineutils.h b/src/modules/xine/xineutils.h new file mode 100644 index 0000000..0b41bcc --- /dev/null +++ b/src/modules/xine/xineutils.h @@ -0,0 +1,1098 @@ +/* + * Copyright (C) 2000-2004 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * $Id$ + * + */ +#ifndef XINEUTILS_H +#define XINEUTILS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#if HAVE_LIBGEN_H +# include +#endif + +#ifdef XINE_COMPILE +# include "attributes.h" +# include "compat.h" +# include "xmlparser.h" +# include "xine_buffer.h" +# include "configfile.h" +#else +# include +# include +# include +# include +# include +#endif + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + + /* + * debugable mutexes + */ + + typedef struct { + pthread_mutex_t mutex; + char id[80]; + char *locked_by; + } xine_mutex_t; + + int xine_mutex_init (xine_mutex_t *mutex, const pthread_mutexattr_t *mutexattr, + char *id); + + int xine_mutex_lock (xine_mutex_t *mutex, char *who); + int xine_mutex_unlock (xine_mutex_t *mutex, char *who); + int xine_mutex_destroy (xine_mutex_t *mutex); + + + + /* CPU Acceleration */ + +/* + * The type of an value that fits in an MMX register (note that long + * long constant values MUST be suffixed by LL and unsigned long long + * values by ULL, lest they be truncated by the compiler) + */ + +/* generic accelerations */ +#define MM_ACCEL_MLIB 0x00000001 + +/* x86 accelerations */ +#define MM_ACCEL_X86_MMX 0x80000000 +#define MM_ACCEL_X86_3DNOW 0x40000000 +#define MM_ACCEL_X86_MMXEXT 0x20000000 +#define MM_ACCEL_X86_SSE 0x10000000 +#define MM_ACCEL_X86_SSE2 0x08000000 +/* powerpc accelerations */ +#define MM_ACCEL_PPC_ALTIVEC 0x04000000 +/* x86 compat defines */ +#define MM_MMX MM_ACCEL_X86_MMX +#define MM_3DNOW MM_ACCEL_X86_3DNOW +#define MM_MMXEXT MM_ACCEL_X86_MMXEXT +#define MM_SSE MM_ACCEL_X86_SSE +#define MM_SSE2 MM_ACCEL_X86_SSE2 + +uint32_t xine_mm_accel (void); + +#if defined(ARCH_X86) || defined(ARCH_X86_64) + 
+typedef union { + int64_t q; /* Quadword (64-bit) value */ + uint64_t uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */ + + + +#define mmx_i2r(op,imm,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "i" (imm) ) + +#define mmx_m2r(op,mem,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op,reg,mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op,regs,regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + + +#define emms() __asm__ __volatile__ ("emms") + +#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) +#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) +#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) + +#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) +#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) +#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) + +#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) +#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) +#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) +#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) + +#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) +#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) + +#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) +#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) +#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) +#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) +#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) +#define paddw_r2r(regs,regd) mmx_r2r (paddw, 
regs, regd) + +#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) +#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) +#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) +#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) + +#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) +#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) +#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) +#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) + +#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) +#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) + +#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) +#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) + +#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) +#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) +#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) + +#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) +#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) +#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) +#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) + +#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) +#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) + +#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) +#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) + +#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) +#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) + +#define por_m2r(var,reg) mmx_m2r (por, var, reg) +#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) + +#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) +#define pslld_m2r(var,reg) mmx_m2r (pslld, 
var, reg) +#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) +#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) +#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) +#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) +#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) +#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) +#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) + +#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) +#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) +#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) +#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) +#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) +#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) + +#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) +#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) +#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) +#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) +#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) +#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) +#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) +#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) +#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) + +#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) +#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) +#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) +#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) +#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) +#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) + +#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) +#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) +#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) +#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) + +#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) +#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) +#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) +#define 
psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) + +#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) +#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) +#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) +#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) +#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) +#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) + +#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) +#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) +#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) +#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) +#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) +#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) + +#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) +#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) + + +/* 3DNOW extensions */ + +#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) +#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) + + +/* AMD MMX extensions - also available in intel SSE */ + + +#define mmx_m2ri(op,mem,reg,imm) \ + __asm__ __volatile__ (#op " %1, %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem), "X" (imm)) +#define mmx_r2ri(op,regs,regd,imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + : /* nothing */ \ + : "X" (imm) ) + +#define mmx_fetch(mem,hint) \ + __asm__ __volatile__ ("prefetch" #hint " %0" \ + : /* nothing */ \ + : "X" (mem)) + + +#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) + +#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) + +#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) +#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) +#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) +#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) + +#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) + +#define pinsrw_r2r(reg,mmreg,imm) 
mmx_r2ri (pinsrw, reg, mmreg, imm) + +#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) +#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) + +#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) +#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) + +#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) +#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) + +#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) +#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) + +#define pmovmskb(mmreg,reg) \ + __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) + +#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) +#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) + +#define prefetcht0(mem) mmx_fetch (mem, t0) +#define prefetcht1(mem) mmx_fetch (mem, t1) +#define prefetcht2(mem) mmx_fetch (mem, t2) +#define prefetchnta(mem) mmx_fetch (mem, nta) + +#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) +#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) + +#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) +#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) + +#define sfence() __asm__ __volatile__ ("sfence\n\t") + +typedef union { + float sf[4]; /* Single-precision (32-bit) value */ +} ATTR_ALIGN(16) sse_t; /* On a 16 byte (128-bit) boundary */ + + +#define sse_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm) ) + +#define sse_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)) + +#define sse_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ) + +#define sse_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define sse_r2ri(op, regs, regd, imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + : /* nothing */ \ + : "X" (imm) ) + +#define sse_m2ri(op, mem, reg, subop) \ + __asm__ __volatile__ (#op " 
%0, %%" #reg ", " #subop \ + : /* nothing */ \ + : "X" (mem)) + + +#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg) +#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var) +#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd) + +#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var) + +#define movups_m2r(var, reg) sse_m2r(movups, var, reg) +#define movups_r2m(reg, var) sse_r2m(movups, reg, var) +#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd) + +#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd) + +#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd) + +#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg) +#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var) + +#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg) +#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var) + +#define movss_m2r(var, reg) sse_m2r(movss, var, reg) +#define movss_r2m(reg, var) sse_r2m(movss, reg, var) +#define movss_r2r(regs, regd) sse_r2r(movss, regs, regd) + +#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index) +#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index) + +#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg) +#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg) + +#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg) +#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, mmreg, xmmreg) + +#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg) +#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, mmreg, xmmreg) + +#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg) +#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg) + +#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) +#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) + +#define cvttss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) +#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) + +#define movmskps(xmmreg, reg) \ + 
__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) + +#define addps_m2r(var, reg) sse_m2r(addps, var, reg) +#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd) + +#define addss_m2r(var, reg) sse_m2r(addss, var, reg) +#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd) + +#define subps_m2r(var, reg) sse_m2r(subps, var, reg) +#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd) + +#define subss_m2r(var, reg) sse_m2r(subss, var, reg) +#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd) + +#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg) +#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd) + +#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg) +#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd) + +#define divps_m2r(var, reg) sse_m2r(divps, var, reg) +#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd) + +#define divss_m2r(var, reg) sse_m2r(divss, var, reg) +#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd) + +#define rcpps_m2r(var, reg) sse_m2r(rcpps, var, reg) +#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd) + +#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg) +#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd) + +#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg) +#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd) + +#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg) +#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd) + +#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg) +#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd) + +#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg) +#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd) + +#define andps_m2r(var, reg) sse_m2r(andps, var, reg) +#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd) + +#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg) +#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd) + +#define orps_m2r(var, reg) sse_m2r(orps, var, reg) +#define orps_r2r(regs, regd) 
sse_r2r(orps, regs, regd) + +#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg) +#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd) + +#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg) +#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd) + +#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg) +#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd) + +#define minps_m2r(var, reg) sse_m2r(minps, var, reg) +#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd) + +#define minss_m2r(var, reg) sse_m2r(minss, var, reg) +#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd) + +#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op) +#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op) + +#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0) +#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0) + +#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1) +#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1) + +#define cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2) +#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2) + +#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3) +#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3) + +#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4) +#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4) + +#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5) +#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5) + +#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6) +#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6) + +#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7) +#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7) + +#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op) +#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op) + +#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0) +#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, 
regd, 0) + +#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1) +#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1) + +#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2) +#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2) + +#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3) +#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3) + +#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4) +#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4) + +#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5) +#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5) + +#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6) +#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6) + +#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7) +#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7) + +#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg) +#define comiss_r2r(regs, regd) sse_r2r(comiss, regs, regd) + +#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg) +#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd) + +#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg) +#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd) + +#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg) +#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd) + +#define fxrstor(mem) \ + __asm__ __volatile__ ("fxrstor %0" \ + : /* nothing */ \ + : "X" (mem)) + +#define fxsave(mem) \ + __asm__ __volatile__ ("fxsave %0" \ + : /* nothing */ \ + : "X" (mem)) + +#define stmxcsr(mem) \ + __asm__ __volatile__ ("stmxcsr %0" \ + : /* nothing */ \ + : "X" (mem)) + +#define ldmxcsr(mem) \ + __asm__ __volatile__ ("ldmxcsr %0" \ + : /* nothing */ \ + : "X" (mem)) +#endif /*ARCH_X86 */ + + + + /* Optimized/fast memcpy */ + +/* + TODO : fix dll linkage problem for xine_fast_memcpy on win32 + + xine_fast_memcpy dll linkage is screwy here. 
+ declaring as dllimport seems to fix the problem + but causes compiler warning with libxineutils +*/ +#ifdef _MSC_VER +__declspec( dllimport ) extern void *(* xine_fast_memcpy)(void *to, const void *from, size_t len); +#else +extern void *(* xine_fast_memcpy)(void *to, const void *from, size_t len); +#endif + +#ifdef HAVE_XINE_INTERNAL_H +/* Benchmark available memcpy methods */ +void xine_probe_fast_memcpy(xine_t *xine); +#endif + + +/* + * Debug stuff + */ +/* + * profiling (unworkable in non DEBUG isn't defined) + */ +void xine_profiler_init (void); +int xine_profiler_allocate_slot (char *label); +void xine_profiler_start_count (int id); +void xine_profiler_stop_count (int id); +void xine_profiler_print_results (void); + +/* + * Allocate and clean memory size_t 'size', then return the pointer + * to the allocated memory. + */ +#if !defined(__GNUC__) || __GNUC__ < 3 +void *xine_xmalloc(size_t size); +#else +void *xine_xmalloc(size_t size) __attribute__ ((__malloc__)); +#endif + +/* + * Same as above, but memory is aligned to 'alignement'. + * **base is used to return pointer to un-aligned memory, use + * this to free the mem chunk + */ +void *xine_xmalloc_aligned(size_t alignment, size_t size, void **base); + +/* + * Get user home directory. + */ +const char *xine_get_homedir(void); + +/* + * Clean a string (remove spaces and '=' at the begin, + * and '\n', '\r' and spaces at the end. 
+ */
+char *xine_chomp (char *str);
+
+/*
+ * A thread-safe usecond sleep
+ */
+void xine_usec_sleep(unsigned usec);
+
+
+  /*
+   * Some string functions
+   */
+
+
+void xine_strdupa(char *dest, char *src);
+#define xine_strdupa(d, s) do {                                     \
+  (d) = NULL;                                                       \
+  if((s) != NULL) {                                                 \
+    (d) = (char *) alloca(strlen((s)) + 1);                         \
+    strcpy((d), (s));                                               \
+  }                                                                 \
+} while(0)
+
+/* Shamefully copied from glibc 2.2.3 */
+#ifdef HAVE_STRPBRK
+#define xine_strpbrk strpbrk
+#else
+static inline char *_private_strpbrk(const char *s, const char *accept) {
+
+  while(*s != '\0') {
+    const char *a = accept;
+    while(*a != '\0')
+      if(*a++ == *s)
+        return(char *) s;
+    ++s;
+  }
+
+  return NULL;
+}
+#define xine_strpbrk _private_strpbrk
+#endif
+
+#if defined HAVE_STRSEP && !defined(_MSC_VER)
+#define xine_strsep strsep
+#else
+static inline char *_private_strsep(char **stringp, const char *delim) {
+  char *begin, *end;
+
+  begin = *stringp;
+  if(begin == NULL)
+    return NULL;
+
+  if(delim[0] == '\0' || delim[1] == '\0') {
+    char ch = delim[0];
+
+    if(ch == '\0')
+      end = NULL;
+    else {
+      if(*begin == ch)
+        end = begin;
+      else if(*begin == '\0')
+        end = NULL;
+      else
+        end = strchr(begin + 1, ch);
+    }
+  }
+  else
+    end = xine_strpbrk(begin, delim);
+
+  if(end) {
+    *end++ = '\0';
+    *stringp = end;
+  }
+  else
+    *stringp = NULL;
+
+  return begin;
+}
+#define xine_strsep _private_strsep
+#endif
+
+
+#ifdef HAVE_SETENV
+#define xine_setenv setenv
+#else
+/*
+ * Fallback for platforms without setenv(): stores "name=val" in the
+ * environment through putenv().  As with setenv(), an existing value is
+ * kept when _xx (the overwrite flag) is zero.  Failures are silent
+ * because the function returns void.
+ */
+static inline void _private_setenv(const char *name, const char *val, int _xx) {
+  size_t len;
+  char *env;
+
+  if(!_xx && getenv(name) != NULL)
+    return;
+
+  /* putenv() keeps the pointer it is given instead of copying the string,
+   * so the buffer must outlive this call: it is heap allocated and
+   * deliberately never freed. */
+  len = strlen(name) + strlen(val) + 2;
+  env = (char *) malloc(len);
+  if(env == NULL)
+    return;
+
+  sprintf(env, "%s%c%s", name, '=', val);
+  putenv(env);
+}
+#define xine_setenv _private_setenv
+#endif
+
+/*
+ * Color Conversion Utility Functions
+ * The following data structures and functions facilitate the conversion
+ * of RGB images to packed YUV (YUY2) images. There are also functions to
+ * convert from YUV9 -> YV12. All of the meaty details are written in
+ * color.c.
+ */ + +typedef struct yuv_planes_s { + + unsigned char *y; + unsigned char *u; + unsigned char *v; + unsigned int row_width; /* frame width */ + unsigned int row_count; /* frame height */ + +} yuv_planes_t; + +void init_yuv_conversion(void); +void init_yuv_planes(yuv_planes_t *yuv_planes, int width, int height); +void free_yuv_planes(yuv_planes_t *yuv_planes); + +extern void (*yuv444_to_yuy2) + (yuv_planes_t *yuv_planes, unsigned char *yuy2_map, int pitch); +extern void (*yuv9_to_yv12) + (unsigned char *y_src, int y_src_pitch, unsigned char *y_dest, int y_dest_pitch, + unsigned char *u_src, int u_src_pitch, unsigned char *u_dest, int u_dest_pitch, + unsigned char *v_src, int v_src_pitch, unsigned char *v_dest, int v_dest_pitch, + int width, int height); +extern void (*yuv411_to_yv12) + (unsigned char *y_src, int y_src_pitch, unsigned char *y_dest, int y_dest_pitch, + unsigned char *u_src, int u_src_pitch, unsigned char *u_dest, int u_dest_pitch, + unsigned char *v_src, int v_src_pitch, unsigned char *v_dest, int v_dest_pitch, + int width, int height); +extern void (*yv12_to_yuy2) + (unsigned char *y_src, int y_src_pitch, + unsigned char *u_src, int u_src_pitch, + unsigned char *v_src, int v_src_pitch, + unsigned char *yuy2_map, int yuy2_pitch, + int width, int height, int progressive); +extern void (*yuy2_to_yv12) + (unsigned char *yuy2_map, int yuy2_pitch, + unsigned char *y_dst, int y_dst_pitch, + unsigned char *u_dst, int u_dst_pitch, + unsigned char *v_dst, int v_dst_pitch, + int width, int height); + +#define SCALEFACTOR 65536 +#define CENTERSAMPLE 128 + +#define COMPUTE_Y(r, g, b) \ + (unsigned char) \ + ((y_r_table[r] + y_g_table[g] + y_b_table[b]) / SCALEFACTOR) +#define COMPUTE_U(r, g, b) \ + (unsigned char) \ + ((u_r_table[r] + u_g_table[g] + u_b_table[b]) / SCALEFACTOR + CENTERSAMPLE) +#define COMPUTE_V(r, g, b) \ + (unsigned char) \ + ((v_r_table[r] + v_g_table[g] + v_b_table[b]) / SCALEFACTOR + CENTERSAMPLE) + +#define UNPACK_BGR15(packed_pixel, r, g, 
b) \ + b = (packed_pixel & 0x7C00) >> 7; \ + g = (packed_pixel & 0x03E0) >> 2; \ + r = (packed_pixel & 0x001F) << 3; + +#define UNPACK_BGR16(packed_pixel, r, g, b) \ + b = (packed_pixel & 0xF800) >> 8; \ + g = (packed_pixel & 0x07E0) >> 3; \ + r = (packed_pixel & 0x001F) << 3; + +#define UNPACK_RGB15(packed_pixel, r, g, b) \ + r = (packed_pixel & 0x7C00) >> 7; \ + g = (packed_pixel & 0x03E0) >> 2; \ + b = (packed_pixel & 0x001F) << 3; + +#define UNPACK_RGB16(packed_pixel, r, g, b) \ + r = (packed_pixel & 0xF800) >> 8; \ + g = (packed_pixel & 0x07E0) >> 3; \ + b = (packed_pixel & 0x001F) << 3; + +extern int y_r_table[256]; +extern int y_g_table[256]; +extern int y_b_table[256]; + +extern int u_r_table[256]; +extern int u_g_table[256]; +extern int u_b_table[256]; + +extern int v_r_table[256]; +extern int v_g_table[256]; +extern int v_b_table[256]; + +/* frame copying functions */ +extern void yv12_to_yv12 + (unsigned char *y_src, int y_src_pitch, unsigned char *y_dst, int y_dst_pitch, + unsigned char *u_src, int u_src_pitch, unsigned char *u_dst, int u_dst_pitch, + unsigned char *v_src, int v_src_pitch, unsigned char *v_dst, int v_dst_pitch, + int width, int height); +extern void yuy2_to_yuy2 + (unsigned char *src, int src_pitch, + unsigned char *dst, int dst_pitch, + int width, int height); + +/* print a hexdump of the given data */ +void xine_hexdump (const char *buf, int length); + +/* + * Optimization macros for conditions + * Taken from the FIASCO L4 microkernel sources + */ +#if !defined(__GNUC__) || __GNUC__ < 3 +# define EXPECT_TRUE(x) (x) +# define EXPECT_FALSE(x) (x) +#else +# define EXPECT_TRUE(x) __builtin_expect((x),1) +# define EXPECT_FALSE(x) __builtin_expect((x),0) +#endif + +#ifdef NDEBUG +#define _x_assert(exp) \ + do { \ + if (!(exp)) \ + fprintf(stderr, "assert: %s:%d: %s: Assertion `%s' failed.\n", \ + __FILE__, __LINE__, __XINE_FUNCTION__, #exp); \ + } while(0) +#else +#define _x_assert(exp) \ + do { \ + if (!(exp)) { \ + fprintf(stderr, 
"assert: %s:%d: %s: Assertion `%s' failed.\n", \ + __FILE__, __LINE__, __XINE_FUNCTION__, #exp); \ + abort(); \ + } \ + } while(0) +#endif + +#define _x_abort() \ + do { \ + fprintf(stderr, "abort: %s:%d: %s: Aborting.\n", \ + __FILE__, __LINE__, __XINE_FUNCTION__); \ + abort(); \ + } while(0) + + +/****** logging with xine **********************************/ + +#ifndef LOG_MODULE + #define LOG_MODULE __FILE__ +#endif /* LOG_MODULE */ + +#define LOG_MODULE_STRING printf("%s: ", LOG_MODULE ); + +#ifdef LOG_VERBOSE + #define LONG_LOG_MODULE_STRING \ + printf("%s: (%s:%d) ", LOG_MODULE, __XINE_FUNCTION__, __LINE__ ); +#else + #define LONG_LOG_MODULE_STRING LOG_MODULE_STRING +#endif /* LOG_VERBOSE */ + +#ifdef LOG + #ifdef __GNUC__ + #define lprintf(fmt, args...) \ + do { \ + LONG_LOG_MODULE_STRING \ + printf(fmt, ##args); \ + } while(0) + #else /* __GNUC__ */ + #ifdef _MSC_VER + #define lprintf(fmtargs) \ + do { \ + LONG_LOG_MODULE_STRING \ + printf("%s", fmtargs); \ + } while(0) + #else /* _MSC_VER */ + #define lprintf(fmt, ...) \ + do { \ + LONG_LOG_MODULE_STRING \ + printf(__VA_ARGS__); \ + } while(0) + #endif /* _MSC_VER */ + #endif /* __GNUC__ */ +#else /* LOG */ + #ifdef __GNUC__ + #define lprintf(fmt, args...) do {} while(0) + #else + #ifdef _MSC_VER + #define lprintf + #else + #define lprintf(...) do {} while(0) + #endif /* _MSC_VER */ + #endif /* __GNUC__ */ +#endif /* LOG */ + +#ifdef __GNUC__ + #define llprintf(cat, fmt, args...) \ + do{ \ + if(cat){ \ + LONG_LOG_MODULE_STRING \ + printf( fmt, ##args ); \ + } \ + }while(0) +#else +#ifdef _MSC_VER + #define llprintf(cat, fmtargs) \ + do{ \ + if(cat){ \ + LONG_LOG_MODULE_STRING \ + printf( "%s", fmtargs ); \ + } \ + }while(0) +#else + #define llprintf(cat, ...) \ + do{ \ + if(cat){ \ + LONG_LOG_MODULE_STRING \ + printf( __VA_ARGS__ ); \ + } \ + }while(0) +#endif /* _MSC_VER */ +#endif /* __GNUC__ */ + +#ifdef __GNUC__ + #define xprintf(xine, verbose, fmt, args...) 
 \ + do { \ + if((xine) && (xine)->verbosity >= verbose){ \ + xine_log(xine, XINE_LOG_TRACE, fmt, ##args); \ + } \ + } while(0) +#else +#ifdef _MSC_VER + #define xprintf(xine, verbose, fmtargs) \ + do { \ + if((xine) && (xine)->verbosity >= verbose){ \ + xine_log(xine, XINE_LOG_TRACE, fmtargs); \ + } \ + } while(0) +#else + #define xprintf(xine, verbose, ...) \ + do { \ + if((xine) && (xine)->verbosity >= verbose){ \ + xine_log(xine, XINE_LOG_TRACE, __VA_ARGS__); \ + } \ + } while(0) +#endif /* _MSC_VER */ +#endif /* __GNUC__ */ + +/* time measuring macros for profiling tasks */ + +#ifdef DEBUG +# define XINE_PROFILE(function) \ + do { \ + struct timeval current_time; \ + double dtime; \ + gettimeofday(&current_time, NULL); \ + dtime = -(current_time.tv_sec + (current_time.tv_usec / 1000000.0)); \ + function; \ + gettimeofday(&current_time, NULL); \ + dtime += current_time.tv_sec + (current_time.tv_usec / 1000000.0); \ + printf("%s: (%s:%d) took %lf seconds\n", \ + LOG_MODULE, __XINE_FUNCTION__, __LINE__, dtime); \ + } while(0) +# define XINE_PROFILE_ACCUMULATE(function) \ + do { \ + struct timeval current_time; \ + static double dtime = 0; \ + gettimeofday(&current_time, NULL); \ + dtime -= current_time.tv_sec + (current_time.tv_usec / 1000000.0); \ + function; \ + gettimeofday(&current_time, NULL); \ + dtime += current_time.tv_sec + (current_time.tv_usec / 1000000.0); \ + printf("%s: (%s:%d) took %lf seconds\n", \ + LOG_MODULE, __XINE_FUNCTION__, __LINE__, dtime); \ + } while(0) +#else +# define XINE_PROFILE(function) function +# define XINE_PROFILE_ACCUMULATE(function) function +#endif /* DEBUG */ + + +/******** double chained lists with builtin iterator *******/ + +typedef struct xine_node_s { + + struct xine_node_s *next, *prev; + + void *content; + + int priority; + +} xine_node_t; + + +typedef struct { + + xine_node_t *first, *last, *cur; + +} xine_list_t; + + + +xine_list_t *xine_list_new (void); + + +/** + * dispose the whole list.
+ * note: disposes _only_ the list structure, content must be free()d elsewhere + */ +void xine_list_free(xine_list_t *l); + + +/** + * returns: Boolean + */ +int xine_list_is_empty (xine_list_t *l); + +/** + * return content of first entry in list. + */ +void *xine_list_first_content (xine_list_t *l); + +/** + * return next content in list. + */ +void *xine_list_next_content (xine_list_t *l); + +/** + * Return last content of list. + */ +void *xine_list_last_content (xine_list_t *l); + +/** + * Return previous content of list. + */ +void *xine_list_prev_content (xine_list_t *l); + +/** + * Append content to list, sorted by decreasing priority. + */ +void xine_list_append_priority_content (xine_list_t *l, void *content, int priority); + +/** + * Append content to list. + */ +void xine_list_append_content (xine_list_t *l, void *content); + +/** + * Insert content in list. + */ +void xine_list_insert_content (xine_list_t *l, void *content); + +/** + * Remove current content in list. + * note: removes only the list entry; content must be free()d elsewhere. + */ +void xine_list_delete_current (xine_list_t *l); + +#ifndef HAVE_BASENAME +/* + * get base name + */ +char *basename (char const *name); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif