From 3f7c53230945e427b019ba5df4fec587e19e29c1 Mon Sep 17 00:00:00 2001 From: ddennedy Date: Thu, 11 Mar 2004 07:22:22 +0000 Subject: [PATCH] added very preliminary mmx for composite. bugfixes to -x and too small rescaling. git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@200 d19143bc-622f-0410-bfdd-b5b2a6649095 --- src/modules/core/Makefile | 14 ++- src/modules/core/composite_line_yuv_mmx.S | 203 +++++++++++++++++++++++++++++ src/modules/core/filter_resize.c | 2 +- src/modules/core/transition_composite.c | 101 +++++++++------ src/modules/gtk2/filter_rescale.c | 2 + 5 files changed, 275 insertions(+), 47 deletions(-) create mode 100644 src/modules/core/composite_line_yuv_mmx.S diff --git a/src/modules/core/Makefile b/src/modules/core/Makefile index 4dd185c..fd7d0df 100644 --- a/src/modules/core/Makefile +++ b/src/modules/core/Makefile @@ -18,14 +18,19 @@ OBJS = factory.o \ transition_mix.o \ transition_region.o -CFLAGS = -O3 -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread +ASM_OBJS = composite_line_yuv_mmx.o + +CFLAGS = -O3 -DUSE_MMX -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread SRCS := $(OBJS:.o=.c) all: $(TARGET) -$(TARGET): $(OBJS) - $(CC) -shared -o $@ $(OBJS) $(LDFLAGS) +$(TARGET): $(OBJS) $(ASM_OBJS) + $(CC) -shared -o $@ $(OBJS) $(ASM_OBJS) $(LDFLAGS) + +composite_line_yuv_mmx.o: composite_line_yuv_mmx.S + $(CC) -o $@ -c composite_line_yuv_mmx.S depend: $(SRCS) $(CC) -MM $(CFLAGS) $^ 1>.depend @@ -34,9 +39,8 @@ dist-clean: clean rm -f .depend clean: - rm -f $(OBJS) $(TARGET) + rm -f $(OBJS) $(ASM_OBJS) $(TARGET) ifneq ($(wildcard .depend),) include .depend endif - diff --git a/src/modules/core/composite_line_yuv_mmx.S b/src/modules/core/composite_line_yuv_mmx.S new file mode 100644 index 0000000..7a5fb80 --- /dev/null +++ b/src/modules/core/composite_line_yuv_mmx.S @@ -0,0 +1,203 @@ + .file "composite_line_yuv_mmx" + .version "01.01" + +gcc2_compiled.: +.data + +.text + .align 16 + +#if !defined(__MINGW32__) && !defined(__CYGWIN__) +.globl composite_line_yuv_mmx + .type composite_line_yuv_mmx,@function +composite_line_yuv_mmx: +#else +.globl _composite_line_yuv_mmx +_composite_line_yuv_mmx: +#endif + +/* + * Arguments + * + * dest: 8(%ebp) %esi + * src: 12(%ebp) + * width_src: 16(%ebp) + * alpha: 20(%ebp) + * weight: 24(%ebp) + * luma: 28(%ebp) + * softness: 32(%ebp) + */ + +/* + * Function call entry + */ + pushl %ebp + movl %esp,%ebp + subl $28,%esp + pushl %edi + pushl %esi + pushl %ebx + +/* Initialise */ + movl 8(%ebp), %esi # get dest + movl $0, %edx # j = 0 + +.loop: + + movl $0xff, %ecx # a = 255 + cmpl $0, 20(%ebp) # if alpha == NULL + je .noalpha + movl 20(%ebp), %edi # a = alpha[ j ] + movb (%edi,%edx), %cl +.noalpha: + + movl 24(%ebp), %eax # mix = weight + cmpl $0, 28(%ebp) # if luma == NULL + je .noluma + movl 28(%ebp), %edi # mix = ... + movl %edx, %ebx + #sall $1, %ebx + movw (%edi,%ebx), %ax # luma[ j*2 ] + cmpw %cx, %ax + jl .luma0 + movl %eax, %ebx + addl 32(%ebp), %ebx # + softness + cmpw %bx, %cx + jge .luma1 + /* TODO: linear interpolate between edges eax and ebx */ + jmp .noluma +.luma0: + movl $0, %eax + jmp .noluma +.luma1: + movl $0xffff, %eax +.noluma: + shrl $8, %eax + + movl %edx, %ebx # edx will be destroyed by mulw + mull %ecx # mix = mix * a... + movl %ebx, %edx # restore edx + shrl $8, %eax # >>8 + andl $0xff, %eax + +/* put alpha and (1-alpha) into mm0 */ +/* 0 aa 0 1-a 0 aa 0 1-a */ + + /* duplicate word */ + movl %eax, %ecx + shll $16, %ecx + orl %eax, %ecx + + movd %ecx, %mm1 + + /* (1 << 16) - mix */ + movl $0x000000ff, %ecx + subl %eax, %ecx + andl $0xff, %ecx + + /* duplicate word */ + movl %ecx, %eax + shll $16, %eax + orl %eax, %ecx + + movd %ecx, %mm0 + + /* unpack words into double words */ + punpcklwd %mm1, %mm0 + +/* put src yuv and dest yuv into mm1 */ +/* 0 UVs 0 UVd 0 Ys 0 Yd */ + + movl 12(%ebp), %edi # get src + movb (%edi), %cl + shll $8, %ecx + movb 1(%edi), %al + shll $24, %eax + orl %eax, %ecx + + movb (%esi), %al # get dest + orl %eax, %ecx + movb 1(%esi), %al + shll $16, %eax + orl %eax, %ecx + + movd %ecx, %mm1 + punpcklbw %mm4, %mm1 + +/* alpha composite */ + pmaddwd %mm1, %mm0 + psrld $8, %mm0 + +/* store result */ + movd %mm0, %eax + movb %al, (%esi) + pextrw $2, %mm0, %eax + movl $128, %eax + movb %al, 1(%esi) + +/* for..next */ + addl $1, %edx # j++ + cmpl %edx, 16(%ebp) # if ( j == width_src ) + je .out + + addl $2, %esi + addl $2, 12(%ebp) + + jmp .loop + +.out: + emms + leal -40(%ebp),%esp + popl %ebx + popl %esi + popl %edi + movl %ebp,%esp + popl %ebp + ret + + +/********************************************/ + +.align 8 +#if !defined(__MINGW32__) && !defined(__CYGWIN__) +.globl composite_have_mmx + .type composite_have_mmx,@function +composite_have_mmx: +#else +.globl _composite_have_mmx +_composite_have_mmx: +#endif + + push %ebx + +# Check if bit 21 in flags word is writeable + + pushfl + popl %eax + movl %eax,%ebx + xorl $0x00200000, %eax + pushl %eax + popfl + pushfl + popl %eax + + cmpl %eax, %ebx + + je .notfound + +# OK, we have CPUID + + movl $1, %eax + cpuid + + test $0x00800000, %edx + jz .notfound + + movl $1, %eax + jmp .out2 + +.notfound: + movl $0, %eax +.out2: + popl %ebx + ret diff --git a/src/modules/core/filter_resize.c b/src/modules/core/filter_resize.c index 7a1f5dd..435cf12 100644 --- a/src/modules/core/filter_resize.c +++ b/src/modules/core/filter_resize.c @@ -130,7 +130,7 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format * } } - return 0; + return error; } /** Filter processing. diff --git a/src/modules/core/transition_composite.c b/src/modules/core/transition_composite.c index 48f2d8a..48d1c6e 100644 --- a/src/modules/core/transition_composite.c +++ b/src/modules/core/transition_composite.c @@ -27,6 +27,14 @@ #include #include +typedef void ( *composite_line_fn )( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness ); + +/* mmx function declarations */ +#ifdef USE_MMX + void composite_line_yuv_mmx( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness ); + int composite_have_mmx( void ); +#endif + /** Geometry struct. */ @@ -155,9 +163,11 @@ static void geometry_calculate( struct geometry_s *output, struct geometry_s *in output->mix = in->mix + ( out->mix - in->mix ) * position; output->distort = in->distort; - output->x = ( int )floor( output->x ) & 0xfffffffe; - output->w = ( int )floor( output->w ) & 0xfffffffe; - output->sw &= 0xfffffffe; + // DRD> These break on negative values. I do not think they are needed + // since yuv_composite takes care of YUYV group alignment + //output->x = ( int )floor( output->x ) & 0xfffffffe; + //output->w = ( int )floor( output->w ) & 0xfffffffe; + //output->sw &= 0xfffffffe; } void transition_destroy_keys( void *arg ) @@ -481,22 +491,48 @@ static void luma_read_yuv422( uint8_t *image, uint16_t **map, int width, int hei *p++ = ( image[ i ] - 16 ) * 299; // 299 = 65535 / 219 } + +/** Composite a source line over a destination line +*/ + +static inline +void composite_line_yuv( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness ) +{ + register int j; + int a, mix; + + for ( j = 0; j < width_src; j ++ ) + { + a = ( alpha == NULL ) ? 255 : *alpha ++; + mix = ( luma == NULL ) ? weight : linearstep( luma[ j ], luma[ j ] + softness, weight ); + mix = ( mix * ( a + 1 ) ) >> 8; + *dest = ( *src++ * mix + *dest * ( ( 1 << 16 ) - mix ) ) >> 16; + dest++; + *dest = ( *src++ * mix + *dest * ( ( 1 << 16 ) - mix ) ) >> 16; + dest++; + } +} + /** Composite function. */ -static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int bpp, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness ) +static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness, composite_line_fn line_fn ) { int ret = 0; - int i, j; + int i; int x_src = 0, y_src = 0; int32_t weight = ( 1 << 16 ) * ( geometry.mix / 100 ); - int stride_src = width_src * bpp; - int stride_dest = width_dest * bpp; + int step = ( field > -1 ) ? 2 : 1; + int bpp = 2; + int stride_src = width_src * bpp * step; + int stride_dest = width_dest * bpp * step; + int alpha_stride = stride_src / bpp; // Adjust to consumer scale int x = geometry.x * width_dest / geometry.nw; int y = geometry.y * height_dest / geometry.nh; + // Align x to a full YUYV group x &= 0xfffffffe; width_src &= 0xfffffffe; @@ -565,38 +601,13 @@ static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int height_src--; } - uint8_t *p = p_src; - uint8_t *q = p_dest; - uint8_t *o = p_dest; - uint16_t *l = p_luma; - uint8_t *z = p_alpha; - - uint8_t a; - int32_t current_weight; - int32_t value; - int step = ( field > -1 ) ? 2 : 1; - - stride_src = stride_src * step; - int alpha_stride = stride_src / bpp; - stride_dest = stride_dest * step; + if ( line_fn == NULL ) + line_fn = composite_line_yuv; // now do the compositing only to cropped extents for ( i = 0; i < height_src; i += step ) { - p = p_src; - q = p_dest; - o = q; - l = p_luma; - z = p_alpha; - - for ( j = 0; j < width_src; j ++ ) - { - a = ( z == NULL ) ? 255 : *z ++; - current_weight = ( l == NULL ) ? weight : linearstep( l[ j ], l[ j ] + softness, weight ); - value = ( current_weight * ( a + 1 ) ) >> 8; - *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16; - *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16; - } + line_fn( p_dest, p_src, width_src, p_alpha, weight, p_luma, softness ); p_src += stride_src; p_dest += stride_dest; @@ -805,7 +816,7 @@ static int get_b_frame_image( mlt_transition this, mlt_frame b_frame, uint8_t ** x -= x % 2; // optimization points - no work to do - if ( *width <= 0 || *height <= 0 ) + if ( *width < 1 || *height < 1 ) return 1; if ( ( x < 0 && -x >= *width ) || ( y < 0 && -y >= *height ) ) @@ -895,7 +906,7 @@ mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame, mlt_pos h = result.h * height / result.nh; x &= 0xfffffffe; - w &= 0xfffffffe; + //w &= 0xfffffffe; // Now we need to create a new destination image dest = mlt_pool_alloc( w * h * 2 ); @@ -979,7 +990,6 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f { uint8_t *dest = *image; uint8_t *src = image_b; - int bpp = 2; uint8_t *alpha = mlt_frame_get_alpha_mask( b_frame ); int progressive = mlt_properties_get_int( a_props, "progressive" ) || mlt_properties_get_int( a_props, "consumer_progressive" ) || @@ -988,6 +998,7 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f int32_t luma_softness = mlt_properties_get_double( properties, "softness" ) * ( 1 << 16 ); uint16_t *luma_bitmap = get_luma( properties, width_b, height_b ); + composite_line_fn line_fn = mlt_properties_get_int( properties, "_MMX" ) ? composite_line_yuv_mmx : composite_line_yuv; for ( field = 0; field < ( progressive ? 1 : 2 ); field++ ) { @@ -1001,7 +1012,7 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f alignment_calculate( &result ); // Composite the b_frame on the a_frame - composite_yuv( dest, *width, *height, bpp, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness ); + composite_yuv( dest, *width, *height, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness, line_fn ); } } } @@ -1030,11 +1041,19 @@ mlt_transition transition_composite_init( char *arg ) mlt_transition this = calloc( sizeof( struct mlt_transition_s ), 1 ); if ( this != NULL && mlt_transition_init( this, NULL ) == 0 ) { + mlt_properties properties = mlt_transition_properties( this ); + this->process = composite_process; - mlt_properties_set( mlt_transition_properties( this ), "start", arg != NULL ? arg : "85%,5%:10%x10%" ); + + // Default starting motion and zoom + mlt_properties_set( properties, "start", arg != NULL ? arg : "85%,5%:10%x10%" ); // Default factory - mlt_properties_set( mlt_transition_properties( this ), "factory", "fezzik" ); + mlt_properties_set( properties, "factory", "fezzik" ); + +#ifdef USE_MMX + //mlt_properties_set_int( properties, "_MMX", composite_have_mmx() ); +#endif } return this; } diff --git a/src/modules/gtk2/filter_rescale.c b/src/modules/gtk2/filter_rescale.c index 5151a05..c57e33d 100644 --- a/src/modules/gtk2/filter_rescale.c +++ b/src/modules/gtk2/filter_rescale.c @@ -38,6 +38,8 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format * *width = 720; if ( *height == 0 ) *height = 576; + if ( *width < 2 || *height < 6 ) + return 1; mlt_properties properties = mlt_frame_properties( this ); int iwidth = *width; -- 1.7.4.4