From 5f0547204a5a08185e9d8162016cc2ec9a670486 Mon Sep 17 00:00:00 2001 From: ddennedy Date: Fri, 30 Jan 2004 13:51:05 +0000 Subject: [PATCH] some bugfixes and rescale filter git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@101 d19143bc-622f-0410-bfdd-b5b2a6649095 --- src/framework/mlt_frame.c | 22 +- src/modules/avformat/producer_avformat.c | 1 + src/modules/gtk2/Makefile | 21 +- src/modules/gtk2/configure | 4 + src/modules/gtk2/factory.c | 3 + src/modules/gtk2/filter_rescale.c | 143 ++++++ src/modules/gtk2/filter_rescale.h | 28 ++ src/modules/gtk2/have_mmx.S | 53 ++ src/modules/gtk2/pixops.c | 781 ++++++++++++++++++++++++++++++ src/modules/gtk2/pixops.h | 69 +++ src/modules/gtk2/producer_pango.c | 2 +- src/modules/gtk2/scale_line_22_33_mmx.S | 183 +++++++ src/modules/vorbis/Makefile | 2 +- 13 files changed, 1295 insertions(+), 17 deletions(-) create mode 100644 src/modules/gtk2/filter_rescale.c create mode 100644 src/modules/gtk2/filter_rescale.h create mode 100644 src/modules/gtk2/have_mmx.S create mode 100644 src/modules/gtk2/pixops.c create mode 100644 src/modules/gtk2/pixops.h create mode 100644 src/modules/gtk2/scale_line_22_33_mmx.S diff --git a/src/framework/mlt_frame.c b/src/framework/mlt_frame.c index 3f583d9..632d73a 100644 --- a/src/framework/mlt_frame.c +++ b/src/framework/mlt_frame.c @@ -723,15 +723,18 @@ int mlt_frame_mix_audio( mlt_frame this, mlt_frame that, float weight, int16_t * int16_t *src, *dest; //static int16_t *extra_src = NULL, *extra_dest = NULL; static int extra_src_samples = 0, extra_dest_samples = 0; - int frequency_src = 0, frequency_dest = 0; - int channels_src = 0, channels_dest = 0; - int samples_src = 0, samples_dest = 0; + int frequency_src = *channels, frequency_dest = *channels; + int channels_src = *channels, channels_dest = *channels; + int samples_src = *samples, samples_dest = *samples; int i, j; + double d = 0, s = 0; mlt_frame_get_audio( this, &p_dest, format, &frequency_dest, &channels_dest, 
&samples_dest ); - //fprintf( stderr, "frame dest samples %d channels %d position %f\n", samples_dest, channels_dest, mlt_properties_get_position( mlt_frame_properties( this ), "position" ) ); + fprintf( stderr, "frame dest samples %d channels %d position %lld\n", samples_dest, channels_dest, mlt_properties_get_position( mlt_frame_properties( this ), "position" ) ); mlt_frame_get_audio( that, &p_src, format, &frequency_src, &channels_src, &samples_src ); - //fprintf( stderr, "frame src samples %d channels %d\n", samples_src, channels_src ); + fprintf( stderr, "frame src samples %d channels %d\n", samples_src, channels_src ); + src = p_src; + dest = p_dest; if ( channels_src > 6 ) channels_src = 0; if ( channels_dest > 6 ) @@ -759,9 +762,6 @@ int mlt_frame_mix_audio( mlt_frame this, mlt_frame that, float weight, int16_t * } else src = p_src; -#else - src = p_src; - dest = p_dest; #endif // determine number of samples to process @@ -778,8 +778,10 @@ int mlt_frame_mix_audio( mlt_frame this, mlt_frame that, float weight, int16_t * { for ( j = 0; j < *channels; j++ ) { - double d = (double) dest[ i * channels_dest + j ]; - double s = (double) src[ i * channels_src + j ]; + if ( j < channels_dest ) + d = (double) dest[ i * channels_dest + j ]; + if ( j < channels_src ) + s = (double) src[ i * channels_src + j ]; dest[ i * channels_dest + j ] = s * weight + d * ( 1.0 - weight ); } } diff --git a/src/modules/avformat/producer_avformat.c b/src/modules/avformat/producer_avformat.c index d19c165..bbefd2a 100644 --- a/src/modules/avformat/producer_avformat.c +++ b/src/modules/avformat/producer_avformat.c @@ -699,6 +699,7 @@ static int producer_get_audio( mlt_frame frame, int16_t **buffer, mlt_audio_form } // Now handle the audio if we have enough + if ( audio_used >= *samples ) { *buffer = malloc( *samples * *channels * sizeof( int16_t ) ); diff --git a/src/modules/gtk2/Makefile b/src/modules/gtk2/Makefile index 3130918..cd001dd 100644 --- a/src/modules/gtk2/Makefile +++ 
b/src/modules/gtk2/Makefile @@ -3,9 +3,14 @@ TARGET = ../libmltgtk2.so OBJS = factory.o \ producer_pixbuf.o \ - producer_pango.o + producer_pango.o \ + pixops.o \ + filter_rescale.o -CFLAGS = `pkg-config gdk-pixbuf-2.0 --cflags` `pkg-config pangoft2 --cflags` -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread +ASM_OBJS = have_mmx.o \ + scale_line_22_33_mmx.o + +CFLAGS = -O3 -DUSE_MMX `pkg-config gdk-pixbuf-2.0 --cflags` `pkg-config pangoft2 --cflags` -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread LDFLAGS = `pkg-config gdk-pixbuf-2.0 --libs` `pkg-config pangoft2 --libs` @@ -13,8 +18,14 @@ SRCS := $(OBJS:.o=.c) all: $(TARGET) -$(TARGET): $(OBJS) - $(CC) -shared -o $@ $(OBJS) $(LDFLAGS) +$(TARGET): $(OBJS) $(ASM_OBJS) + $(CC) -shared -o $@ $(OBJS) $(ASM_OBJS) $(LDFLAGS) + +have_mmx.o: + $(CC) -o $@ -c have_mmx.S + +scale_line_22_33_mmx.o: + $(CC) -o $@ -c scale_line_22_33_mmx.S depend: $(SRCS) $(CC) -MM $(CFLAGS) $^ 1>.depend @@ -23,7 +34,7 @@ dist-clean: clean rm -f .depend clean: - rm -f $(OBJS) $(TARGET) + rm -f $(OBJS) $(ASM_OBJS) $(TARGET) ifneq ($(wildcard .depend),) include .depend diff --git a/src/modules/gtk2/configure b/src/modules/gtk2/configure index 04a232a..34f5515 100755 --- a/src/modules/gtk2/configure +++ b/src/modules/gtk2/configure @@ -8,5 +8,9 @@ pixbuf libmltgtk2.so pango libmltgtk2.so EOF +cat << EOF >> ../filters.dat +rescale libmltgtk2.so +EOF + fi diff --git a/src/modules/gtk2/factory.c b/src/modules/gtk2/factory.c index 38e1692..bbddaa2 100644 --- a/src/modules/gtk2/factory.c +++ b/src/modules/gtk2/factory.c @@ -22,6 +22,7 @@ #include "producer_pixbuf.h" #include "producer_pango.h" +#include "filter_rescale.h" void *mlt_create_producer( char *id, void *arg ) { @@ -34,6 +35,8 @@ void *mlt_create_producer( char *id, void *arg ) void *mlt_create_filter( char *id, void *arg ) { + if ( !strcmp( id, "rescale" ) ) + return filter_rescale_init( arg ); return NULL; } diff --git a/src/modules/gtk2/filter_rescale.c b/src/modules/gtk2/filter_rescale.c 
new file mode 100644
index 0000000..cf0905e
--- /dev/null
+++ b/src/modules/gtk2/filter_rescale.c
@@ -0,0 +1,213 @@
+/*
+ * filter_rescale.c -- scale the producer video frame size to match the consumer
+ * Copyright (C) 2003-2004 Ushodaya Enterprises Limited
+ * Author: Dan Dennedy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "filter_rescale.h"
+#include "pixops.h"
+
+#include <framework/mlt_frame.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <gdk-pixbuf/gdk-pixbuf.h>
+
+/** Translate an interpolation name into a PIXOPS_INTERP_* value.
+    A NULL or unrecognised name selects bilinear.
+*/
+
+static int interp_from_name( const char *name )
+{
+	if ( name != NULL )
+	{
+		if ( strcmp( name, "nearest" ) == 0 )
+			return PIXOPS_INTERP_NEAREST;
+		if ( strcmp( name, "tiles" ) == 0 )
+			return PIXOPS_INTERP_TILES;
+		if ( strcmp( name, "hyper" ) == 0 )
+			return PIXOPS_INTERP_HYPER;
+	}
+	return PIXOPS_INTERP_BILINEAR;
+}
+
+/** Scale a packed YUV 4:2:2 image (2 bytes per pixel) into a new buffer.
+    Returns a malloc'd image of owidth x oheight, or NULL if allocation fails.
+*/
+
+static uint8_t *scale_yuv422( uint8_t *input, int iwidth, int iheight, int owidth, int oheight, int interp )
+{
+	uint8_t *output = malloc( owidth * oheight * 2 );
+
+	if ( output != NULL )
+	{
+		// Strides are in bytes
+		int istride = iwidth * 2;
+		int ostride = owidth * 2;
+
+		yuv422_scale_simple( output, owidth, oheight, ostride, input, iwidth, iheight, istride, interp );
+	}
+
+	return output;
+}
+
+/** Scale a packed RGB (3 bytes per pixel) or RGBA (4 bytes per pixel) image with GdkPixbuf.
+    Returns a malloc'd, tightly packed image of owidth x oheight, or NULL on failure.
+*/
+
+static uint8_t *scale_rgb( uint8_t *input, int has_alpha, int iwidth, int iheight, int owidth, int oheight, int interp )
+{
+	int bpp = has_alpha ? 4 : 3;
+	uint8_t *output = NULL;
+	GdkPixbuf *scaled = NULL;
+
+	// GdkPixbuf only supports 8 bits per sample; passing 24 here made it return NULL
+	GdkPixbuf *pixbuf = gdk_pixbuf_new_from_data( input, GDK_COLORSPACE_RGB,
+		has_alpha, 8, iwidth, iheight,
+		iwidth * bpp, NULL, NULL );
+
+	if ( pixbuf != NULL )
+		scaled = gdk_pixbuf_scale_simple( pixbuf, owidth, oheight, (GdkInterpType) interp );
+	if ( scaled != NULL )
+		output = malloc( owidth * oheight * bpp );
+
+	if ( output != NULL )
+	{
+		// Copy row by row because the pixbuf rowstride need not equal owidth * bpp
+		guchar *pixels = gdk_pixbuf_get_pixels( scaled );
+		int rowstride = gdk_pixbuf_get_rowstride( scaled );
+		int i;
+
+		for ( i = 0; i < oheight; i++ )
+			memcpy( output + i * owidth * bpp, pixels + i * rowstride,
+				gdk_pixbuf_get_width( scaled ) * bpp );
+	}
+
+	if ( scaled != NULL )
+		g_object_unref( scaled );
+	if ( pixbuf != NULL )
+		g_object_unref( pixbuf );
+
+	return output;
+}
+
+/** Do it :-).
+    Get the image from the frame and, when its size differs from the requested
+    *width x *height, replace it with a scaled copy (yuv422, rgb24 and rgb24a only).
+    Returns 0, or the mlt_frame_get_image result when no image was produced.
+*/
+
+static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
+{
+	// Default to 720x576 when no size was requested
+	if ( *width == 0 )
+		*width = 720;
+	if ( *height == 0 )
+		*height = 576;
+
+	mlt_properties properties = mlt_frame_properties( this );
+	int iwidth = *width;
+	int iheight = *height;
+	int owidth = *width;
+	int oheight = *height;
+	int bpp = 0;
+	uint8_t *input = NULL;
+	uint8_t *output = NULL;
+	int interp = interp_from_name( mlt_properties_get( properties, "rescale.interp" ) );
+
+	// iwidth and iheight are passed by address and compared with the request below
+	int error = mlt_frame_get_image( this, &input, format, &iwidth, &iheight, 0 );
+
+	// Without a source image there is nothing to scale or return
+	if ( input == NULL )
+	{
+		*image = NULL;
+		return error;
+	}
+
+	// If width and height are correct, don't do anything
+	if ( iwidth != owidth || iheight != oheight )
+	{
+		if ( *format == mlt_image_yuv422 )
+		{
+			bpp = 2;
+			output = scale_yuv422( input, iwidth, iheight, owidth, oheight, interp );
+		}
+		else if ( *format == mlt_image_rgb24 || *format == mlt_image_rgb24a )
+		{
+			int has_alpha = ( *format == mlt_image_rgb24a );
+
+			bpp = has_alpha ? 4 : 3;
+			output = scale_rgb( input, has_alpha, iwidth, iheight, owidth, oheight, interp );
+		}
+	}
+
+	if ( output != NULL )
+	{
+		// Now update the frame; free is registered as the destructor for the new buffer
+		mlt_properties_set_data( properties, "image", output, owidth * oheight * bpp, free, NULL );
+		mlt_properties_set_int( properties, "width", owidth );
+		mlt_properties_set_int( properties, "height", oheight );
+
+		// Return the output
+		*image = output;
+	}
+	else
+	{
+		// Not scaled (same size, unsupported format or a failure): return the
+		// source image and report the size it really has
+		*image = input;
+		*width = iwidth;
+		*height = iheight;
+	}
+
+	return 0;
+}
+
+/** Filter processing.
+    Pushes filter_get_image onto the frame and copies this filter's
+    "interpolation" property to the frame as "rescale.interp".
+*/
+
+static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
+{
+	mlt_frame_push_get_image( frame, filter_get_image );
+	mlt_properties_set( mlt_frame_properties( frame ), "rescale.interp",
+		mlt_properties_get( mlt_filter_properties( this ), "interpolation" ) );
+	return frame;
+}
+
+/** Constructor for the filter.
+    arg names the interpolation ("nearest", "tiles", "bilinear" or "hyper");
+    NULL selects "bilinear". Returns NULL if the filter cannot be allocated.
+*/
+
+mlt_filter filter_rescale_init( char *arg )
+{
+	mlt_filter this = calloc( 1, sizeof( struct mlt_filter_s ) );
+	if ( this != NULL && mlt_filter_init( this, this ) == 0 )
+	{
+		this->process = filter_process;
+		if ( arg != NULL )
+			mlt_properties_set( mlt_filter_properties( this ), "interpolation", arg );
+		else
+			mlt_properties_set( mlt_filter_properties( this ), "interpolation", "bilinear" );
+	}
+	return this;
+}
+
diff --git a/src/modules/gtk2/filter_rescale.h b/src/modules/gtk2/filter_rescale.h
new file mode 100644
index 0000000..58340ff
--- /dev/null
+++ b/src/modules/gtk2/filter_rescale.h
@@ -0,0 +1,28 @@
+/*
+ * filter_rescale.h -- scale the producer video frame size to match the consumer
+ * Copyright (C) 2003-2004 Ushodaya Enterprises Limited
+ * Author: Dan Dennedy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */ + +#ifndef _FILTER_RESCALE_H_ +#define _FILTER_RESCALE_H_ + +#include + +extern mlt_filter filter_rescale_init( char *arg ); + +#endif diff --git a/src/modules/gtk2/have_mmx.S b/src/modules/gtk2/have_mmx.S new file mode 100644 index 0000000..4f8f5d8 --- /dev/null +++ b/src/modules/gtk2/have_mmx.S @@ -0,0 +1,53 @@ + .file "have_mmx.S" + .version "01.01" +gcc2_compiled.: +.text + .align 16 + +#if !defined(__MINGW32__) && !defined(__CYGWIN__) + +.globl pixops_have_mmx + .type pixops_have_mmx,@function +pixops_have_mmx: + +#else + +.globl _pixops_have_mmx +_pixops_have_mmx: + +#endif + + push %ebx + +# Check if bit 21 in flags word is writeable + + pushfl + popl %eax + movl %eax,%ebx + xorl $0x00200000, %eax + pushl %eax + popfl + pushfl + popl %eax + + cmpl %eax, %ebx + + je .notfound + +# OK, we have CPUID + + movl $1, %eax + cpuid + + test $0x00800000, %edx + jz .notfound + + movl $1, %eax + jmp .out + +.notfound: + movl $0, %eax +.out: + popl %ebx + ret + diff --git a/src/modules/gtk2/pixops.c b/src/modules/gtk2/pixops.c new file mode 100644 index 0000000..9fe02b0 --- /dev/null +++ b/src/modules/gtk2/pixops.c @@ -0,0 +1,781 @@ +/* GdkPixbuf library - Scaling and compositing functions + * + * Copyright (C) 1999 The Free Software Foundation + * + * Author: Owen Taylor + * Modified for YUV422 by Dan Dennedy + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include +#include +#include + +#include "pixops.h" + +#define SUBSAMPLE_BITS 4 +#define SUBSAMPLE (1 << SUBSAMPLE_BITS) +#define SUBSAMPLE_MASK ((1 << SUBSAMPLE_BITS)-1) +#define SCALE_SHIFT 16 + +typedef struct _PixopsFilter PixopsFilter; +typedef struct _PixopsFilterDimension PixopsFilterDimension; + +struct _PixopsFilterDimension +{ + int n; + double offset; + double *weights; +}; + +struct _PixopsFilter +{ + PixopsFilterDimension x; + PixopsFilterDimension y; + double overall_alpha; +}; + +typedef guchar *( *PixopsLineFunc ) ( int *weights, int n_x, int n_y, + guchar *dest, int dest_x, guchar *dest_end, + guchar **src, + int x_init, int x_step, int src_width ); + +typedef void ( *PixopsPixelFunc ) ( guchar *dest, guint y1, guint cr, guint y2, guint cb ); + + +/* mmx function declarations */ +#ifdef USE_MMX +guchar *pixops_scale_line_22_33_mmx ( guint32 weights[ 16 ][ 8 ], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init ); +int pixops_have_mmx ( void ); +#endif + +static inline int +get_check_shift ( int check_size ) +{ + int check_shift = 0; + g_return_val_if_fail ( check_size >= 0, 4 ); + + while ( !( check_size & 1 ) ) + { + check_shift++; + check_size >>= 1; + } + + return check_shift; +} + +static inline void +pixops_scale_nearest ( guchar *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + const guchar *src_buf, + int src_width, + int src_height, + int src_rowstride, + double scale_x, + double scale_y ) +{ + int i, j; + int x; + int x_step = ( 1 << SCALE_SHIFT ) / scale_x; + int y_step = ( 1 << SCALE_SHIFT ) / scale_y; + + for ( i = 0; i < ( render_y1 - render_y0 ); i++ ) + { + const guchar *src = src_buf + ( ( ( i + render_y0 ) * y_step + y_step / 
2 ) >> SCALE_SHIFT ) * src_rowstride; + guchar *dest = dest_buf + i * dest_rowstride; + + x = render_x0 * x_step + x_step / 2; + + for ( j = 0; j < ( render_x1 - render_x0 ); j++ ) + { + const guchar *p = src + ( x >> SCALE_SHIFT ) * 4; + guint32 *p32; + + p32 = ( guint32 * ) dest; + *p32 = *( ( guint32 * ) p ); + + dest += 4; + x += x_step; + } + } +} + + +static inline guchar * +scale_line ( int *weights, int n_x, int n_y, + guchar *dest, int dest_x, guchar *dest_end, + guchar **src, + int x_init, int x_step, int src_width ) +{ + int x = x_init; + int i, j; + + while ( dest < dest_end ) + { + int x_scaled = x >> SCALE_SHIFT; + int *pixel_weights; + + pixel_weights = weights + ( ( x >> ( SCALE_SHIFT - SUBSAMPLE_BITS ) ) & SUBSAMPLE_MASK ) * n_x * n_y; + + unsigned int y1 = 0, cb = 0, y2 = 0, cr = 0; + for ( i = 0; i < n_y; i++ ) + { + guchar *q = src[ i ] + x_scaled * 4; + int *line_weights = pixel_weights + n_x * i; + + for ( j = 0; j < n_x; j++ ) + { + unsigned int ta = line_weights[ j ]; + + y1 += ta * q[ 0 ]; + cb += ta * q[ 1 ]; + y2 += ta * q[ 2 ]; + cr += ta * q[ 3 ]; + + q += 4; + } + } + + dest[ 0 ] = ( y1 + 0xffff ) >> 16; + dest[ 1 ] = ( cb + 0xffff ) >> 16; + dest[ 2 ] = ( y2 + 0xffff ) >> 16; + dest[ 3 ] = ( cr + 0xffff ) >> 16; + + dest += 4; + + x += x_step; + } + + return dest; +} + +#ifdef USE_MMX +static inline guchar * +scale_line_22_33_mmx_stub ( int *weights, int n_x, int n_y, + guchar *dest, int dest_x, guchar *dest_end, + guchar **src, + int x_init, int x_step, int src_width ) +{ + guint32 mmx_weights[ 16 ][ 8 ]; + int j; + + for ( j = 0; j < 16; j++ ) + { + mmx_weights[ j ][ 0 ] = 0x00010001 * ( weights[ 4 * j ] >> 8 ); + mmx_weights[ j ][ 1 ] = 0x00010001 * ( weights[ 4 * j ] >> 8 ); + mmx_weights[ j ][ 2 ] = 0x00010001 * ( weights[ 4 * j + 1 ] >> 8 ); + mmx_weights[ j ][ 3 ] = 0x00010001 * ( weights[ 4 * j + 1 ] >> 8 ); + mmx_weights[ j ][ 4 ] = 0x00010001 * ( weights[ 4 * j + 2 ] >> 8 ); + mmx_weights[ j ][ 5 ] = 0x00010001 * ( weights[ 
4 * j + 2 ] >> 8 ); + mmx_weights[ j ][ 6 ] = 0x00010001 * ( weights[ 4 * j + 3 ] >> 8 ); + mmx_weights[ j ][ 7 ] = 0x00010001 * ( weights[ 4 * j + 3 ] >> 8 ); + } + + return pixops_scale_line_22_33_mmx ( mmx_weights, dest, src[ 0 ], src[ 1 ], x_step, dest_end, x_init ); +} +#endif /* USE_MMX */ + +static inline guchar * +scale_line_22_33 ( int *weights, int n_x, int n_y, + guchar *dest, int dest_x, guchar *dest_end, + guchar **src, + int x_init, int x_step, int src_width ) +{ + int x = x_init; + guchar *src0 = src[ 0 ]; + guchar *src1 = src[ 1 ]; + + while ( dest < dest_end ) + { + unsigned int y1, cb, y2, cr; + int x_scaled = x >> SCALE_SHIFT; + int *pixel_weights; + guchar *q0, *q1; + int w1, w2, w3, w4; + + q0 = src0 + x_scaled * 4; + q1 = src1 + x_scaled * 4; + + pixel_weights = weights + ( ( x >> ( SCALE_SHIFT - SUBSAMPLE_BITS ) ) & SUBSAMPLE_MASK ) * 4; + + w1 = pixel_weights[ 0 ]; + w2 = pixel_weights[ 1 ]; + w3 = pixel_weights[ 2 ]; + w4 = pixel_weights[ 3 ]; + + y1 = w1 * q0[ 0 ]; + cb = w1 * q0[ 1 ]; + y2 = w1 * q0[ 2 ]; + cr = w1 * q0[ 3 ]; + + y1 += w2 * q0[ 4 ]; + cb += w2 * q0[ 5 ]; + y2 += w2 * q0[ 6 ]; + cr += w2 * q0[ 7 ]; + + y1 += w3 * q1[ 0 ]; + cb += w3 * q1[ 1 ]; + y2 += w3 * q1[ 2 ]; + cr += w3 * q1[ 3 ]; + + y1 += w4 * q1[ 4 ]; + cb += w4 * q1[ 5 ]; + y2 += w4 * q1[ 6 ]; + cr += w4 * q1[ 7 ]; + + dest[ 0 ] = ( y1 + 0x8000 ) >> 16; + dest[ 1 ] = ( cb + 0x8000 ) >> 16; + dest[ 2 ] = ( y2 + 0x8000 ) >> 16; + dest[ 3 ] = ( cr + 0x8000 ) >> 16; + + dest += 4; + x += x_step; + } + + return dest; +} + + +static inline void +process_pixel ( int *weights, int n_x, int n_y, + guchar *dest, int dest_x, int dest_channels, + guchar **src, int src_channels, + int x_start, int src_width ) +{ + unsigned int y1 = 0, cb = 0, y2 = 0, cr = 0; + int i, j; + + for ( i = 0; i < n_y; i++ ) + { + int *line_weights = weights + n_x * i; + + for ( j = 0; j < n_x; j++ ) + { + unsigned int ta; + guchar *q; + + if ( x_start + j < 0 ) + q = src[ i ]; + else if ( x_start + 
j < src_width ) + q = src[ i ] + ( x_start + j ) * src_channels; + else + q = src[ i ] + ( src_width - 1 ) * src_channels; + + ta = 0xff * line_weights[ j ]; + + y1 += ta * q[ 0 ]; + cb += ta * q[ 1 ]; + y2 += ta * q[ 2 ]; + cr += ta * q[ 3 ]; + } + } + + dest[ 0 ] = ( y1 + 0xffffff ) >> 24; + dest[ 1 ] = ( cb + 0xffffff ) >> 24; + dest[ 2 ] = ( y2 + 0xffffff ) >> 24; + dest[ 3 ] = ( cr + 0xffffff ) >> 24; +} + + +static inline void +correct_total ( int *weights, + int n_x, + int n_y, + int total, + double overall_alpha ) +{ + int correction = ( int ) ( 0.5 + 65536 * overall_alpha ) - total; + int remaining, c, d, i; + + if ( correction != 0 ) + { + remaining = correction; + for ( d = 1, c = correction; c != 0 && remaining != 0; d++, c = correction / d ) + for ( i = n_x * n_y - 1; i >= 0 && c != 0 && remaining != 0; i-- ) + if ( *( weights + i ) + c >= 0 ) + { + *( weights + i ) += c; + remaining -= c; + if ( ( 0 < remaining && remaining < c ) || + ( 0 > remaining && remaining > c ) ) + c = remaining; + } + } +} + + +static inline int * +make_filter_table ( PixopsFilter *filter ) +{ + int i_offset, j_offset; + int n_x = filter->x.n; + int n_y = filter->y.n; + int *weights = g_new ( int, SUBSAMPLE * SUBSAMPLE * n_x * n_y ); + + for ( i_offset = 0; i_offset < SUBSAMPLE; i_offset++ ) + for ( j_offset = 0; j_offset < SUBSAMPLE; j_offset++ ) + { + double weight; + int *pixel_weights = weights + ( ( i_offset * SUBSAMPLE ) + j_offset ) * n_x * n_y; + int total = 0; + int i, j; + + for ( i = 0; i < n_y; i++ ) + for ( j = 0; j < n_x; j++ ) + { + weight = filter->x.weights[ ( j_offset * n_x ) + j ] * + filter->y.weights[ ( i_offset * n_y ) + i ] * + filter->overall_alpha * 65536 + 0.5; + + total += ( int ) weight; + + *( pixel_weights + n_x * i + j ) = weight; + } + + correct_total ( pixel_weights, n_x, n_y, total, filter->overall_alpha ); + } + + return weights; +} + + +static inline void +pixops_process ( guchar *dest_buf, + int render_x0, + int render_y0, + int render_x1, 
+ int render_y1, + int dest_rowstride, + int dest_channels, + gboolean dest_has_alpha, + const guchar *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + gboolean src_has_alpha, + double scale_x, + double scale_y, + int check_x, + int check_y, + int check_size, + guint32 color1, + guint32 color2, + PixopsFilter *filter, + PixopsLineFunc line_func ) +{ + int i, j; + int x, y; /* X and Y position in source (fixed_point) */ + + guchar **line_bufs = g_new ( guchar *, filter->y.n ); + int *filter_weights = make_filter_table ( filter ); + + int x_step = ( 1 << SCALE_SHIFT ) / scale_x; /* X step in source (fixed point) */ + int y_step = ( 1 << SCALE_SHIFT ) / scale_y; /* Y step in source (fixed point) */ + + int check_shift = check_size ? get_check_shift ( check_size ) : 0; + + int scaled_x_offset = floor ( filter->x.offset * ( 1 << SCALE_SHIFT ) ); + + /* Compute the index where we run off the end of the source buffer. The furthest + * source pixel we access at index i is: + * + * ((render_x0 + i) * x_step + scaled_x_offset) >> SCALE_SHIFT + filter->x.n - 1 + * + * So, run_end_index is the smallest i for which this pixel is src_width, i.e, for which: + * + * (i + render_x0) * x_step >= ((src_width - filter->x.n + 1) << SCALE_SHIFT) - scaled_x_offset + * + */ +#define MYDIV(a,b) ((a) > 0 ? 
(a) / (b) : ((a) - (b) + 1) / (b)) /* Division so that -1/5 = -1 */ + + int run_end_x = ( ( ( src_width - filter->x.n + 1 ) << SCALE_SHIFT ) - scaled_x_offset ); + int run_end_index = MYDIV ( run_end_x + x_step - 1, x_step ) - render_x0; + run_end_index = MIN ( run_end_index, render_x1 - render_x0 ); + + y = render_y0 * y_step + floor ( filter->y.offset * ( 1 << SCALE_SHIFT ) ); + for ( i = 0; i < ( render_y1 - render_y0 ); i++ ) + { + int dest_x; + int y_start = y >> SCALE_SHIFT; + int x_start; + int *run_weights = filter_weights + + ( ( y >> ( SCALE_SHIFT - SUBSAMPLE_BITS ) ) & SUBSAMPLE_MASK ) * + filter->x.n * filter->y.n * SUBSAMPLE; + guchar *new_outbuf; + guint32 tcolor1, tcolor2; + + guchar *outbuf = dest_buf + dest_rowstride * i; + guchar *outbuf_end = outbuf + dest_channels * ( render_x1 - render_x0 ); + + if ( ( ( i + check_y ) >> check_shift ) & 1 ) + { + tcolor1 = color2; + tcolor2 = color1; + } + else + { + tcolor1 = color1; + tcolor2 = color2; + } + + for ( j = 0; j < filter->y.n; j++ ) + { + if ( y_start < 0 ) + line_bufs[ j ] = ( guchar * ) src_buf; + else if ( y_start < src_height ) + line_bufs[ j ] = ( guchar * ) src_buf + src_rowstride * y_start; + else + line_bufs[ j ] = ( guchar * ) src_buf + src_rowstride * ( src_height - 1 ); + + y_start++; + } + + dest_x = check_x; + x = render_x0 * x_step + scaled_x_offset; + x_start = x >> SCALE_SHIFT; + + while ( x_start < 0 && outbuf < outbuf_end ) + { + process_pixel ( run_weights + ( ( x >> ( SCALE_SHIFT - SUBSAMPLE_BITS ) ) & SUBSAMPLE_MASK ) * ( filter->x.n * filter->y.n ), + filter->x.n, filter->y.n, + outbuf, dest_x, dest_channels, + line_bufs, src_channels, + x >> SCALE_SHIFT, src_width ); + + x += x_step; + x_start = x >> SCALE_SHIFT; + dest_x++; + outbuf += dest_channels; + } + + new_outbuf = ( *line_func ) ( run_weights, filter->x.n, filter->y.n, + outbuf, dest_x, + dest_buf + dest_rowstride * i + run_end_index * dest_channels, + line_bufs, + x, x_step, src_width ); + + dest_x += ( new_outbuf 
- outbuf ) / dest_channels; + + x = ( dest_x - check_x + render_x0 ) * x_step + scaled_x_offset; + outbuf = new_outbuf; + + while ( outbuf < outbuf_end ) + { + process_pixel ( run_weights + ( ( x >> ( SCALE_SHIFT - SUBSAMPLE_BITS ) ) & SUBSAMPLE_MASK ) * ( filter->x.n * filter->y.n ), + filter->x.n, filter->y.n, + outbuf, dest_x, dest_channels, + line_bufs, src_channels, + x >> SCALE_SHIFT, src_width ); + + x += x_step; + dest_x++; + outbuf += dest_channels; + } + + y += y_step; + } + + g_free ( line_bufs ); + g_free ( filter_weights ); +} + + +/* Compute weights for reconstruction by replication followed by + * sampling with a box filter + */ +static inline void +tile_make_weights ( PixopsFilterDimension *dim, + double scale ) +{ + int n = ceil ( 1 / scale + 1 ); + double *pixel_weights = g_new ( double, SUBSAMPLE * n ); + int offset; + int i; + + dim->n = n; + dim->offset = 0; + dim->weights = pixel_weights; + + for ( offset = 0; offset < SUBSAMPLE; offset++ ) + { + double x = ( double ) offset / SUBSAMPLE; + double a = x + 1 / scale; + + for ( i = 0; i < n; i++ ) + { + if ( i < x ) + { + if ( i + 1 > x ) + * ( pixel_weights++ ) = ( MIN ( i + 1, a ) - x ) * scale; + else + *( pixel_weights++ ) = 0; + } + else + { + if ( a > i ) + * ( pixel_weights++ ) = ( MIN ( i + 1, a ) - i ) * scale; + else + *( pixel_weights++ ) = 0; + } + } + } +} + +/* Compute weights for a filter that, for minification + * is the same as 'tiles', and for magnification, is bilinear + * reconstruction followed by a sampling with a delta function. 
+ */ +static inline void +bilinear_magnify_make_weights ( PixopsFilterDimension *dim, + double scale ) +{ + double * pixel_weights; + int n; + int offset; + int i; + + if ( scale > 1.0 ) /* Linear */ + { + n = 2; + dim->offset = 0.5 * ( 1 / scale - 1 ); + } + else /* Tile */ + { + n = ceil ( 1.0 + 1.0 / scale ); + dim->offset = 0.0; + } + + dim->n = n; + dim->weights = g_new ( double, SUBSAMPLE * n ); + + pixel_weights = dim->weights; + + for ( offset = 0; offset < SUBSAMPLE; offset++ ) + { + double x = ( double ) offset / SUBSAMPLE; + + if ( scale > 1.0 ) /* Linear */ + { + for ( i = 0; i < n; i++ ) + *( pixel_weights++ ) = ( ( ( i == 0 ) ? ( 1 - x ) : x ) / scale ) * scale; + } + else /* Tile */ + { + double a = x + 1 / scale; + + /* x + * ---------|--.-|----|--.-|------- SRC + * ------------|---------|--------- DEST + */ + for ( i = 0; i < n; i++ ) + { + if ( i < x ) + { + if ( i + 1 > x ) + * ( pixel_weights++ ) = ( MIN ( i + 1, a ) - x ) * scale; + else + *( pixel_weights++ ) = 0; + } + else + { + if ( a > i ) + * ( pixel_weights++ ) = ( MIN ( i + 1, a ) - i ) * scale; + else + *( pixel_weights++ ) = 0; + } + } + } + } +} + +/* Computes the integral from b0 to b1 of + * + * f(x) = x; 0 <= x < 1 + * f(x) = 0; otherwise + * + * We combine two of these to compute the convolution of + * a box filter with a triangular spike. 
+ */
+static inline double
+linear_box_half ( double b0, double b1 )
+{
+	double a0, a1;
+	double x0, x1;
+
+	a0 = 0.;
+	a1 = 1.;
+
+	if ( a0 < b0 )
+	{
+		if ( a1 > b0 )
+		{
+			x0 = b0;
+			x1 = MIN ( a1, b1 );
+		}
+		else
+			return 0;
+	}
+	else
+	{
+		if ( b1 > a0 )
+		{
+			x0 = a0;
+			x1 = MIN ( a1, b1 );
+		}
+		else
+			return 0;
+	}
+
+	return 0.5 * ( x1 * x1 - x0 * x0 );
+}
+
+/* Compute weights for reconstructing with bilinear
+ * interpolation, then sampling with a box filter
+ */
+static inline void
+bilinear_box_make_weights ( PixopsFilterDimension *dim,
+	double scale )
+{
+	int n = ceil ( 1 / scale + 2.0 );
+	double *pixel_weights = g_new ( double, SUBSAMPLE * n );
+	double w;
+	int offset, i;
+
+	dim->offset = -1.0;
+	dim->n = n;
+	dim->weights = pixel_weights;
+
+	for ( offset = 0 ; offset < SUBSAMPLE; offset++ )
+	{
+		double x = ( double ) offset / SUBSAMPLE;
+		double a = x + 1 / scale;
+
+		for ( i = 0; i < n; i++ )
+		{
+			w = linear_box_half ( 0.5 + i - a, 0.5 + i - x );
+			w += linear_box_half ( 1.5 + x - i, 1.5 + a - i );
+
+			*( pixel_weights++ ) = w * scale;
+		}
+	}
+}
+
+/* Build the x and y weight tables of filter for the given interpolation type */
+static inline void
+make_weights ( PixopsFilter *filter,
+	PixopsInterpType interp_type,
+	double scale_x,
+	double scale_y )
+{
+	switch ( interp_type )
+	{
+	case PIXOPS_INTERP_NEAREST:
+		g_assert_not_reached ();
+		break;
+	case PIXOPS_INTERP_TILES:
+		tile_make_weights ( &filter->x, scale_x );
+		tile_make_weights ( &filter->y, scale_y );
+		break;
+	case PIXOPS_INTERP_BILINEAR:
+		bilinear_magnify_make_weights ( &filter->x, scale_x );
+		bilinear_magnify_make_weights ( &filter->y, scale_y );
+		break;
+	case PIXOPS_INTERP_HYPER:
+		bilinear_box_make_weights ( &filter->x, scale_x );
+		bilinear_box_make_weights ( &filter->y, scale_y );
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+}
+
+/* Scale src_buf into dest_buf; nearest is copied directly, other types are filtered */
+void
+yuv422_scale ( guchar *dest_buf,
+	int render_x0,
+	int render_y0,
+	int render_x1,
+	int render_y1,
+	int dest_rowstride,
+	int dest_channels,
+	gboolean dest_has_alpha,
+	const guchar *src_buf,
+	int src_width,
+	int src_height,
+	int src_rowstride,
+	int src_channels,
+	gboolean src_has_alpha,
+	double scale_x,
+	double scale_y,
+	PixopsInterpType interp_type )
+{
+	PixopsFilter filter;
+	PixopsLineFunc line_func;
+
+#ifdef USE_MMX
+	gboolean found_mmx = pixops_have_mmx();
+#endif
+
+	/* The scalers derive their fixed-point source steps by dividing by
+	 * scale_x and scale_y, so a zero factor cannot be rendered: leave
+	 * dest_buf untouched in that case. */
+
+	if ( scale_x == 0 || scale_y == 0 )
+		return ;
+
+	if ( interp_type == PIXOPS_INTERP_NEAREST )
+	{
+		pixops_scale_nearest ( dest_buf, render_x0, render_y0, render_x1, render_y1,
+			dest_rowstride,
+			src_buf, src_width, src_height, src_rowstride,
+			scale_x, scale_y );
+		return ;
+	}
+
+	filter.overall_alpha = 1.0;
+	make_weights ( &filter, interp_type, scale_x, scale_y );
+
+	/* A 2x2 filter gets the unrolled line function; the MMX variant is disabled by the 0 && */
+	if ( filter.x.n == 2 && filter.y.n == 2 )
+	{
+#ifdef USE_MMX
+		if ( 0 && found_mmx )
+			line_func = scale_line_22_33_mmx_stub;
+		else
+#endif
+
+			line_func = scale_line_22_33;
+	}
+	else
+		line_func = scale_line;
+
+	pixops_process ( dest_buf, render_x0, render_y0, render_x1, render_y1,
+		dest_rowstride, dest_channels, dest_has_alpha,
+		src_buf, src_width, src_height, src_rowstride, src_channels,
+		src_has_alpha, scale_x, scale_y, 0, 0, 0, 0, 0,
+		&filter, line_func );
+
+	g_free ( filter.x.weights );
+	g_free ( filter.y.weights );
+}
+
diff --git a/src/modules/gtk2/pixops.h b/src/modules/gtk2/pixops.h
new file mode 100644
index 0000000..37d6f37
--- /dev/null
+++ b/src/modules/gtk2/pixops.h
@@ -0,0 +1,69 @@
+/* GdkPixbuf library - Scaling and compositing functions
+ *
+ * Copyright (C) 1999 The Free Software Foundation
+ *
+ * Author: Owen Taylor
+ * Modified for YUV422 by: Dan Dennedy
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef PIXOPS_H
+#define PIXOPS_H
+
+#include <glib.h>
+
+/* Interpolation modes; must match GdkInterpType */
+typedef enum {
+	PIXOPS_INTERP_NEAREST,
+	PIXOPS_INTERP_TILES,
+	PIXOPS_INTERP_BILINEAR,
+	PIXOPS_INTERP_HYPER
+} PixopsInterpType;
+
+/* Scale src_buf (src_width x src_height) by the factors scale_x, scale_y
+ * and write the part of the scaled image spanning render_x0..render_x1,
+ * render_y0..render_y1 (end exclusive) into dest_buf starting at 0, 0.
+ * yuv422_scale_simple scales a whole image in 4-byte units, so it halves widths.
+ */
+void yuv422_scale (guchar *dest_buf,
+	int render_x0,
+	int render_y0,
+	int render_x1,
+	int render_y1,
+	int dest_rowstride,
+	int dest_channels,
+	int dest_has_alpha,
+	const guchar *src_buf,
+	int src_width,
+	int src_height,
+	int src_rowstride,
+	int src_channels,
+	int src_has_alpha,
+	double scale_x,
+	double scale_y,
+	PixopsInterpType interp_type);
+
+#define yuv422_scale_simple( dest_buf, dest_width, dest_height, dest_rowstride, src_buf, src_width, src_height, src_rowstride, interp_type ) \
+	yuv422_scale( (dest_buf), 0, 0, \
+		(dest_width)/2, (dest_height), \
+		(dest_rowstride), 4, 0, \
+		(src_buf), (src_width)/2, (src_height), \
+		(src_rowstride), 4, 0, \
+		(double) (dest_width) / (src_width), (double) (dest_height) / (src_height), \
+		(PixopsInterpType) (interp_type) )
+
+#endif
diff --git a/src/modules/gtk2/producer_pango.c b/src/modules/gtk2/producer_pango.c
index f1952dc..7fa3764 100644
--- 
a/src/modules/gtk2/producer_pango.c +++ b/src/modules/gtk2/producer_pango.c @@ -103,7 +103,7 @@ mlt_producer producer_pango_init( const char *filename ) size += strlen( line ) + 1; if ( markup ) { - realloc( markup, size ); + markup = realloc( markup, size ); strcat( markup, line ); } else diff --git a/src/modules/gtk2/scale_line_22_33_mmx.S b/src/modules/gtk2/scale_line_22_33_mmx.S new file mode 100644 index 0000000..f389217 --- /dev/null +++ b/src/modules/gtk2/scale_line_22_33_mmx.S @@ -0,0 +1,183 @@ + .file "scale_line_22_33_mmx.S" + .version "01.01" +gcc2_compiled.: +.text + .align 16 + +#if !defined(__MINGW32__) && !defined(__CYGWIN__) + +.globl pixops_scale_line_22_33_mmx + .type pixops_scale_line_22_33_mmx,@function +pixops_scale_line_22_33_mmx: + +#else + +.globl _pixops_scale_line_22_33_mmx +_pixops_scale_line_22_33_mmx: + +#endif +/* + * Arguments + * + * weights: 8(%ebp) weight table base, held in %edi inside the loop + * p: 12(%ebp) %esi output pointer, advanced by 3 bytes per pixel stored + * q1: 16(%ebp) first source pointer, feeds %mm0/%mm1 + * q2: 20(%ebp) second source pointer, feeds %mm2/%mm3 + * xstep: 24(%ebp) added to x after each output pixel + * p_end: 28(%ebp) the loop exits when p equals this address (skipped entirely if p is already at or past it) + * xinit: 32(%ebp) starting x; x >> 16 selects the source pixel + * Returns the final p in %eax. +*/ + +/* + * Function call entry + */ + pushl %ebp + movl %esp,%ebp + subl $28,%esp + pushl %edi + pushl %esi + pushl %ebx +/* Locals: + * int x %ebx + * int x_scaled -24(%ebp) + */ + +/* + * Setup + */ +/* Initialize variables */ + movl 32(%ebp),%ebx + movl 32(%ebp),%edx + sarl $16,%edx + movl 12(%ebp),%esi + + cmpl 28(%ebp),%esi + jnb .out + +/* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining + * points we are interpolating between, as: + * + * 000000BB00GG00RR + */ + +/* Load initial values into %mm1, %mm3 */ + leal (%edx,%edx,2),%edx # Multiply by 3 + + movl 16(%ebp),%edi + pxor %mm4, %mm4 + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm1 + punpcklbw %mm4, %mm1 + + movl 20(%ebp),%edi + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm3 + punpcklbw %mm4, %mm3 + + addl $65536,%ebx + movl %ebx,%edx + sarl $16,%edx + + jmp .newx + .p2align 4,,7 
+.loop: +/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y + * 16 4 0xf 2 2 + */ + movl %ebx,%eax + andl $0xf000,%eax + shrl $7,%eax + +/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and + * accumulate. + */ + movq (%edi,%eax),%mm4 + pmullw %mm0,%mm4 + movq 8(%edi,%eax),%mm5 + pmullw %mm1,%mm5 + movq 16(%edi,%eax),%mm6 + movq 24(%edi,%eax),%mm7 + pmullw %mm2,%mm6 + pmullw %mm3,%mm7 + paddw %mm4, %mm5 + paddw %mm6, %mm7 + paddw %mm5, %mm7 + +/* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256 + */ + pxor %mm4, %mm4 + movl $8421504, %eax # 0x00808080 + movd %eax, %mm6 + punpcklbw %mm4, %mm6 + paddw %mm6, %mm7 + psrlw $8, %mm7 + +/* Pack into %eax and store the 3 result bytes at p, then advance p by 3 + */ + packuswb %mm7, %mm7 + movd %mm7, %eax + + movb %al, (%esi) + shrl $8, %eax + movw %ax, 1(%esi) + addl $3, %esi + + cmpl %esi,28(%ebp) + je .out + +/* x += x_step; */ + addl 24(%ebp),%ebx +/* x_scaled = x >> 16; */ + movl %ebx,%edx + sarl $16,%edx + + cmpl %edx,-24(%ebp) + je .loop + +.newx: + movl %edx,-24(%ebp) +/* + * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 + */ + movq %mm1, %mm0 + movq %mm3, %mm2 + + leal (%edx,%edx,2),%edx # Multiply by 3 + + movl 16(%ebp),%edi + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm1 + punpcklbw %mm4, %mm1 + + movl 20(%ebp),%edi + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm3 + punpcklbw %mm4, %mm3 + + movl 8(%ebp),%edi + + jmp .loop + +.out: + movl %esi,%eax + emms + leal -40(%ebp),%esp + popl %ebx + popl %esi + popl %edi + movl %ebp,%esp + popl %ebp + ret diff --git a/src/modules/vorbis/Makefile b/src/modules/vorbis/Makefile index ac457c0..6e28934 100644 --- a/src/modules/vorbis/Makefile +++ b/src/modules/vorbis/Makefile @@ -6,7 +6,7 @@ OBJS = factory.o \ CFLAGS = -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -LDFLAGS = 
-lvorbisfile +LDFLAGS = -lvorbis -lvorbisfile SRCS := $(OBJS:.o=.c) -- 1.7.4.4