From: lilo_booter <lilo_booter@d19143bc-622f-0410-bfdd-b5b2a6649095>
Date: Mon, 23 Feb 2004 08:20:58 +0000 (+0000)
Subject: Big luma optimisations, minor pooling optimisations
X-Git-Url: http://research.m1stereo.tv/gitweb?a=commitdiff_plain;h=7ac623df605d7daa8125b189cf77a937cfe03888;p=melted

Big luma optimisations, minor pooling optimisations


git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@162 d19143bc-622f-0410-bfdd-b5b2a6649095
---

diff --git a/src/framework/mlt_pool.c b/src/framework/mlt_pool.c
index 5f891e9..daa0d29 100644
--- a/src/framework/mlt_pool.c
+++ b/src/framework/mlt_pool.c
@@ -22,6 +22,7 @@
 #include "mlt_deque.h"
 
 #include <stdlib.h>
+#include <malloc.h>
 #include <string.h>
 #include <pthread.h>
 
@@ -45,6 +46,7 @@ typedef struct mlt_pool_s
 typedef struct mlt_release_s
 {
 	mlt_pool pool;
+	int references;
 }
 *mlt_release;
 
@@ -92,11 +94,14 @@ static void *pool_fetch( mlt_pool this )
 		{
 			// Pop the top of the stack
 			ptr = mlt_deque_pop_back( this->stack );
+
+			// Reset the reference count to 1 for the reused item
+			( ( mlt_release )ptr )->references = 1;
 		}
 		else
 		{
 			// We need to generate a release item
-			mlt_release release = malloc( sizeof( struct mlt_release_s ) + this->size );
+			mlt_release release = memalign( 16, this->size );
 
 			// Initialise it
 			if ( release != NULL )
@@ -104,6 +109,9 @@ static void *pool_fetch( mlt_pool this )
 				// Assign the pool
 				release->pool = this;
 
+				// Initialise the reference count to 1
+				release->references = 1;
+
 				// Determine the ptr
 				ptr = ( void * )release + sizeof( struct mlt_release_s );
 			}
@@ -126,7 +134,7 @@ static void pool_return( void *ptr )
 	if ( ptr != NULL )
 	{
 		// Get the release pointer
-		mlt_release that = ( void * )ptr - sizeof( struct mlt_release_s );
+		mlt_release that = ptr - sizeof( struct mlt_release_s );
 
 		// Get the pool
 		mlt_pool this = that->pool;
@@ -223,7 +231,7 @@ void *mlt_pool_alloc( int size )
 	int index = 8;
 
 	// Minimum size pooled is 256 bytes
-	size = size + 4;
+	size = size + sizeof( mlt_release );
 	while ( ( 1 << index ) < size )
 		index ++;
 
@@ -234,38 +242,6 @@ void *mlt_pool_alloc( int size )
 	return pool_fetch( pool );
 }
 
-/** Allocate size bytes from the pool.
-*/
-
-void *mlt_pool_allocate( int size, void **release )
-{
-	// This is the real release structure we'll return
-	void *real = NULL;
-
-	// This will be used to obtain the pool to use
-	mlt_pool pool = NULL;
-
-	// Determines the index of the pool to use
-	int index = 0;
-
-	// Minimum size pooled is 256 bytes
-	size = size >> 8;
-	while ( ( 1 << index ) < size )
-		index ++;
-
-	// Now get the pool at the index
-	pool = mlt_properties_get_data_at( pools, index + 1, NULL );
-
-	// Now get the real item
-	real = pool_fetch( pool );
-
-	// Assign to release
-	*release = real;
-	
-	// Otherwise return a NULL to indicate failure
-	return real;
-}
-
 /** Release the allocated memory.
 */
 
diff --git a/src/framework/mlt_pool.h b/src/framework/mlt_pool.h
index 87ef08b..7a9c1cb 100644
--- a/src/framework/mlt_pool.h
+++ b/src/framework/mlt_pool.h
@@ -23,7 +23,6 @@
 
 extern void mlt_pool_init( );
 extern void *mlt_pool_alloc( int size );
-extern void *mlt_pool_allocate( int size, void **release );
 extern void mlt_pool_release( void *release );
 extern void mlt_pool_close( );
 
diff --git a/src/modules/core/transition_luma.c b/src/modules/core/transition_luma.c
index 9a180d1..40d5ca5 100644
--- a/src/modules/core/transition_luma.c
+++ b/src/modules/core/transition_luma.c
@@ -32,10 +32,9 @@
 typedef struct 
 {
 	struct mlt_transition_s parent;
-	char *filename;
 	int width;
 	int height;
-	float *bitmap;
+	uint16_t *bitmap;
 }
 transition_luma;
 
@@ -43,22 +42,6 @@ transition_luma;
 // forward declarations
 static void transition_close( mlt_transition parent );
 
-
-// image processing functions
-
-static inline float smoothstep( float edge1, float edge2, float a )
-{
-	if ( a < edge1 )
-		return 0.0;
-
-	if ( a >= edge2 )
-		return 1.0;
-
-	a = ( a - edge1 ) / ( edge2 - edge1 );
-
-	return ( a * a * ( 3 - 2 * a ) );
-}
-
 /** Calculate the position for this frame.
 */
 
@@ -89,8 +72,7 @@ static float delta_calculate( mlt_transition this, mlt_frame frame )
 
 	// Now do the calcs
 	float x = ( float )( position - in ) / ( float )( out - in + 1 );
-	position++;
-	float y = ( float )( position - in ) / ( float )( out - in + 1 );
+	float y = ( float )( position + 1 - in ) / ( float )( out - in + 1 );
 
 	return ( y - x ) / 2.0;
 }
@@ -101,28 +83,48 @@ static inline int dissolve_yuv( mlt_frame this, mlt_frame that, float weight, in
 	int width_src = width, height_src = height;
 	mlt_image_format format = mlt_image_yuv422;
 	uint8_t *p_src, *p_dest;
-	float weight_complement = 1 - weight;
 	uint8_t *p;
 	uint8_t *limit;
 
-	mlt_frame_get_image( this, &p_dest, &format, &width, &height, 1 /* writable */ );
-	mlt_frame_get_image( that, &p_src, &format, &width_src, &height_src, 0 /* writable */ );
-	
+	int32_t weigh = weight * ( 1 << 16 );
+	int32_t weigh_complement = ( 1 - weight ) * ( 1 << 16 );
+
+	mlt_frame_get_image( this, &p_dest, &format, &width, &height, 1 );
+	mlt_frame_get_image( that, &p_src, &format, &width_src, &height_src, 0 );
+
 	p = p_dest;
 	limit = p_dest + height_src * width_src * 2;
 
 	while ( p < limit )
-		*p_dest++ = ( uint8_t )( *p_src++ * weight + *p++ * weight_complement );
+	{
+		*p_dest++ = ( *p_src++ * weigh + *p++ * weigh_complement ) >> 16;
+		*p_dest++ = ( *p_src++ * weigh + *p++ * weigh_complement ) >> 16;
+	}
 
 	return ret;
 }
 
+// image processing functions
+
+static inline uint32_t smoothstep( int32_t edge1, int32_t edge2, uint32_t a )
+{
+	if ( a < edge1 )
+		return 0;
+
+	if ( a >= edge2 )
+		return 0x10000;
+
+	a = ( ( a - edge1 ) << 16 ) / ( edge2 - edge1 );
+
+	return ( ( ( a * a ) >> 16 )  * ( ( 3 << 16 ) - ( 2 * a ) ) ) >> 16;
+}
+
 /** powerful stuff
 
     \param field_order -1 = progressive, 0 = lower field first, 1 = top field first
 */
 static void luma_composite( mlt_frame a_frame, mlt_frame b_frame, int luma_width, int luma_height,
-							float *luma_bitmap, float pos, float frame_delta, float softness, int field_order,
+							uint16_t *luma_bitmap, float pos, float frame_delta, float softness, int field_order,
 							int *width, int *height )
 {
 	int width_src = *width, height_src = *height;
@@ -132,60 +134,77 @@ static void luma_composite( mlt_frame a_frame, mlt_frame b_frame, int luma_width
 	int i, j;
 	int stride_src;
 	int stride_dest;
-	float weight = 0;
-	int field;
+	uint16_t weight = 0;
 
 	format_src = mlt_image_yuv422;
 	format_dest = mlt_image_yuv422;
 
-	mlt_frame_get_image( a_frame, &p_dest, &format_dest, &width_dest, &height_dest, 1 /* writable */ );
-	mlt_frame_get_image( b_frame, &p_src, &format_src, &width_src, &height_src, 0 /* writable */ );
+	mlt_frame_get_image( a_frame, &p_dest, &format_dest, &width_dest, &height_dest, 1 );
+	mlt_frame_get_image( b_frame, &p_src, &format_src, &width_src, &height_src, 0 );
 
 	stride_src = width_src * 2;
 	stride_dest = width_dest * 2;
 
 	// Offset the position based on which field we're looking at ...
-	float field_pos[ 2 ];
-	field_pos[ 0 ] = pos + ( ( field_order == 0 ? 1 : 0 ) * frame_delta * 0.5 );
-	field_pos[ 1 ] = pos + ( ( field_order == 0 ? 0 : 1 ) * frame_delta * 0.5 );
+	int32_t field_pos[ 2 ];
+	field_pos[ 0 ] = ( pos + ( ( field_order == 0 ? 1 : 0 ) * frame_delta * 0.5 ) ) * ( 1 << 16 ) * ( 1.0 + softness );
+	field_pos[ 1 ] = ( pos + ( ( field_order == 0 ? 0 : 1 ) * frame_delta * 0.5 ) ) * ( 1 << 16 ) * ( 1.0 + softness );
 
-	// adjust the position for the softness level
-	field_pos[ 0 ] *= ( 1.0 + softness );
-	field_pos[ 1 ] *= ( 1.0 + softness );
+	register uint8_t *p;
+	register uint8_t *q;
+	register uint8_t *o;
+	uint16_t  *l;
 
-	uint8_t *p;
-	uint8_t *q;
-	uint8_t *o;
-	float  *l;
+	uint32_t value;
 
-	uint8_t y;
-	uint8_t uv;
-	float value;
+	int32_t x_diff = ( luma_width << 16 ) / *width;
+	int32_t y_diff = ( luma_height << 16 ) / *height;
+	int32_t x_offset = 0;
+	int32_t y_offset = 0;
+	uint8_t *p_row;
+	uint8_t *q_row;
 
-	float x_diff = ( float )luma_width / ( float )*width;
-	float y_diff = ( float )luma_height / ( float )*height;
+	int32_t i_softness = softness * ( 1 << 16 );
+
+	int field_count = field_order < 0 ? 1 : 2;
+	int field_stride_src = field_count * stride_src;
+	int field_stride_dest = field_count * stride_dest;
+
+	int field = 0;
 
 	// composite using luma map
-	for ( field = 0; field < ( field_order < 0 ? 1 : 2 ); ++field )
+	while ( field < field_count )
 	{
-		for ( i = field; i < height_src; i += ( field_order < 0 ? 1 : 2 ) )
-		{
-			p = &p_src[ i * stride_src ];
-			q = &p_dest[ i * stride_dest ];
-			o = &p_dest[ i * stride_dest ];
-			l = &luma_bitmap[ ( int )( ( float )i * y_diff ) * luma_width ];
+		p_row = p_src + field * stride_src;
+		q_row = p_dest + field * stride_dest;
+		y_offset = ( field * luma_width ) << 16;
+		i = field;
 
-			for ( j = 0; j < width_src; j ++ )
+		while ( i < height_src )
+		{
+			p = p_row;
+			q = q_row;
+			o = q;
+			l = luma_bitmap + ( y_offset >> 16 ) * ( luma_width * field_count );
+			x_offset = 0;
+			j = width_src;
+
+			while( j -- )
 			{
-				y = *p ++;
-				uv = *p ++;
-             	weight = l[ ( int )( ( float )j * x_diff ) ];
-   				value = smoothstep( weight, weight + softness, field_pos[ field ] );
-
-				*o ++ = (uint8_t)( y * value + *q++ * ( 1 - value ) );
-				*o ++ = (uint8_t)( uv * value + *q++ * ( 1 - value ) );
+             	weight = l[ x_offset >> 16 ];
+   				value = smoothstep( weight, i_softness + weight, field_pos[ field ] );
+				*o ++ = ( *p ++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
+				*o ++ = ( *p ++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
+				x_offset += x_diff;
 			}
+
+			y_offset += y_diff;
+			i += field_count;
+			p_row += field_stride_src;
+			q_row += field_stride_dest;
 		}
+
+		field ++;
 	}
 }
 
@@ -208,7 +227,7 @@ static int transition_get_image( mlt_frame this, uint8_t **image, mlt_image_form
 	float frame_delta = mlt_properties_get_double( b_props, "luma.delta" );
 	int luma_width = mlt_properties_get_int( b_props, "luma.width" );
 	int luma_height = mlt_properties_get_int( b_props, "luma.height" );
-	float *luma_bitmap = mlt_properties_get_data( b_props, "luma.bitmap", NULL );
+	uint16_t *luma_bitmap = mlt_properties_get_data( b_props, "luma.bitmap", NULL );
 	float luma_softness = mlt_properties_get_double( b_props, "luma.softness" );
 	int progressive = mlt_properties_get_int( b_props, "progressive" ) ||
 			mlt_properties_get_int( a_props, "consumer_progressive" ) ||
@@ -248,7 +267,7 @@ static int transition_get_image( mlt_frame this, uint8_t **image, mlt_image_form
 /** Load the luma map from PGM stream.
 */
 
-static void luma_read_pgm( FILE *f, float **map, int *width, int *height )
+static void luma_read_pgm( FILE *f, uint16_t **map, int *width, int *height )
 {
 	uint8_t *data = NULL;
 	while (1)
@@ -257,7 +276,7 @@ static void luma_read_pgm( FILE *f, float **map, int *width, int *height )
 		int i = 2;
 		int maxval;
 		int bpp;
-		float *p;
+		uint16_t *p;
 		
 		line[127] = '\0';
 
@@ -313,7 +332,7 @@ static void luma_read_pgm( FILE *f, float **map, int *width, int *height )
 			break;
 		
 		// allocate the luma bitmap
-		*map = p = (float*)mlt_pool_alloc( *width * *height * sizeof( float ) );
+		*map = p = (uint16_t*)mlt_pool_alloc( *width * *height * sizeof( uint16_t ) );
 		if ( *map == NULL )
 			break;
 
@@ -321,9 +340,9 @@ static void luma_read_pgm( FILE *f, float **map, int *width, int *height )
 		for ( i = 0; i < *width * *height * bpp; i += bpp )
 		{
 			if ( bpp == 1 )
-				*p++ = (float) data[ i ] / (float) maxval;
+				*p++ = data[ i ] << 8;
 			else
-				*p++ = (float) ( ( data[ i ] << 8 ) + data[ i+1 ] ) / (float) maxval;
+				*p++ = ( data[ i ] << 8 ) + data[ i+1 ];
 		}
 
 		break;
@@ -348,17 +367,12 @@ static mlt_frame transition_process( mlt_transition transition, mlt_frame a_fram
 	mlt_properties b_props = mlt_frame_properties( b_frame );
 
 	// If the filename property changed, reload the map
-	char *luma_file = mlt_properties_get( properties, "resource" );
-	if ( luma_file != NULL && ( this->filename == NULL || ( this->filename && strcmp( luma_file, this->filename ) ) ) )
+	char *lumafile = mlt_properties_get( properties, "resource" );
+	if ( this->bitmap == NULL && lumafile != NULL )
 	{
-		FILE *pipe;
-		
-		free( this->filename );
-		this->filename = strdup( luma_file );
-		pipe = fopen( luma_file, "r" );
+		FILE *pipe = fopen( lumafile, "r" );
 		if ( pipe != NULL )
 		{
-			mlt_pool_release( this->bitmap );
 			luma_read_pgm( pipe, &this->bitmap, &this->width, &this->height );
 			fclose( pipe );
 		}
@@ -404,7 +418,6 @@ static void transition_close( mlt_transition parent )
 {
 	transition_luma *this = (transition_luma*) parent->child;
 	mlt_pool_release( this->bitmap );
-	free( this->filename );
 	free( this );
 }