From 98c26d8006a0778435f801a35a3ca9e692f103d0 Mon Sep 17 00:00:00 2001 From: dezeroex Date: Sat, 30 Jul 2005 23:16:58 +0000 Subject: [PATCH] Added a README file with lots of juicy info. Added a denoise motion vectors function, enabled by default; the results seem very good. Removed some unused development code. git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@792 d19143bc-622f-0410-bfdd-b5b2a6649095 --- src/modules/motion_est/README | 69 +++++++ src/modules/motion_est/filter_motion_est.c | 278 ++++++++++----------------- 2 files changed, 172 insertions(+), 175 deletions(-) create mode 100644 src/modules/motion_est/README diff --git a/src/modules/motion_est/README b/src/modules/motion_est/README new file mode 100644 index 0000000..b5ea3a4 --- /dev/null +++ b/src/modules/motion_est/README @@ -0,0 +1,69 @@ +COMPILE: + +This module performs motion estimation and ancillary functions. To compile this module, you must supply these options to the root configure script: + +--enable-gpl --enable-motion-est + + +EXAMPLES: + +> inigo -filter motion_est + +But that won't be very interesting. Do this to display the motion vectors: + +> inigo -filter motion_est -filter vismv + +If your using a movie file that contains a crop, you will get better results with this: + +> inigo -filter crop_detect -filter motion_est -filter vismv + +If your computer is unable to do the above examples in real time, try this: + +> inigo -filter motion_est -filter vismv -consumer inigo real_time=0 + +If you'd like to see the motion vectors before the median denoising function is applied, do this: + +> inigo -filter motion_est denoise=0 -filter vismv + +To automatically track an object in the frame, try this: + +> inigo -filter autotrack_rectangle:X,Y:WxH debug=1 + +(Where X,Y is the origin of the rectangle indexed from upper left and WxH is the dimensions of the rectangle.) + + +NOTES (and deficiencies): + +1. Ignore shot change detection when your using the autotrack_rectangle filter. + +2. Don't assume motion vectors displayed while stepping backwards and forward are that same vectors + that would be calculated while playing the footage from start to finish, nonstop. Stepping forward + should be fine after a few frames, however. + +3. SSE instructions are lazily assumed. MMX and Altivec would be good too. + +4. Motion estimation is only performed in the luma color space. + +5. Motion vectors should have sub-pixel accuracy. + +6. Motion vectors are not serializable yet. + +7. A diligent test suite is needed. + +8. Multithreaded code will see HUGE benefits on multi-CPU systems. Donations of a multi-core cpu or a + multi-cpu system to the author will encourage development. + +9. Macroblock sizes are not dynamic (Though settable at runtime.) + +10. Notes (5), (7), and (9) would go a long ways to making this code suitable for a modern video encoder. + +11. Shot change works well but arbitrarily chosen thresholds need to be tuned. + +12. Given the nature of documentation of other motion estimation code bases, I will GLADLY clarify and + document any piece of code upon request. + +13. Considerable effort has been put into the speed. + +Zachary Drew +drew0054@tc.umn.edu + diff --git a/src/modules/motion_est/filter_motion_est.c b/src/modules/motion_est/filter_motion_est.c index 49d7e5b..6fa38ed 100644 --- a/src/modules/motion_est/filter_motion_est.c +++ b/src/modules/motion_est/filter_motion_est.c @@ -79,6 +79,8 @@ struct motion_est_context_s int edge_blocks_x, edge_blocks_y; int initial_thresh; int check_chroma; // if check_chroma == 1 then compare chroma + int denoise; + int previous_msad; /* bounds */ struct mlt_geometry_item_s prev_bounds; // Cache last frame's bounds (needed for predictor vectors validity) @@ -107,8 +109,6 @@ struct motion_est_context_s /* run-time configurable comparison functions */ int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int); int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int); - //int (*vert_deviation_reference)(uint8_t *, int, int, int, int); - //int (*horiz_deviation_reference)(uint8_t *, int, int, int, int); }; @@ -447,68 +447,92 @@ static inline int median_predictor(int a, int b, int c) { return b; } -#if 0 -inline static int vertical_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h ) -{ - int i, j, average, deviation = 0; - for ( i = 0; i < w; i++ ){ - average = 0; - for ( j = 0; j < h; j++ ){ - average += *(block + i*xstride + j*ystride); - } - average /= h; - for ( j = 0; j < h; j++ ){ - deviation += ABS(average - block[i*xstride + j*ystride]); - } - } +// Macros for pointer calculations +#define CURRENT(i,j) ( c->current_vectors + (j)*c->mv_buffer_width + (i) ) +#define FORMER(i,j) ( c->former_vectors + (j)*c->mv_buffer_width + (i) ) +#define DENOISE(i,j) ( c->denoise_vectors + (j)*c->mv_buffer_width + (i) ) - return deviation; +int ncompare (const void * a, const void * b) +{ + return ( *(int*)a - *(int*)b ); } -#endif -#if 0 -inline static int horizontal_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h ) +// motion vector denoising +// for x and y components seperately, +// change the vector to be the median value of the 9 adjacent vectors +static void median_denoise( motion_vector *v, struct motion_est_context_s *c ) { - int i, j, average, deviation = 0; - for ( j = 0; j < h; j++ ){ - average = 0; - for ( i = 0; i < w; i++ ){ - average += block[i*xstride + j*ystride]; - } - average /= w; - for ( i = 0; i < w; i++ ){ - deviation += ABS(average - block[i*xstride + j*ystride]); - } - } + int xvalues[9], yvalues[9]; - return deviation; -} -#endif + int i,j,n; + for( i = c->left_mb; i <= c->right_mb; i++ ){ + for( j = c->top_mb; j <= c->bottom_mb; j++ ) + { + n = 0; -// Macros for pointer calculations -#define CURRENT(i,j) ( c->current_vectors + (j)*c->mv_buffer_width + (i) ) -#define FORMER(i,j) ( c->former_vectors + (j)*c->mv_buffer_width + (i) ) + xvalues[n ] = CURRENT(i,j)->dx; // Center + yvalues[n++] = CURRENT(i,j)->dy; -#if 0 -void collect_pre_statistics( struct motion_est_context_s *c, uint8_t *image ) { + if( i > c->left_mb ) // Not in First Column + { + xvalues[n ] = CURRENT(i-1,j)->dx; // Left + yvalues[n++] = CURRENT(i-1,j)->dy; - int i, j, count = 0; - uint8_t *p; + if( j > c->top_mb ) { + xvalues[n ] = CURRENT(i-1,j-1)->dx; // Upper Left + yvalues[n++] = CURRENT(i-1,j-1)->dy; + } - for ( i = c->left_mb; i <= c->right_mb; i++ ){ - for ( j = c->top_mb; j <= c->bottom_mb; j++ ){ - count++; - p = image + i * c->macroblock_width * c->xstride + j * c->macroblock_height * c->ystride; - CURRENT(i,j)->vert_dev = c->vert_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height ); - CURRENT(i,j)->horiz_dev = c->horiz_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height ); - } + if( j < c->bottom_mb ) { + xvalues[n ] = CURRENT(i-1,j+1)->dx; // Bottom Left + yvalues[n++] = CURRENT(i-1,j+1)->dy; + } + } + if( i < c->right_mb ) // Not in Last Column + { + xvalues[n ] = CURRENT(i+1,j)->dx; // Right + yvalues[n++] = CURRENT(i+1,j)->dy; + + + if( j > c->top_mb ) { + xvalues[n ] = CURRENT(i+1,j-1)->dx; // Upper Right + yvalues[n++] = CURRENT(i+1,j-1)->dy; + } + + if( j < c->bottom_mb ) { + xvalues[n ] = CURRENT(i+1,j+1)->dx; // Bottom Right + yvalues[n++] = CURRENT(i+1,j+1)->dy; + } + } + if( j > c->top_mb ) // Not in First Row + { + xvalues[n ] = CURRENT(i,j-1)->dx; // Top + yvalues[n++] = CURRENT(i,j-1)->dy; + } + + if( j < c->bottom_mb ) // Not in Last Row + { + xvalues[n ] = CURRENT(i,j+1)->dx; // Bottom + yvalues[n++] = CURRENT(i,j+1)->dy; + } + + qsort (xvalues, n, sizeof(int), ncompare); + qsort (yvalues, n, sizeof(int), ncompare); + + if( n % 2 == 1 ) { + DENOISE(i,j)->dx = xvalues[n/2]; + DENOISE(i,j)->dy = yvalues[n/2]; + } + else { + DENOISE(i,j)->dx = (xvalues[n/2] + xvalues[n/2+1])/2; + DENOISE(i,j)->dy = (yvalues[n/2] + yvalues[n/2+1])/2; + } + } } -} -#endif -static void median_denoise( motion_vector *v, struct motion_est_context_s *c ) -{ -// for ( int i = 0; i++ + motion_vector *t = c->current_vectors; + c->current_vectors = c->denoise_vectors; + c->denoise_vectors = t; } @@ -625,117 +649,6 @@ static void search( struct yuv_data from, //msad < c->initial_thresh ) // The vector is probably good. - continue; - - struct motion_vector_s reverse; - reverse.dx = -here->dx; - reverse.dy = -here->dy; - reverse.msad = here->msad; - - // calculate psuedo block coordinates - from_x += here->dx; - from_y += here->dy; - - n = 0; -#endif - - // Calculate the real block closest to our psuedo block -#if 0 - int ri = ( ABS( here->dx ) + c->macroblock_width/2 ) / c->macroblock_width; - if ( ri != 0 ) ri *= here->dx / ABS(here->dx); // Recover sign - ri += i; - if ( ri < 0 ) ri = 0; - else if ( ri >= c->mv_buffer_width ) ri = c->mv_buffer_width; - - int rj = ( ABS( here->dy ) + c->macroblock_height/2 ) / c->macroblock_height; - if ( rj != 0 ) rj *= here->dy / ABS(here->dy); // Recover sign - rj += j; - if ( rj < 0 ) rj = 0; - else if ( rj >= c->mv_buffer_height ) rj = c->mv_buffer_height; - - /* Adjacent to collocated */ - if( c->former_vectors_valid ) - { - // Top of colocated - if( rj > c->prev_top_mb ){// && COL_TOP->valid ){ - candidates[n ].dx = -FORMER(ri,rj-1)->dx; - candidates[n++].dy = -FORMER(ri,rj-1)->dy; - } - - // Left of colocated - if( ri > c->prev_left_mb ){// && COL_LEFT->valid ){ - candidates[n ].dx = -FORMER(ri-1,rj)->dx; - candidates[n++].dy = -FORMER(ri-1,rj)->dy; - } - - // Right of colocated - if( ri < c->prev_right_mb ){// && COL_RIGHT->valid ){ - candidates[n ].dx = -FORMER(ri+1,rj)->dx; - candidates[n++].dy = -FORMER(ri+1,rj)->dy; - } - - // Bottom of colocated - if( rj < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){ - candidates[n ].dx = -FORMER(ri,rj+1)->dx; - candidates[n++].dy = -FORMER(ri,rj+1)->dy; - } - - // And finally, colocated - candidates[n ].dx = -FORMER(ri,rj)->dx; - candidates[n++].dy = -FORMER(ri,rj)->dy; - } -#endif -#if 0 - // Zero vector - candidates[n].dx = 0; - candidates[n++].dy = 0; - - check_candidates ( &to, &from, from_x, from_y, candidates, 1, 1, &reverse, c ); - - /* Scan for the best candidate */ - while( n ) { - n--; - - score = compare( to, from, from_x, from_y, /* to and from are reversed */ - from_x + candidates[n].dx, /* to x */ - from_y + candidates[n].dy, /* to y */ - c); /* context */ - - if ( score < reverse.msad ) { - reverse.dx = candidates[n].dx; - reverse.dy = candidates[n].dy; - reverse.msad = score; - if ( score < c->initial_thresh ) - n=0; // Simplified version of early termination thresh - } - } - -// if ( reverse.msad == here->msad) // If nothing better was found -// { // this is an opportunity -// // to skip 4 block comparisons -// continue; // in the diamond search -// } - - - diamond_search( &to, &from, from_x, from_y, &reverse, c); /* to and from are reversed */ - - if ( ABS( reverse.dx + here->dx ) + ABS( reverse.dy + here->dy ) > 5 ) -// if ( here->msad > reverse.msad + c->initial_thresh*10 ) - { - here->valid = 2; - } - -#endif } /* End column loop */ } /* End row loop */ @@ -829,9 +742,18 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format // Get the motion_est context object struct motion_est_context_s *c = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL); + #ifdef BENCHMARK + struct timeval start; gettimeofday(&start, NULL ); + #endif + // Get the new image and frame number int error = mlt_frame_get_image( frame, image, format, width, height, 1 ); + #ifdef BENCHMARK + struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec); + fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference); + #endif + if( error != 0 ) mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr ); @@ -873,6 +795,9 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format if( mlt_properties_get( properties, "check_chroma" ) != NULL ) c->check_chroma = mlt_properties_get_int( properties, "check_chroma" ); + if( mlt_properties_get( properties, "denoise" ) != NULL ) + c->denoise = mlt_properties_get_int( properties, "denoise" ); + init_optimizations( c ); // Calculate the dimensions in macroblock units @@ -939,6 +864,7 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format c->former_frame_position = c->current_frame_position; + c->previous_msad = 0; c->initialized = 1; } @@ -973,9 +899,6 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format // If video is advancing, run motion vector algorithm and etc... if( c->former_frame_position + 1 == c->current_frame_position ) { - #ifdef BENCHMARK - struct timeval start; gettimeofday(&start, NULL ); - #endif // Swap the motion vector buffers and reuse allocated memory struct motion_vector_s *temp = c->current_vectors; @@ -1017,20 +940,16 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format //collect_pre_statistics( context, *image ); search( c->current_image, c->former_image, c ); - //median_denoise( c->current_vectors, c ); - collect_post_statistics( c ); - #ifdef BENCHMARK - struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec); - fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference); - #endif // Detect shot changes - if( c->comparison_average > 12 * c->macroblock_width * c->macroblock_height ) { - //fprintf(stderr, " - SAD: %d <>\n", c->comparison_average); + if( c->comparison_average > 10 * c->macroblock_width * c->macroblock_height && + c->comparison_average > c->previous_msad * 2 ) + { + fprintf(stderr, " - SAD: %d <>\n", c->comparison_average); mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1); // c->former_vectors_valid = 0; // Invalidate the previous frame's predictors c->shot_change = 1; @@ -1041,7 +960,14 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format //fprintf(stderr, " - SAD: %d\n", c->comparison_average); } + c->previous_msad = c->comparison_average; + if( c->comparison_average != 0 ) { + + // denoise the vector buffer + if( c->denoise ) + median_denoise( c->current_vectors, c ); + // Pass the new vector data into the frame mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors", (void*)c->current_vectors, c->mv_size, NULL, NULL ); @@ -1079,6 +1005,7 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format // Remember which frame this is c->former_frame_position = c->current_frame_position; + if ( c->check_chroma == 0 ) memcpy( c->former_image.y, *image, *width * *height * c->xstride ); @@ -1133,6 +1060,7 @@ mlt_filter filter_motion_est_init( char *arg ) context->limit_y = 64; context->search_method = DIAMOND_SEARCH; context->check_chroma = 0; + context->denoise = 1; /* reference functions that may have optimized versions */ context->compare_reference = sad_reference; -- 1.7.4.4