Added a README file with lots of juicy info. Added a denoise motion vectors function...
authordezeroex <dezeroex@d19143bc-622f-0410-bfdd-b5b2a6649095>
Sat, 30 Jul 2005 23:16:58 +0000 (23:16 +0000)
committerdezeroex <dezeroex@d19143bc-622f-0410-bfdd-b5b2a6649095>
Sat, 30 Jul 2005 23:16:58 +0000 (23:16 +0000)
git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@792 d19143bc-622f-0410-bfdd-b5b2a6649095

src/modules/motion_est/README [new file with mode: 0644]
src/modules/motion_est/filter_motion_est.c

diff --git a/src/modules/motion_est/README b/src/modules/motion_est/README
new file mode 100644 (file)
index 0000000..b5ea3a4
--- /dev/null
@@ -0,0 +1,69 @@
+COMPILE:
+
+This module performs motion estimation and ancillary functions. To compile this module, you must supply these options to the root configure script:
+
+--enable-gpl --enable-motion-est
+
+
+EXAMPLES:
+
+> inigo -filter motion_est <movie_file>
+
+But that won't be very interesting. Do this to display the motion vectors:
+
+> inigo -filter motion_est -filter vismv <movie_file>
+
+If you're using a movie file that contains a crop, you will get better results with this:
+
+> inigo -filter crop_detect -filter motion_est -filter vismv <movie_file>
+
+If your computer is unable to do the above examples in real time, try this:
+
+> inigo -filter motion_est -filter vismv -consumer inigo real_time=0 <movie_file>
+
+If you'd like to see the motion vectors before the median denoising function is applied, do this:
+
+> inigo -filter motion_est denoise=0 -filter vismv <movie_file>
+
+To automatically track an object in the frame, try this:
+
+> inigo -filter autotrack_rectangle:X,Y:WxH debug=1 <movie_file>
+
+(Where X,Y is the origin of the rectangle indexed from upper left and WxH is the dimensions of the rectangle.)
+
+
+NOTES (and deficiencies):
+
+1.  Ignore shot change detection when you're using the autotrack_rectangle filter.
+
+2.  Don't assume motion vectors displayed while stepping backwards and forward are the same vectors
+    that would be calculated while playing the footage from start to finish, nonstop. Stepping forward
+    should be fine after a few frames, however.
+
+3.  SSE instructions are lazily assumed. MMX and Altivec would be good too.
+
+4.  Motion estimation is only performed in the luma color space.
+
+5.  Motion vectors should have sub-pixel accuracy.
+
+6.  Motion vectors are not serializable yet.
+
+7.  A diligent test suite is needed.
+
+8.  Multithreaded code will see HUGE benefits on multi-CPU systems. Donations of a multi-core cpu or a
+    multi-cpu system to the author will encourage development.
+
+9.  Macroblock sizes are not dynamic (Though settable at runtime.)
+
+10. Notes (5), (7), and (9) would go a long way toward making this code suitable for a modern video encoder.
+
+11. Shot change works well but arbitrarily chosen thresholds need to be tuned.
+
+12. Given the nature of documentation of other motion estimation code bases, I will GLADLY clarify and
+    document any piece of code upon request.
+
+13. Considerable effort has been put into the speed.
+
+Zachary Drew
+drew0054@tc.umn.edu
+
index 49d7e5b..6fa38ed 100644 (file)
@@ -79,6 +79,8 @@ struct motion_est_context_s
        int edge_blocks_x, edge_blocks_y;
        int initial_thresh;
        int check_chroma;                       // if check_chroma == 1 then compare chroma
+       int denoise;
+       int previous_msad;
 
        /* bounds */
        struct mlt_geometry_item_s prev_bounds; // Cache last frame's bounds (needed for predictor vectors validity)
@@ -107,8 +109,6 @@ struct motion_est_context_s
        /* run-time configurable comparison functions */
        int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int);
        int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int);
-       //int (*vert_deviation_reference)(uint8_t *, int, int, int, int);
-       //int (*horiz_deviation_reference)(uint8_t *, int, int, int, int);
 
 };
 
@@ -447,68 +447,92 @@ static inline int median_predictor(int a, int b, int c) {
        return b;
 }
 
-#if 0
-inline static int vertical_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
-{
-       int i, j, average, deviation = 0;
-       for ( i = 0; i < w; i++ ){
-               average = 0;
-               for ( j = 0; j < h; j++ ){
-                       average += *(block + i*xstride + j*ystride);
-               }
-               average /= h;
-               for ( j = 0; j < h; j++ ){
-                       deviation += ABS(average - block[i*xstride + j*ystride]);
-               }
-       }
+// Macros for pointer calculations: each yields a pointer to the vector at column i, row j of the named buffer (row stride is c->mv_buffer_width); `c` must be in scope
+#define CURRENT(i,j)   ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
+#define FORMER(i,j)    ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
+#define DENOISE(i,j)   ( c->denoise_vectors + (j)*c->mv_buffer_width + (i) )
 
-       return deviation;
+int ncompare (const void * a, const void * b) // qsort() comparator: orders ints ascending; returns <0, 0 or >0
+{
+       return ( *(const int*)a > *(const int*)b ) - ( *(const int*)a < *(const int*)b ); // same sign as a subtraction, but cannot overflow and keeps const
 }
-#endif
 
-#if 0
-inline static int horizontal_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
+// Motion vector denoising: for every macroblock in [left_mb..right_mb] x [top_mb..bottom_mb], DENOISE(i,j) receives, for dx and dy independently,
+// the median of CURRENT(i,j) and its in-bounds neighbours (up to 9 samples; an even sample count averages the two middle values after sorting).
+// Afterwards current_vectors and denoise_vectors are swapped so current_vectors holds the result. v is not used; c supplies the buffers and bounds.
+static void median_denoise( motion_vector *v, struct motion_est_context_s *c )
 {
-       int i, j, average, deviation = 0;
-       for ( j = 0; j < h; j++ ){
-               average = 0;
-               for ( i = 0; i < w; i++ ){
-                       average += block[i*xstride + j*ystride];
-               }
-               average /= w;
-               for ( i = 0; i < w; i++ ){
-                       deviation += ABS(average - block[i*xstride + j*ystride]);
-               }
-       }
+       int xvalues[9], yvalues[9];
 
-       return deviation;
-}
-#endif
+       int i,j,n;
+       for( i = c->left_mb; i <= c->right_mb; i++ ){
+               for( j = c->top_mb; j <= c->bottom_mb; j++ )
+               {
+                       n = 0;
 
-// Macros for pointer calculations
-#define CURRENT(i,j)   ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
-#define FORMER(i,j)    ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
+                       xvalues[n  ] = CURRENT(i,j)->dx; // Center
+                       yvalues[n++] = CURRENT(i,j)->dy;
 
-#if 0
-void collect_pre_statistics( struct motion_est_context_s *c, uint8_t *image ) {
+                       if( i > c->left_mb ) // Not in First Column
+                       {
+                               xvalues[n  ] = CURRENT(i-1,j)->dx; // Left
+                               yvalues[n++] = CURRENT(i-1,j)->dy;
 
-       int i, j, count = 0;
-       uint8_t *p;
+                               if( j > c->top_mb ) {
+                                       xvalues[n  ] = CURRENT(i-1,j-1)->dx; // Upper Left
+                                       yvalues[n++] = CURRENT(i-1,j-1)->dy;
+                               }
 
-       for ( i = c->left_mb; i <= c->right_mb; i++ ){
-        for ( j = c->top_mb; j <= c->bottom_mb; j++ ){  
-               count++;
-               p = image + i * c->macroblock_width * c->xstride + j * c->macroblock_height * c->ystride;
-               CURRENT(i,j)->vert_dev = c->vert_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
-               CURRENT(i,j)->horiz_dev = c->horiz_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
-        }
+                               if( j < c->bottom_mb ) {
+                                       xvalues[n  ] = CURRENT(i-1,j+1)->dx; // Bottom Left
+                                       yvalues[n++] = CURRENT(i-1,j+1)->dy;
+                               }
+                       }
+                       if( i < c->right_mb ) // Not in Last Column
+                       {
+                               xvalues[n  ] = CURRENT(i+1,j)->dx; // Right
+                               yvalues[n++] = CURRENT(i+1,j)->dy;
+                               
+                               
+                               if( j > c->top_mb ) {
+                                       xvalues[n  ] = CURRENT(i+1,j-1)->dx; // Upper Right
+                                       yvalues[n++] = CURRENT(i+1,j-1)->dy;
+                               }
+
+                               if( j < c->bottom_mb ) {
+                                       xvalues[n  ] = CURRENT(i+1,j+1)->dx; // Bottom Right
+                                       yvalues[n++] = CURRENT(i+1,j+1)->dy;
+                               }
+                       }
+                       if( j > c->top_mb ) // Not in First Row
+                       {
+                               xvalues[n  ] = CURRENT(i,j-1)->dx; // Top
+                               yvalues[n++] = CURRENT(i,j-1)->dy;
+                       }
+
+                       if( j < c->bottom_mb ) // Not in Last Row
+                       {
+                               xvalues[n  ] = CURRENT(i,j+1)->dx; // Bottom
+                               yvalues[n++] = CURRENT(i,j+1)->dy;
+                       }
+
+                       qsort (xvalues, n, sizeof(int), ncompare);
+                       qsort (yvalues, n, sizeof(int), ncompare);
+
+                       if( n % 2 == 1 ) {
+                               DENOISE(i,j)->dx = xvalues[n/2];
+                               DENOISE(i,j)->dy = yvalues[n/2];
+                       }
+                       else {
+                               DENOISE(i,j)->dx = (xvalues[n/2-1] + xvalues[n/2])/2; // middle pair of the n sorted samples (indices n/2-1 and n/2)
+                               DENOISE(i,j)->dy = (yvalues[n/2-1] + yvalues[n/2])/2; // n/2+1 would skip the lower middle value and, for n == 2, read an unwritten slot
+                       }
+               }
        }
-}
-#endif
 
-static void median_denoise( motion_vector *v, struct motion_est_context_s *c )
-{
-//     for ( int i = 0; i++
+       motion_vector *t = c->current_vectors;
+       c->current_vectors = c->denoise_vectors;
+       c->denoise_vectors = t;
 
 }
 
@@ -625,117 +649,6 @@ static void search( struct yuv_data from,                 //<! Image data. Motion vector sourc
                full_search( from, to, from_x, from_y, here, c); 
 #endif
 
-
-               /* Do things in Reverse
-                * Check for occlusions. A block from last frame becomes obscured this frame.
-                * A bogus motion vector will result. To look for this, run the search in reverse
-                * and see if the vector is good backwards and forwards. Most occlusions won't be.
-                * The new source block may not correspond exactly to blocks in the vector buffer
-                * The opposite case, a block being revealed is inherently ignored.
-                */
-#if 0
-               if ( here->msad < c->initial_thresh )           // The vector is probably good.
-                       continue;
-
-               struct motion_vector_s reverse;
-               reverse.dx = -here->dx; 
-               reverse.dy = -here->dy;
-               reverse.msad = here->msad;
-
-               // calculate psuedo block coordinates
-               from_x += here->dx;
-               from_y += here->dy;
-
-               n = 0;
-#endif
-
-               // Calculate the real block closest to our psuedo block
-#if 0
-               int ri = ( ABS( here->dx ) + c->macroblock_width/2 ) / c->macroblock_width;
-               if ( ri != 0 ) ri *= here->dx / ABS(here->dx);  // Recover sign
-               ri += i;
-               if ( ri < 0 ) ri = 0;
-               else if ( ri >= c->mv_buffer_width ) ri = c->mv_buffer_width;
-
-               int rj = ( ABS( here->dy ) + c->macroblock_height/2 ) / c->macroblock_height;
-               if ( rj != 0 ) rj *= here->dy / ABS(here->dy);  // Recover sign
-               rj += j;
-               if ( rj < 0 ) rj = 0;
-               else if ( rj >= c->mv_buffer_height ) rj = c->mv_buffer_height;
-
-               /* Adjacent to collocated */
-               if( c->former_vectors_valid )
-               {
-                       // Top of colocated
-                       if( rj > c->prev_top_mb ){// && COL_TOP->valid ){
-                               candidates[n  ].dx = -FORMER(ri,rj-1)->dx;
-                               candidates[n++].dy = -FORMER(ri,rj-1)->dy;
-                       }
-       
-                       // Left of colocated
-                       if( ri > c->prev_left_mb ){// && COL_LEFT->valid ){
-                               candidates[n  ].dx = -FORMER(ri-1,rj)->dx;
-                               candidates[n++].dy = -FORMER(ri-1,rj)->dy;
-                       }
-       
-                       // Right of colocated
-                       if( ri < c->prev_right_mb ){// && COL_RIGHT->valid ){
-                               candidates[n  ].dx = -FORMER(ri+1,rj)->dx;
-                               candidates[n++].dy = -FORMER(ri+1,rj)->dy;
-                       }
-       
-                       // Bottom of colocated
-                       if( rj < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
-                               candidates[n  ].dx = -FORMER(ri,rj+1)->dx;
-                               candidates[n++].dy = -FORMER(ri,rj+1)->dy;
-                       }
-
-                       // And finally, colocated
-                       candidates[n  ].dx = -FORMER(ri,rj)->dx;
-                       candidates[n++].dy = -FORMER(ri,rj)->dy;
-               }
-#endif
-#if 0
-               // Zero vector
-               candidates[n].dx = 0;
-               candidates[n++].dy = 0;
-
-               check_candidates ( &to, &from, from_x, from_y, candidates, 1, 1, &reverse, c ); 
-
-               /* Scan for the best candidate */
-               while( n ) {
-                       n--;
-
-                       score = compare( to, from, from_x, from_y,      /* to and from are reversed */
-                                        from_x + candidates[n].dx,     /* to x */
-                                        from_y + candidates[n].dy,     /* to y */
-                                        c);                            /* context */
-
-                       if ( score < reverse.msad ) {
-                               reverse.dx = candidates[n].dx;
-                               reverse.dy = candidates[n].dy;
-                               reverse.msad = score;
-                               if ( score < c->initial_thresh )
-                                       n=0;            // Simplified version of early termination thresh
-                       }
-               }
-
-//             if ( reverse.msad == here->msad)        // If nothing better was found
-//             {                                       // this is an opportunity
-//                                                     // to skip 4 block comparisons
-//                     continue;                       // in the diamond search
-//             }
-
-
-               diamond_search( &to, &from, from_x, from_y, &reverse, c); /* to and from are reversed */
-
-               if ( ABS( reverse.dx + here->dx ) + ABS( reverse.dy + here->dy ) > 5  )
-//             if ( here->msad > reverse.msad + c->initial_thresh*10   )
-               {
-                       here->valid = 2;
-               }
-
-#endif
         } /* End column loop */
        } /* End row loop */
 
@@ -829,9 +742,18 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
        // Get the motion_est context object
        struct motion_est_context_s *c = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL);
 
+               #ifdef BENCHMARK
+               struct timeval start; gettimeofday(&start, NULL );
+               #endif
+
        // Get the new image and frame number
        int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
 
+               #ifdef BENCHMARK
+               struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
+               fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference);
+               #endif
+
        if( error != 0 )
                mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr );
 
@@ -873,6 +795,9 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
                if( mlt_properties_get( properties, "check_chroma" ) != NULL )
                        c->check_chroma = mlt_properties_get_int( properties, "check_chroma" );
 
+               if( mlt_properties_get( properties, "denoise" ) != NULL )
+                       c->denoise = mlt_properties_get_int( properties, "denoise" );
+
                init_optimizations( c );
 
                // Calculate the dimensions in macroblock units
@@ -939,6 +864,7 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
 
 
                c->former_frame_position = c->current_frame_position;
+               c->previous_msad = 0;
 
                c->initialized = 1;
        }
@@ -973,9 +899,6 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
        // If video is advancing, run motion vector algorithm and etc...        
        if( c->former_frame_position + 1 == c->current_frame_position )
        {
-               #ifdef BENCHMARK
-               struct timeval start; gettimeofday(&start, NULL );
-               #endif
 
                // Swap the motion vector buffers and reuse allocated memory
                struct motion_vector_s *temp = c->current_vectors;
@@ -1017,20 +940,16 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
                //collect_pre_statistics( context, *image );
                search( c->current_image, c->former_image, c );
 
-               //median_denoise( c->current_vectors, c );
-
                collect_post_statistics( c );
 
-               #ifdef BENCHMARK
-               struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
-               fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference);
-               #endif
 
 
 
                // Detect shot changes
-               if( c->comparison_average > 12 * c->macroblock_width * c->macroblock_height ) {
-                       //fprintf(stderr, " - SAD: %d   <<Shot change>>\n", c->comparison_average);
+               if( c->comparison_average > 10 * c->macroblock_width * c->macroblock_height &&
+                   c->comparison_average > c->previous_msad * 2 )
+               {
+                       fprintf(stderr, " - SAD: %d   <<Shot change>>\n", c->comparison_average);
                        mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1);
                //      c->former_vectors_valid = 0; // Invalidate the previous frame's predictors
                        c->shot_change = 1;
@@ -1041,7 +960,14 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
                        //fprintf(stderr, " - SAD: %d\n", c->comparison_average);
                }
 
+               c->previous_msad = c->comparison_average;
+
                if( c->comparison_average != 0 ) {
+
+                       // denoise the vector buffer
+                       if( c->denoise )
+                               median_denoise( c->current_vectors, c );
+
                        // Pass the new vector data into the frame
                        mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
                                         (void*)c->current_vectors, c->mv_size, NULL, NULL );
@@ -1079,6 +1005,7 @@ static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format
        // Remember which frame this is
        c->former_frame_position = c->current_frame_position;
 
+
        if ( c->check_chroma == 0 )
                memcpy( c->former_image.y, *image, *width * *height * c->xstride );
 
@@ -1133,6 +1060,7 @@ mlt_filter filter_motion_est_init( char *arg )
                context->limit_y = 64;
                context->search_method = DIAMOND_SEARCH;
                context->check_chroma = 0;
+               context->denoise = 1;
 
                /* reference functions that may have optimized versions */
                context->compare_reference = sad_reference;