research.m1stereo.tv/gitweb/ - melted/blob - src/modules/motion_est/filter_motion_est.c

   1 /*
   2  *      /brief fast motion estimation filter
   3  *      /author Zachary Drew, Copyright 2005
   4  *
   5  *      Currently only uses Gamma data for comparisonon (bug or feature?)
   6  *      SSE optimized where available.
   7  *
   8  *      Vector orientation: The vector data that is generated for the current frame specifies
   9  *      the motion from the previous frame to the current frame. To know how a macroblock
  10  *      in the current frame will move in the future, the next frame is needed.
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or
  15  * (at your option) any later version.
  16  *
  17  * This program is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with this program; if not, write to the Free Software Foundation,
  24  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25  */
  26
  27
  28 #include "filter_motion_est.h"
  29 #include <framework/mlt.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <math.h>
  33 #include <string.h>
  34 #include <sys/time.h>
  35 #include <unistd.h>
  36
  37 #ifdef USE_SSE
  38 #include "sad_sse.h"
  39 #endif
  40
  41 #define NDEBUG
  42 #include <assert.h>
  43
  44 #undef DEBUG
  45 #undef DEBUG_ASM
  46 #undef BENCHMARK
  47 #undef COUNT_COMPARES
  48
  49 #define DIAMOND_SEARCH 0x0
  50 #define FULL_SEARCH 0x1
  51 #define SHIFT 8
  52 #define MIN(a,b) ((a) > (b) ? (b) : (a))
  53 #define ABS(a) ((a) >= 0 ? (a) : (-(a)))
  54
  55
  56 struct motion_est_context_s
  57 {
  58         int initialized;                                // true if filter has been initialized
  59
  60 #ifdef COUNT_COMPARES
  61         int compares;
  62 #endif
  63
  64         /* same as mlt_frame's parameters */
  65         int width, height;
  66
  67         /* Operational details */
  68         int mb_w, mb_h;
  69         int xstride, ystride;
  70         uint8_t *cache_image;                   // Copy of current frame
  71         uint8_t *former_image;                  // Copy of former frame
  72         int search_method;
  73         int skip_prediction;
  74         int shot_change;
  75         int limit_x, limit_y;                   // max x and y of a motion vector
  76         int initial_thresh;
  77         int check_chroma;                       // if check_chroma == 1 then compare chroma
  78         int denoise;
  79         int previous_msad;
  80         int show_reconstruction;
  81         int toggle_when_paused;
  82         int show_residual;
  83
  84         /* bounds */
  85         struct mlt_geometry_item_s bounds;      // Current bounds (from filters crop_detect, autotrack rectangle, or other)
  86
  87         /* bounds in macroblock units; macroblocks are completely contained within the boundry */
  88         int left_mb, prev_left_mb, right_mb, prev_right_mb;
  89         int top_mb, prev_top_mb, bottom_mb, prev_bottom_mb;
  90
  91         /* size of our vector buffers */
  92         int mv_buffer_height, mv_buffer_width, mv_size;
  93
  94         /* vector buffers */
  95         int former_vectors_valid;               //<! true if the previous frame's buffered motion vector data is valid
  96         motion_vector *former_vectors;
  97         motion_vector *current_vectors;
  98         motion_vector *denoise_vectors;
  99         mlt_position former_frame_position, current_frame_position;
 100
 101         /* diagnostic metrics */
 102         float predictive_misses;                // How often do the prediction motion vectors fail?
 103         int comparison_average;                 // How far does the best estimation deviate from a perfect comparison?
 104         int bad_comparisons;
 105         int average_length;
 106         int average_x, average_y;
 107
 108         /* run-time configurable comparison functions */
 109         int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int);
 110         int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int);
 111
 112 };
 113
 114 // This is used to constrains pixel operations between two blocks to be within the image boundry
 115 inline static int constrain(    int *x, int *y, int *w, int *h,
 116                                 const int dx, const int dy,
 117                                 const int left, const int right,
 118                                 const int top, const int bottom)
 119 {
 120         uint32_t penalty = 1 << SHIFT;                  // Retain a few extra bits of precision
 121         int x2 = *x + dx;
 122         int y2 = *y + dy;
 123         int w_remains = *w;
 124         int h_remains = *h;
 125
 126         // Origin of macroblock moves left of image boundy
 127         if( *x < left || x2 < left ) {
 128                 w_remains = *w - left + ((*x < x2) ?  *x : x2);
 129                 *x += *w - w_remains;
 130         }
 131         // Portion of macroblock moves right of image boundry
 132         else if( *x + *w > right || x2 + *w > right )
 133                 w_remains = right - ((*x > x2) ? *x : x2);
 134
 135         // Origin of macroblock moves above image boundy
 136         if( *y < top || y2 < top ) {
 137                 h_remains = *h - top + ((*y < y2) ? *y : y2);
 138                 *y += *h - h_remains;
 139         }
 140         // Portion of macroblock moves bellow image boundry
 141         else if( *y + *h > bottom || y2 + *h > bottom )
 142                 h_remains = bottom - ((*y > y2) ?  *y : y2);
 143
 144         if( w_remains == *w && h_remains == *h ) return penalty;
 145         if( w_remains <= 0 || h_remains <= 0) return 0; // Block is clipped out of existance
 146         penalty = (*w * *h * penalty)
 147                 / ( w_remains * h_remains);             // Recipricol of the fraction of the block that remains
 148
 149         assert(*x >= left); assert(x2 + *w - w_remains >= left);
 150         assert(*y >= top); assert(y2 + *h - h_remains >= top);
 151         assert(*x + w_remains <= right); assert(x2 + w_remains <= right);
 152         assert(*y + h_remains <= bottom); assert(y2 + h_remains <= bottom);
 153
 154         *w = w_remains;                                 // Update the width and height
 155         *h = h_remains;
 156
 157         return penalty;
 158 }
 159
 160 /** /brief Reference Sum of Absolute Differences comparison function
 161 *
 162 */
 163 static int sad_reference( uint8_t *block1, uint8_t *block2, const int xstride, const int ystride, const int w, const int h )
 164 {
 165         int i, j, score = 0;
 166         for ( j = 0; j < h; j++ ){
 167                 for ( i = 0; i < w; i++ ){
 168                         score += ABS( block1[i*xstride] - block2[i*xstride] );
 169                 }
 170                 block1 += ystride;
 171                 block2 += ystride;
 172         }
 173
 174         return score;
 175 }
 176
 177
 178 /** /brief Abstracted block comparison function
 179 */
 180 inline static int block_compare( uint8_t *block1,
 181                            uint8_t *block2,
 182                            int x,
 183                            int y,
 184                            int dx,
 185                            int dy,
 186                            struct motion_est_context_s *c)
 187 {
 188
 189 #ifdef COUNT_COMPARES
 190         c->compares++;
 191 #endif
 192
 193         int score;
 194
 195         // Default comparison may be overridden by the slower, more capable reference comparison
 196         int (*cmp)(uint8_t *, uint8_t *, int, int, int, int) = c->compare_optimized;
 197
 198         // vector displacement limited has been exceeded
 199         if( ABS( dx ) >= c->limit_x || ABS( dy ) >= c->limit_y )
 200                 return MAX_MSAD;
 201
 202         int mb_w = c->mb_w;     // Some writeable local copies
 203         int mb_h = c->mb_h;
 204
 205         // Determine if either macroblock got clipped
 206         int penalty = constrain( &x, &y, &mb_w, &mb_h, dx, dy, 0, c->width, 0, c->height);
 207
 208         // Some gotchas
 209         if( penalty == 0 )                      // Clipped out of existance: Return worst score
 210                 return MAX_MSAD;
 211         else if( penalty != 1<<SHIFT )          // Nonstandard macroblock dimensions: Disable SIMD optimizizations.
 212                 cmp = c->compare_reference;
 213
 214         // Calculate the memory locations of the macroblocks
 215         block1 += x      * c->xstride + y       * c->ystride;
 216         block2 += (x+dx) * c->xstride + (y+dy)  * c->ystride;
 217
 218         #ifdef DEBUG_ASM
 219         if( penalty == 1<<SHIFT ){
 220                 score = c->compare_reference( block1, block2, c->xstride, c->ystride, mb_w, mb_h );
 221                 int score2 = c->compare_optimized( block1, block2, c->xstride, c->ystride, mb_w, mb_h );
 222                 if ( score != score2 )
 223                         fprintf(stderr, "Your assembly doesn't work! Reference: %d Asm: %d\n", score, score2);
 224         }
 225         else
 226         #endif
 227
 228         score = cmp( block1, block2, c->xstride, c->ystride, mb_w, mb_h );
 229
 230         return ( score * penalty ) >> SHIFT;                    // Ditch the extra precision
 231 }
 232
 233 static inline void check_candidates (   uint8_t *ref,
 234                                         uint8_t *candidate_base,
 235                                         const int x,
 236                                         const int y,
 237                                         const motion_vector *candidates,// Contains to_x & to_y
 238                                         const int count,                // Number of candidates
 239                                         const int unique,               // Sometimes we know the candidates are unique
 240                                         motion_vector *result,
 241                                         struct motion_est_context_s *c )
 242 {
 243                 int score, i, j;
 244                 /* Scan for the best candidate */
 245                 for ( i = 0; i < count; i++ )
 246                 {
 247                         // this little dohicky ignores duplicate candidates, if they are possible
 248                         if ( unique == 0 ) {
 249                                 j = 0;
 250                                 while ( j < i )
 251                                 {
 252                                         if ( candidates[j].dx == candidates[i].dx &&
 253                                              candidates[j].dy == candidates[i].dy )
 254                                                         goto next_for_loop;
 255
 256                                         j++;
 257                                 }
 258                         }
 259
 260                         // Luma
 261                         score = block_compare( ref, candidate_base,
 262                                                 x, y,
 263                                                 candidates[i].dx,       // from
 264                                                 candidates[i].dy,
 265                                                 c);
 266
 267                         if ( score < result->msad ) {   // New minimum
 268                                 result->dx = candidates[i].dx;
 269                                 result->dy = candidates[i].dy;
 270                                 result->msad = score;
 271                         }
 272                         next_for_loop:;
 273                 }
 274 }
 275
 276 /* /brief Diamond search
 277 * Operates on a single macroblock
 278 */
 279 static inline void diamond_search(
 280                         uint8_t *ref,                           //<! Image data from previous frame
 281                         uint8_t *candidate_base,                //<! Image data in current frame
 282                         const int x,                            //<! X upper left corner of macroblock
 283                         const int y,                            //<! U upper left corner of macroblock
 284                         struct motion_vector_s *result,         //<! Best predicted mv and eventual result
 285                         struct motion_est_context_s *c)         //<! motion estimation context
 286 {
 287
 288         // diamond search pattern
 289         motion_vector candidates[4];
 290
 291         // Keep track of best and former best candidates
 292         motion_vector best, former;
 293         best.dx = 0;
 294         best.dy = 0;
 295         former.dx = 0;
 296         former.dy = 0;
 297
 298         // The direction of the refinement needs to be known
 299         motion_vector current;
 300
 301         int i, first = 1;
 302
 303         // Loop through the search pattern
 304         while( 1 ) {
 305
 306                 current.dx = result->dx;
 307                 current.dy = result->dy;
 308
 309                 if ( first == 1 )       // Set the initial pattern
 310                 {
 311                         candidates[0].dx = result->dx + 1; candidates[0].dy = result->dy + 0;
 312                         candidates[1].dx = result->dx + 0; candidates[1].dy = result->dy + 1;
 313                         candidates[2].dx = result->dx - 1; candidates[2].dy = result->dy + 0;
 314                         candidates[3].dx = result->dx + 0; candidates[3].dy = result->dy - 1;
 315                         i = 4;
 316                 }
 317                 else     // Construct the next portion of the search pattern
 318                 {
 319                         candidates[0].dx = result->dx + best.dx;
 320                         candidates[0].dy = result->dy + best.dy;
 321                         if (best.dx == former.dx && best.dy == former.dy) {
 322                                 candidates[1].dx = result->dx + best.dy;
 323                                 candidates[1].dy = result->dy + best.dx;                //      Yes, the wires
 324                                 candidates[2].dx = result->dx - best.dy;                //      are crossed
 325                                 candidates[2].dy = result->dy - best.dx;
 326                                 i = 3;
 327                         } else {
 328                                 candidates[1].dx = result->dx + former.dx;
 329                                 candidates[1].dy = result->dy + former.dy;
 330                                 i = 2;
 331                         }
 332
 333                         former.dx = best.dx; former.dy = best.dy;       // Keep track of new former best
 334                 }
 335
 336                 check_candidates ( ref, candidate_base, x, y, candidates, i, 1, result, c );
 337
 338                 // Which candidate was the best?
 339                 best.dx = result->dx - current.dx;
 340                 best.dy = result->dy - current.dy;
 341
 342                 // A better canidate was not found
 343                 if ( best.dx == 0 && best.dy == 0 )
 344                         return;
 345
 346                 if ( first == 1 ){
 347                         first = 0;
 348                         former.dx = best.dx; former.dy = best.dy; // First iteration, sensible value for former.d*
 349                 }
 350         }
 351 }
 352
 353 /* /brief Full (brute) search
 354 * Operates on a single macroblock
 355 */
 356 __attribute__((used))
 357 static void full_search(
 358                         uint8_t *ref,                           //<! Image data from previous frame
 359                         uint8_t *candidate_base,                //<! Image data in current frame
 360                         int x,                                  //<! X upper left corner of macroblock
 361                         int y,                                  //<! U upper left corner of macroblock
 362                         struct motion_vector_s *result,         //<! Best predicted mv and eventual result
 363                         struct motion_est_context_s *c)         //<! motion estimation context
 364 {
 365         // Keep track of best candidate
 366         int i,j,score;
 367
 368         // Go loopy
 369         for( i = -c->mb_w; i <= c->mb_w; i++ ){
 370                 for( j = -c->mb_h; j <=  c->mb_h; j++ ){
 371
 372                         score = block_compare( ref, candidate_base,
 373                                                 x,
 374                                                 y,
 375                                                 x + i,
 376                                                 y + j,
 377                                                 c);
 378
 379                         if ( score < result->msad ) {
 380                                 result->dx = i;
 381                                 result->dy = j;
 382                                 result->msad = score;
 383                         }
 384                 }
 385         }
 386 }
 387
 388 // Macros for pointer calculations
 389 #define CURRENT(i,j)    ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
 390 #define FORMER(i,j)     ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
 391 #define DENOISE(i,j)    ( c->denoise_vectors + (j)*c->mv_buffer_width + (i) )
 392
 393 int ncompare (const void * a, const void * b)
 394 {
 395         return ( *(const int*)a - *(const int*)b );
 396 }
 397
 398 // motion vector denoising
 399 // for x and y components seperately,
 400 // change the vector to be the median value of the 9 adjacent vectors
 401 static void median_denoise( motion_vector *v, struct motion_est_context_s *c )
 402 {
 403         int xvalues[9], yvalues[9];
 404
 405         int i,j,n;
 406         for( j = c->top_mb; j <= c->bottom_mb; j++ )
 407                 for( i = c->left_mb; i <= c->right_mb; i++ ){
 408                 {
 409                         n = 0;
 410
 411                         xvalues[n  ] = CURRENT(i,j)->dx; // Center
 412                         yvalues[n++] = CURRENT(i,j)->dy;
 413
 414                         if( i > c->left_mb ) // Not in First Column
 415                         {
 416                                 xvalues[n  ] = CURRENT(i-1,j)->dx; // Left
 417                                 yvalues[n++] = CURRENT(i-1,j)->dy;
 418
 419                                 if( j > c->top_mb ) {
 420                                         xvalues[n  ] = CURRENT(i-1,j-1)->dx; // Upper Left
 421                                         yvalues[n++] = CURRENT(i-1,j-1)->dy;
 422                                 }
 423
 424                                 if( j < c->bottom_mb ) {
 425                                         xvalues[n  ] = CURRENT(i-1,j+1)->dx; // Bottom Left
 426                                         yvalues[n++] = CURRENT(i-1,j+1)->dy;
 427                                 }
 428                         }
 429                         if( i < c->right_mb ) // Not in Last Column
 430                         {
 431                                 xvalues[n  ] = CURRENT(i+1,j)->dx; // Right
 432                                 yvalues[n++] = CURRENT(i+1,j)->dy;
 433
 434
 435                                 if( j > c->top_mb ) {
 436                                         xvalues[n  ] = CURRENT(i+1,j-1)->dx; // Upper Right
 437                                         yvalues[n++] = CURRENT(i+1,j-1)->dy;
 438                                 }
 439
 440                                 if( j < c->bottom_mb ) {
 441                                         xvalues[n  ] = CURRENT(i+1,j+1)->dx; // Bottom Right
 442                                         yvalues[n++] = CURRENT(i+1,j+1)->dy;
 443                                 }
 444                         }
 445                         if( j > c->top_mb ) // Not in First Row
 446                         {
 447                                 xvalues[n  ] = CURRENT(i,j-1)->dx; // Top
 448                                 yvalues[n++] = CURRENT(i,j-1)->dy;
 449                         }
 450
 451                         if( j < c->bottom_mb ) // Not in Last Row
 452                         {
 453                                 xvalues[n  ] = CURRENT(i,j+1)->dx; // Bottom
 454                                 yvalues[n++] = CURRENT(i,j+1)->dy;
 455                         }
 456
 457                         qsort (xvalues, n, sizeof(int), ncompare);
 458                         qsort (yvalues, n, sizeof(int), ncompare);
 459
 460                         if( n % 2 == 1 ) {
 461                                 DENOISE(i,j)->dx = xvalues[n/2];
 462                                 DENOISE(i,j)->dy = yvalues[n/2];
 463                         }
 464                         else {
 465                                 DENOISE(i,j)->dx = (xvalues[n/2] + xvalues[n/2+1])/2;
 466                                 DENOISE(i,j)->dy = (yvalues[n/2] + yvalues[n/2+1])/2;
 467                         }
 468                 }
 469         }
 470
 471         motion_vector *t = c->current_vectors;
 472         c->current_vectors = c->denoise_vectors;
 473         c->denoise_vectors = t;
 474
 475 }
 476
 477 // Credits: ffmpeg
 478 // return the median
 479 static inline int median_predictor(int a, int b, int c) {
 480         if ( a > b ){
 481                 if ( c > b ){
 482                         if ( c > a  ) b = a;
 483                         else      b = c;
 484                 }
 485         } else {
 486                 if ( b > c ){
 487                         if ( c > a ) b = c;
 488                         else     b = a;
 489                 }
 490         }
 491         return b;
 492 }
 493
 494
 495 /** /brief Motion search
 496 *
 497 * For each macroblock in the current frame, estimate the block from the last frame that
 498 * matches best.
 499 *
 500 * Vocab: Colocated - the pixel in the previous frame at the current position
 501 *
 502 * Based on enhanced predictive zonal search. [Tourapis 2002]
 503 */
 504 static void motion_search( uint8_t *from,                       //<! Image data.
 505                            uint8_t *to,                         //<! Image data. Rigid grid.
 506                            struct motion_est_context_s *c)      //<! The context
 507 {
 508
 509 #ifdef COUNT_COMPARES
 510         compares = 0;
 511 #endif
 512
 513         motion_vector candidates[10];
 514         motion_vector *here;            // This one gets used alot (about 30 times per macroblock)
 515         int n = 0;
 516
 517         int i, j, count=0;
 518
 519         // For every macroblock, perform motion vector estimation
 520         for( i = c->left_mb; i <= c->right_mb; i++ ){
 521          for( j = c->top_mb; j <= c->bottom_mb; j++ ){
 522
 523                 here = CURRENT(i,j);
 524                 here->valid = 1;
 525                 here->color = 100;
 526                 here->msad = MAX_MSAD;
 527                 count++;
 528                 n = 0;
 529
 530
 531                 /* Stack the predictors [i.e. checked in reverse order] */
 532
 533                 /* Adjacent to collocated */
 534                 if( c->former_vectors_valid )
 535                 {
 536                         // Top of colocated
 537                         if( j > c->prev_top_mb ){// && COL_TOP->valid ){
 538                                 candidates[n  ].dx = FORMER(i,j-1)->dx;
 539                                 candidates[n++].dy = FORMER(i,j-1)->dy;
 540                         }
 541
 542                         // Left of colocated
 543                         if( i > c->prev_left_mb ){// && COL_LEFT->valid ){
 544                                 candidates[n  ].dx = FORMER(i-1,j)->dx;
 545                                 candidates[n++].dy = FORMER(i-1,j)->dy;
 546                         }
 547
 548                         // Right of colocated
 549                         if( i < c->prev_right_mb ){// && COL_RIGHT->valid ){
 550                                 candidates[n  ].dx = FORMER(i+1,j)->dx;
 551                                 candidates[n++].dy = FORMER(i+1,j)->dy;
 552                         }
 553
 554                         // Bottom of colocated
 555                         if( j < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
 556                                 candidates[n  ].dx = FORMER(i,j+1)->dx;
 557                                 candidates[n++].dy = FORMER(i,j+1)->dy;
 558                         }
 559
 560                         // And finally, colocated
 561                         candidates[n  ].dx = FORMER(i,j)->dx;
 562                         candidates[n++].dy = FORMER(i,j)->dy;
 563                 }
 564
 565                 // For macroblocks not in the top row
 566                 if ( j > c->top_mb) {
 567
 568                         // Top if ( TOP->valid ) {
 569                                 candidates[n  ].dx = CURRENT(i,j-1)->dx;
 570                                 candidates[n++].dy = CURRENT(i,j-1)->dy;
 571                         //}
 572
 573                         // Top-Right, macroblocks not in the right row
 574                         if ( i < c->right_mb ){// && TOP_RIGHT->valid ) {
 575                                 candidates[n  ].dx = CURRENT(i+1,j-1)->dx;
 576                                 candidates[n++].dy = CURRENT(i+1,j-1)->dy;
 577                         }
 578                 }
 579
 580                 // Left, Macroblocks not in the left column
 581                 if ( i > c->left_mb ){// && LEFT->valid ) {
 582                         candidates[n  ].dx = CURRENT(i-1,j)->dx;
 583                         candidates[n++].dy = CURRENT(i-1,j)->dy;
 584                 }
 585
 586                 /* Median predictor vector (median of left, top, and top right adjacent vectors) */
 587                 if ( i > c->left_mb && j > c->top_mb && i < c->right_mb
 588                          )//&& LEFT->valid && TOP->valid && TOP_RIGHT->valid )
 589                 {
 590                         candidates[n  ].dx = median_predictor( CURRENT(i-1,j)->dx, CURRENT(i,j-1)->dx, CURRENT(i+1,j-1)->dx);
 591                         candidates[n++].dy = median_predictor( CURRENT(i-1,j)->dy, CURRENT(i,j-1)->dy, CURRENT(i+1,j-1)->dy);
 592                 }
 593
 594                 // Zero vector
 595                 candidates[n  ].dx = 0;
 596                 candidates[n++].dy = 0;
 597
 598                 int x = i * c->mb_w;
 599                 int y = j * c->mb_h;
 600                 check_candidates ( to, from, x, y, candidates, n, 0, here, c );
 601
 602
 603 #ifndef FULLSEARCH
 604                 diamond_search( to, from, x, y, here, c);
 605 #else
 606                 full_search( to, from, x, y, here, c);
 607 #endif
 608
 609                 assert( x + c->mb_w + here->dx > 0 );   // All macroblocks must have area > 0
 610                 assert( y + c->mb_h + here->dy > 0 );
 611                 assert( x + here->dx < c->width );
 612                 assert( y + here->dy < c->height );
 613
 614          } /* End column loop */
 615         } /* End row loop */
 616
 617 #ifdef USE_SSE
 618         asm volatile ( "emms" );
 619 #endif
 620
 621 #ifdef COUNT_COMPARES
 622         fprintf(stderr, "%d comparisons per block were made", compares/count);
 623 #endif
 624         return;
 625 }
 626
 627 void collect_post_statistics( struct motion_est_context_s *c ) {
 628
 629         c->comparison_average = 0;
 630         c->average_length = 0;
 631         c->average_x = 0;
 632         c->average_y = 0;
 633
 634         int i, j, count = 0;
 635
 636         for ( i = c->left_mb; i <= c->right_mb; i++ ){
 637          for ( j = c->top_mb; j <= c->bottom_mb; j++ ){
 638
 639                 count++;
 640                 c->comparison_average += CURRENT(i,j)->msad;
 641                 c->average_x += CURRENT(i,j)->dx;
 642                 c->average_y += CURRENT(i,j)->dy;
 643
 644
 645          }
 646         }
 647
 648         if ( count > 0 )
 649         {
 650                 c->comparison_average /= count;
 651                 c->average_x /= count;
 652                 c->average_y /= count;
 653                 c->average_length = sqrt( c->average_x * c->average_x + c->average_y * c->average_y );
 654         }
 655
 656 }
 657
 658 static void init_optimizations( struct motion_est_context_s *c )
 659 {
 660         switch(c->mb_w){
 661 #ifdef USE_SSE
 662                 case 4:  if(c->mb_h == 4)       c->compare_optimized = sad_sse_422_luma_4x4;
 663                          else                           c->compare_optimized = sad_sse_422_luma_4w;
 664                          break;
 665                 case 8:  if(c->mb_h == 8)  c->compare_optimized = sad_sse_422_luma_8x8;
 666                          else                           c->compare_optimized = sad_sse_422_luma_8w;
 667                          break;
 668                 case 16: if(c->mb_h == 16) c->compare_optimized = sad_sse_422_luma_16x16;
 669                          else                           c->compare_optimized = sad_sse_422_luma_16w;
 670                          break;
 671                 case 32: if(c->mb_h == 32) c->compare_optimized = sad_sse_422_luma_32x32;
 672                          else                           c->compare_optimized = sad_sse_422_luma_32w;
 673                          break;
 674                 case 64: c->compare_optimized = sad_sse_422_luma_64w;
 675                          break;
 676 #endif
 677                 default: c->compare_optimized = sad_reference;
 678                          break;
 679         }
 680 }
 681
 682 inline static void set_red(uint8_t *image, struct motion_est_context_s *c)
 683 {
 684         int n;
 685         for( n = 0; n < c->width * c->height * 2; n+=4 )
 686         {
 687                 image[n]   = 79;
 688                 image[n+1] = 91;
 689                 image[n+2] = 79;
 690                 image[n+3] = 237;
 691         }
 692
 693 }
 694
 695 static void show_residual( uint8_t *result,  struct motion_est_context_s *c )
 696 {
 697         int i, j;
 698         int x,y,w,h;
 699         int dx, dy;
 700         int tx,ty;
 701         uint8_t *b, *r;
 702
 703 //      set_red(result,c);
 704
 705         for( j = c->top_mb; j <= c->bottom_mb; j++ ){
 706          for( i = c->left_mb; i <= c->right_mb; i++ ){
 707
 708                 dx = CURRENT(i,j)->dx;
 709                 dy = CURRENT(i,j)->dy;
 710                 w = c->mb_w;
 711                 h = c->mb_h;
 712                 x = i * w;
 713                 y = j * h;
 714
 715                 // Denoise function caused some blocks to be completely clipped, ignore them
 716                 if (constrain( &x, &y, &w, &h, dx, dy, 0, c->width, 0, c->height) == 0 )
 717                         continue;
 718
 719                 for( ty = y; ty < y + h ; ty++ ){
 720                  for( tx = x; tx < x + w ; tx++ ){
 721
 722                         b = c->former_image +  (tx+dx)*c->xstride + (ty+dy)*c->ystride;
 723                         r = result +            tx*c->xstride + ty*c->ystride;
 724
 725                         r[0] = 16 + ABS( r[0] - b[0] );
 726
 727                         if( dx % 2 == 0 )
 728                                 r[1] = 128 + ABS( r[1] - b[1] );
 729                         else
 730                                 // FIXME: may exceed boundies
 731                                 r[1] = 128 + ABS( r[1] - ( *(b-1) + b[3] ) /2 );
 732                  }
 733                 }
 734          }
 735         }
 736 }
 737
 738 static void show_reconstruction( uint8_t *result, struct motion_est_context_s *c )
 739 {
 740         int i, j;
 741         int x,y,w,h;
 742         int dx,dy;
 743         uint8_t *r, *s;
 744         int tx,ty;
 745
 746         for( i = c->left_mb; i <= c->right_mb; i++ ){
 747          for( j = c->top_mb; j <= c->bottom_mb; j++ ){
 748
 749                 dx = CURRENT(i,j)->dx;
 750                 dy = CURRENT(i,j)->dy;
 751                 w = c->mb_w;
 752                 h = c->mb_h;
 753                 x = i * w;
 754                 y = j * h;
 755
 756                 // Denoise function caused some blocks to be completely clipped, ignore them
 757                 if (constrain( &x, &y, &w, &h, dx, dy, 0, c->width, 0, c->height) == 0 )
 758                         continue;
 759
 760                 for( ty = y; ty < y + h ; ty++ ){
 761                  for( tx = x; tx < x + w ; tx++ ){
 762
 763                         r = result +          tx*c->xstride + ty*c->ystride;
 764                         s = c->former_image + (tx+dx)*c->xstride + (ty+dy)*c->ystride;
 765
 766                         r[0] = s[0];
 767
 768                         if( dx % 2 == 0 )
 769                                 r[1] = s[1];
 770                         else
 771                                 // FIXME: may exceed boundies
 772                                 r[1] = ( *(s-1) + s[3] ) /2;
 773                  }
 774                 }
 775          }
 776         }
 777 }
 778
 779 // Image stack(able) method
 780 static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
 781 {
 782         // Get the filter
 783         mlt_filter filter = mlt_frame_pop_service( frame );
 784
 785         // Get the motion_est context object
 786         struct motion_est_context_s *c = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL);
 787
 788
 789         // Get the new image and frame number
 790         int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
 791
 792         #ifdef BENCHMARK
 793         struct timeval start; gettimeofday(&start, NULL );
 794         #endif
 795
 796
 797         if( error != 0 )
 798                 mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr );
 799
 800         c->current_frame_position = mlt_frame_get_position( frame );
 801
 802         /* Context Initialization */
 803         if ( c->initialized == 0 ) {
 804
 805                 // Get the filter properties object
 806                 mlt_properties properties = mlt_filter_properties( filter );
 807
 808                 c->width = *width;
 809                 c->height = *height;
 810
 811                 /* Get parameters that may have been overridden */
 812                 if( mlt_properties_get( properties, "macroblock_width") != NULL )
 813                         c->mb_w = mlt_properties_get_int( properties, "macroblock_width");
 814
 815                 if( mlt_properties_get( properties, "macroblock_height") != NULL )
 816                         c->mb_h = mlt_properties_get_int( properties, "macroblock_height");
 817
 818                 if( mlt_properties_get( properties, "prediction_thresh") != NULL )
 819                         c->initial_thresh = mlt_properties_get_int( properties, "prediction_thresh" );
 820                 else
 821                         c->initial_thresh = c->mb_w * c->mb_h;
 822
 823                 if( mlt_properties_get( properties, "search_method") != NULL )
 824                         c->search_method = mlt_properties_get_int( properties, "search_method");
 825
 826                 if( mlt_properties_get( properties, "skip_prediction") != NULL )
 827                         c->skip_prediction = mlt_properties_get_int( properties, "skip_prediction");
 828
 829                 if( mlt_properties_get( properties, "limit_x") != NULL )
 830                         c->limit_x = mlt_properties_get_int( properties, "limit_x");
 831
 832                 if( mlt_properties_get( properties, "limit_y") != NULL )
 833                         c->limit_y = mlt_properties_get_int( properties, "limit_y");
 834
 835                 if( mlt_properties_get( properties, "check_chroma" ) != NULL )
 836                         c->check_chroma = mlt_properties_get_int( properties, "check_chroma" );
 837
 838                 if( mlt_properties_get( properties, "denoise" ) != NULL )
 839                         c->denoise = mlt_properties_get_int( properties, "denoise" );
 840
 841                 if( mlt_properties_get( properties, "show_reconstruction" ) != NULL )
 842                         c->show_reconstruction = mlt_properties_get_int( properties, "show_reconstruction" );
 843
 844                 if( mlt_properties_get( properties, "show_residual" ) != NULL )
 845                         c->show_residual = mlt_properties_get_int( properties, "show_residual" );
 846
 847                 if( mlt_properties_get( properties, "toggle_when_paused" ) != NULL )
 848                         c->toggle_when_paused = mlt_properties_get_int( properties, "toggle_when_paused" );
 849
 850                 init_optimizations( c );
 851
 852                 // Calculate the dimensions in macroblock units
 853                 c->mv_buffer_width = (*width / c->mb_w);
 854                 c->mv_buffer_height = (*height / c->mb_h);
 855
 856                 // Size of the motion vector buffer
 857                 c->mv_size =  c->mv_buffer_width * c->mv_buffer_height * sizeof(struct motion_vector_s);
 858
 859                 // Allocate the motion vector buffers
 860                 c->former_vectors = mlt_pool_alloc( c->mv_size );
 861                 c->current_vectors = mlt_pool_alloc( c->mv_size );
 862                 c->denoise_vectors = mlt_pool_alloc( c->mv_size );
 863
 864                 // Register motion buffers for destruction
 865                 mlt_properties_set_data( properties, "current_motion_vectors", (void *)c->current_vectors, 0, mlt_pool_release, NULL );
 866                 mlt_properties_set_data( properties, "former_motion_vectors", (void *)c->former_vectors, 0, mlt_pool_release, NULL );
 867                 mlt_properties_set_data( properties, "denoise_motion_vectors", (void *)c->denoise_vectors, 0, mlt_pool_release, NULL );
 868
 869                 c->former_vectors_valid = 0;
 870                 memset( c->former_vectors, 0, c->mv_size );
 871
 872                 // Calculate the size of our steps (the number of bytes that seperate adjacent pixels in X and Y direction)
 873                 switch( *format ) {
 874                         case mlt_image_yuv422:
 875                                 c->xstride = 2;
 876                                 c->ystride = c->xstride * *width;
 877                                 break;
 878                         default:
 879                                 // I don't know
 880                                 fprintf(stderr, "\"I am unfamiliar with your new fangled pixel format!\" -filter_motion_est\n");
 881                                 return -1;
 882                 }
 883
 884                 // Allocate a cache for the previous frame's image
 885                 c->former_image = mlt_pool_alloc( *width * *height * 2 );
 886                 c->cache_image = mlt_pool_alloc( *width * *height * 2 );
 887
 888                 // Register for destruction
 889                 mlt_properties_set_data( properties, "cache_image", (void *)c->cache_image, 0, mlt_pool_release, NULL );
 890                 mlt_properties_set_data( properties, "former_image", (void *)c->former_image, 0, mlt_pool_release, NULL );
 891
 892                 c->former_frame_position = c->current_frame_position;
 893                 c->previous_msad = 0;
 894
 895                 c->initialized = 1;
 896         }
 897
 898         /* Check to see if somebody else has given us bounds */
 899         struct mlt_geometry_item_s *bounds = mlt_properties_get_data( MLT_FRAME_PROPERTIES( frame ), "bounds", NULL );
 900
 901         if( bounds != NULL ) {
 902                 // translate pixel units (from bounds) to macroblock units
 903                 // make sure whole macroblock stays within bounds
 904                 c->left_mb = ( bounds->x + c->mb_w - 1 ) / c->mb_w;
 905                 c->top_mb = ( bounds->y + c->mb_h - 1 ) / c->mb_h;
 906                 c->right_mb = ( bounds->x + bounds->w ) / c->mb_w - 1;
 907                 c->bottom_mb = ( bounds->y + bounds->h ) / c->mb_h - 1;
 908                 c->bounds.x = bounds->x;
 909                 c->bounds.y = bounds->y;
 910                 c->bounds.w = bounds->w;
 911                 c->bounds.h = bounds->h;
 912         } else {
 913                 c->left_mb = c->prev_left_mb = 0;
 914                 c->top_mb = c->prev_top_mb = 0;
 915                 c->right_mb = c->prev_right_mb = c->mv_buffer_width - 1;        // Zero indexed
 916                 c->bottom_mb = c->prev_bottom_mb = c->mv_buffer_height - 1;
 917                 c->bounds.x = 0;
 918                 c->bounds.y = 0;
 919                 c->bounds.w = *width;
 920                 c->bounds.h = *height;
 921         }
 922
 923         // If video is advancing, run motion vector algorithm and etc...
 924         if( c->former_frame_position + 1 == c->current_frame_position )
 925         {
 926
 927                 // Swap the motion vector buffers and reuse allocated memory
 928                 struct motion_vector_s *temp = c->current_vectors;
 929                 c->current_vectors = c->former_vectors;
 930                 c->former_vectors = temp;
 931
 932                 // This is done because filter_vismv doesn't pay attention to frame boundry
 933                 memset( c->current_vectors, 0, c->mv_size );
 934
 935                 // Perform the motion search
 936                 motion_search( c->cache_image, *image, c );
 937
 938                 collect_post_statistics( c );
 939
 940
 941                 // Detect shot changes
 942                 if( c->comparison_average > 10 * c->mb_w * c->mb_h &&
 943                     c->comparison_average > c->previous_msad * 2 )
 944                 {
 945                         fprintf(stderr, " - SAD: %d   <<Shot change>>\n", c->comparison_average);
 946                         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1);
 947                 //      c->former_vectors_valid = 0; // Invalidate the previous frame's predictors
 948                         c->shot_change = 1;
 949                 }
 950                 else {
 951                         c->former_vectors_valid = 1;
 952                         c->shot_change = 0;
 953                         //fprintf(stderr, " - SAD: %d\n", c->comparison_average);
 954                 }
 955
 956                 c->previous_msad = c->comparison_average;
 957
 958                 if( c->comparison_average != 0 ) { // If the frame is not a duplicate of the previous frame
 959
 960                         // denoise the vector buffer
 961                         if( c->denoise )
 962                                 median_denoise( c->current_vectors, c );
 963
 964                         // Pass the new vector data into the frame
 965                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
 966                                          (void*)c->current_vectors, c->mv_size, NULL, NULL );
 967
 968                         // Cache the frame's image. Save the old cache. Reuse memory.
 969                         // After this block, exactly two unique frames will be cached
 970                         uint8_t *timg = c->cache_image;
 971                         c->cache_image = c->former_image;
 972                         c->former_image = timg;
 973                         memcpy( c->cache_image, *image, *width * *height * c->xstride );
 974
 975
 976                 }
 977                 else {
 978                         // Undo the Swap, This fixes the ugliness caused by a duplicate frame
 979                         temp = c->current_vectors;
 980                         c->current_vectors = c->former_vectors;
 981                         c->former_vectors = temp;
 982                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
 983                                          (void*)c->former_vectors, c->mv_size, NULL, NULL );
 984                 }
 985
 986
 987                 if( c->shot_change == 1)
 988                         ;
 989                 else if( c->show_reconstruction )
 990                         show_reconstruction( *image, c );
 991                 else if( c->show_residual )
 992                         show_residual( *image, c );
 993
 994         }
 995         // paused
 996         else if( c->former_frame_position == c->current_frame_position )
 997         {
 998                 // Pass the old vector data into the frame if it's valid
 999                 if( c->former_vectors_valid == 1 ) {
1000                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1001                                          (void*)c->current_vectors, c->mv_size, NULL, NULL );
1002
1003                         if( c->shot_change == 1)
1004                                 ;
1005                         else if( c->toggle_when_paused == 1 ) {
1006                                 if( c->show_reconstruction )
1007                                         show_reconstruction( *image, c );
1008                                 else if( c->show_residual )
1009                                         show_residual( *image, c );
1010                                 c->toggle_when_paused = 2;
1011                         }
1012                         else if( c->toggle_when_paused == 2 )
1013                                 c->toggle_when_paused = 1;
1014                         else {
1015                                 if( c->show_reconstruction )
1016                                         show_reconstruction( *image, c );
1017                                 else if( c->show_residual )
1018                                         show_residual( *image, c );
1019                         }
1020
1021                 }
1022
1023                 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", c->shot_change);
1024         }
1025         // there was jump in frame number
1026         else {
1027 //              fprintf(stderr, "Warning: there was a frame number jumped from %d to %d.\n", c->former_frame_position, c->current_frame_position);
1028                 c->former_vectors_valid = 0;
1029         }
1030
1031
1032         // Cache our bounding geometry for the next frame's processing
1033         c->prev_left_mb = c->left_mb;
1034         c->prev_top_mb = c->top_mb;
1035         c->prev_right_mb = c->right_mb;
1036         c->prev_bottom_mb = c->bottom_mb;
1037
1038         // Remember which frame this is
1039         c->former_frame_position = c->current_frame_position;
1040
1041
1042         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_width", c->mb_w );
1043         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_height", c->mb_h );
1044         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.left_mb", c->left_mb );
1045         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.right_mb", c->right_mb );
1046         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.top_mb", c->top_mb );
1047         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.bottom_mb", c->bottom_mb );
1048
1049         #ifdef BENCHMARK
1050         struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
1051         fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference);
1052         #endif
1053
1054
1055         return error;
1056 }
1057
1058
1059
1060 /** filter processing.
1061 */
1062
1063 static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
1064 {
1065
1066         // Keeps tabs on the filter object
1067         mlt_frame_push_service( frame, this);
1068
1069         // Push the frame filter
1070         mlt_frame_push_get_image( frame, filter_get_image );
1071
1072         return frame;
1073 }
1074
1075 /** Constructor for the filter.
1076 */
1077 mlt_filter filter_motion_est_init( mlt_profile profile, mlt_service_type type, const char *id, char *arg )
1078 {
1079         mlt_filter this = mlt_filter_new( );
1080         if ( this != NULL )
1081         {
1082                 // Get the properties object
1083                 mlt_properties properties = MLT_FILTER_PROPERTIES( this );
1084
1085                 // Initialize the motion estimation context
1086                 struct motion_est_context_s *context;
1087                 context = mlt_pool_alloc( sizeof(struct motion_est_context_s) );
1088                 mlt_properties_set_data( properties, "context", (void *)context, sizeof( struct motion_est_context_s ),
1089                                         mlt_pool_release, NULL );
1090
1091
1092                 // Register the filter
1093                 this->process = filter_process;
1094
1095                 /* defaults that may be overridden */
1096                 context->mb_w = 16;
1097                 context->mb_h = 16;
1098                 context->skip_prediction = 0;
1099                 context->limit_x = 64;
1100                 context->limit_y = 64;
1101                 context->search_method = DIAMOND_SEARCH;        // FIXME: not used
1102                 context->check_chroma = 0;
1103                 context->denoise = 1;
1104                 context->show_reconstruction = 0;
1105                 context->show_residual = 0;
1106                 context->toggle_when_paused = 0;
1107
1108                 /* reference functions that may have optimized versions */
1109                 context->compare_reference = sad_reference;
1110
1111                 // The rest of the buffers will be initialized when the filter is first processed
1112                 context->initialized = 0;
1113         }
1114         return this;
1115 }