research.m1stereo.tv/gitweb/ - melted/blob - src/modules/motion_est/filter_motion_est.c

   1 /*
   2  *      /brief fast motion estimation filter
   3  *      /author Zachary Drew, Copyright 2005
   4  *
   5  *      Currently only uses Gamma data for comparisonon (bug or feature?)
   6  *      Vector optimization coming soon.
   7  *
   8  *      Vector orientation: The vector data that is generated for the current frame specifies
   9  *      the motion from the previous frame to the current frame. Thus, to know how a macroblock
  10  *      in the current frame will move in the future, the next frame is needed.
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or
  15  * (at your option) any later version.
  16  *
  17  * This program is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with this program; if not, write to the Free Software Foundation,
  24  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25  */
  26
  27
  28 #include "filter_motion_est.h"
  29 #include <framework/mlt.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <math.h>
  33 #include <string.h>
  34 #include <sys/time.h>
  35 #include <assert.h>
  36
  37 #include "sad_sse.h"
  38
  39
  40 #undef DEBUG
  41 #undef DEBUG_ASM
  42 #undef BENCHMARK
  43 #undef COUNT_COMPARES
  44
  45 #define DIAMOND_SEARCH 0x0
  46 #define FULL_SEARCH 0x1
  47 #define SHIFT 8
  48 #define MIN(a,b) ((a) > (b) ? (b) : (a))
  49 #define ABS(a) ((a) >= 0 ? (a) : (-(a)))
  50
  51 #ifdef COUNT_COMPARES
  52         int compares;
  53 #endif
  54
  55 typedef struct motion_vector_s motion_vector;
  56
  57 struct yuv_data
  58 {
  59         uint8_t *y;
  60         uint8_t *u;
  61         uint8_t *v;
  62
  63 };
  64
  65 struct motion_est_context_s
  66 {
  67         int initialized;                                //<! true if filter has been initialized
  68
  69         /* same as mlt_frame's parameters */
  70         int width, height;
  71
  72         /* Operational details */
  73         int macroblock_width, macroblock_height;
  74         int xstride, ystride;
  75         //uint8_t *former_image;                        //<! Copy of the previous frame's image
  76         struct yuv_data former_image, current_image;
  77         int search_method, skip_prediction, shot_change;
  78         int limit_x, limit_y;                   //<! max x and y of a motion vector
  79         int edge_blocks_x, edge_blocks_y;
  80         int initial_thresh;
  81         int check_chroma;                       // if check_chroma == 1 then compare chroma
  82
  83         /* bounds */
  84         struct mlt_geometry_item_s prev_bounds; // Cache last frame's bounds (needed for predictor vectors validity)
  85         struct mlt_geometry_item_s *bounds;     // Current bounds
  86
  87         /* bounds in macroblock units */
  88         int left_mb, prev_left_mb, right_mb, prev_right_mb;
  89         int top_mb, prev_top_mb, bottom_mb, prev_bottom_mb;
  90
  91         /* size of our vector buffers */
  92         int mv_buffer_height, mv_buffer_width, mv_size;
  93
  94         /* vector buffers */
  95         int former_vectors_valid;               //<! true if the previous frame's buffered motion vector data is valid
  96         motion_vector *former_vectors, *current_vectors;
  97         motion_vector *denoise_vectors;
  98         mlt_position former_frame_position, current_frame_position;
  99
 100         /* two metrics for diagnostics. lower is a better estimation but beware of local minima  */
 101         float predictive_misses;                // How often do the prediction metion vectors fail?
 102         int comparison_average;                 // How far does the best estimation deviate from a perfect comparison?
 103         int bad_comparisons;
 104         int average_length;
 105         int average_x, average_y;
 106
 107         /* run-time configurable comparison functions */
 108         int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int);
 109         int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int);
 110         //int (*vert_deviation_reference)(uint8_t *, int, int, int, int);
 111         //int (*horiz_deviation_reference)(uint8_t *, int, int, int, int);
 112
 113 };
 114
 115
 116 // Clip the macroblocks as required. Only used for blocks at the edge of the picture
 117 // "from" is assumed to be unclipped
 118 inline static int clip( int *from_x,
 119                         int *from_y,
 120                         int *to_x,
 121                         int *to_y,
 122                         int *w,                 //<! macroblock width
 123                         int *h,                 //<! macroblock height
 124                         int width,              //<! image width
 125                         int height)             //<! image height
 126 {
 127
 128         uint32_t penalty = 1 << SHIFT;  // Retain a few extra bits of precision minus floating-point's blemishes
 129         int diff;
 130
 131         // Origin of macroblock moves left of absolute boundy
 132         if( *to_x < 0 ) {
 133                 if( *to_x + *w <= 0) return 0;                  // Clipped out of existance
 134                 penalty = (*w * penalty) / (*w + *to_x);         // Recipricol of the fraction of the block that remains
 135                 *from_x -= *to_x;
 136                 *w += *to_x;
 137                 *to_x = 0;
 138         }
 139         // Portion of macroblock moves right of absolute boundry
 140         else if( *to_x + *w > width ) {
 141                 if(*to_x >= width) return 0;                    // Clipped out of existance
 142                 diff  = *to_x + *w - width;                     // Width of area clipped (0 < diff <  macroblock width)
 143                 penalty = (*w * penalty) / (*w - diff);         // Recipricol of the fraction of the block that remains
 144                 *w -= diff;
 145         }
 146         // Origin of macroblock moves above absolute boundy
 147         if( *to_y < 0 ) {
 148                 if( *to_y + *h <= 0) return 0;                  // Clipped out of existance
 149                 penalty = (*h * penalty) / (*h + *to_y);        // Recipricol of the fraction of the block that remains
 150                 *from_y -= *to_y;
 151                 *h += *to_y;
 152                 *to_y = 0;
 153         }
 154         // Portion of macroblock moves bellow absolute boundry
 155         else if( *to_y + *h > height ) {
 156                 if(*to_y >= height) return 0;                   // Clipped out of existance
 157                 diff = *to_y + *h - height;                     // Height of area clipped (0 < diff < macroblock height)
 158                 penalty = (*h * penalty) / (*h - diff);         // Recipricol of the fraction of the block that is clipped
 159                 *h -= diff;
 160         }
 161         return penalty;
 162 }
 163
 164
 165 /** /brief Reference Sum of Absolute Differences comparison function
 166 *
 167 */
 168 inline static int sad_reference( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
 169 {
 170         int i, j, score = 0;
 171         for ( j = 0; j < h; j++ ){
 172                 for ( i = 0; i < w; i++ ){
 173                         score += ABS( block1[i*xstride] - block2[i*xstride] );
 174                 }
 175                 block1 += ystride;
 176                 block2 += ystride;
 177         }
 178
 179         return score;
 180 }
 181
 182 inline static void change_422_to_444_planar_rep( uint8_t *image, struct yuv_data yuv, struct motion_est_context_s *c )
 183 {
 184         register uint8_t *p = image;
 185         register uint8_t *q = image + c->width * c->height * 2;
 186         while ( *p != *q ) {
 187                 *(yuv.y++) = *(p ++);
 188                 *(yuv.u++) = *p;
 189                 *(yuv.u++) = *(p ++);
 190                 *(yuv.y++) = *(p ++);
 191                 *(yuv.v++) = *p;
 192                 *(yuv.v++) = *(p ++);
 193         }
 194 }
 195
 196 // broken
 197 inline static void change_420p_to_444_planar_rep( uint8_t *image, struct yuv_data yuv, struct motion_est_context_s *c )
 198 {
 199         uint8_t *p = image + c->width * c->height;
 200         uint8_t *q = p + c->width*c->height/2;
 201         uint8_t *u2, *v2;
 202         while( *p != *q ) {
 203                 u2 = yuv.u + c->width;
 204                 *yuv.u  ++ = *p;
 205                 *yuv.u  ++ = *p;
 206                 *u2 ++ = *p;
 207                 *u2 ++ = *p ++;
 208         }
 209
 210         *q += c->width*c->height/2;
 211         while( *p != *q ) {
 212                 v2 = yuv.v + c->width;
 213                 *yuv.v  ++ = *p;
 214                 *yuv.v  ++ = *p;
 215                 *v2 ++ = *p;
 216                 *v2 ++ = *p ++;
 217         }
 218
 219 }
 220
 221 /** /brief Abstracted block comparison function
 222 */
 223 inline static int compare( uint8_t *from,
 224                            uint8_t *to,
 225                            int from_x,
 226                            int from_y,
 227                            int to_x,
 228                            int to_y,
 229                            struct motion_est_context_s *c)
 230 {
 231 #ifdef COUNT_COMPARES
 232         compares++;
 233 #endif
 234
 235         if( ABS(from_x - to_x) >= c->limit_x || ABS(from_y - to_y) >= c->limit_y )
 236                 return MAX_MSAD;
 237
 238         int score;
 239         int (*cmp)(uint8_t *, uint8_t *, int, int, int, int) = c->compare_optimized;
 240
 241         int mb_w = c->macroblock_width;
 242         int mb_h = c->macroblock_height;
 243
 244         int penalty = clip(&from_x, &from_y, &to_x, &to_y, &mb_w, &mb_h, c->width, c->height);
 245         if ( penalty == 1<<SHIFT)
 246             penalty = clip(&to_x, &to_y, &from_x, &from_y, &mb_w, &mb_h, c->width, c->height);
 247
 248         if( penalty == 0 )                      // Clipped out of existance
 249                 return MAX_MSAD;
 250         else if( penalty != 1<<SHIFT )          // SIMD optimized comparison won't work
 251                 cmp = c->compare_reference;
 252
 253         uint8_t *from_block = from + from_x * c->xstride + from_y * c->ystride;
 254         uint8_t *to_block = to + to_x * c->xstride + to_y * c->ystride;
 255
 256         #ifdef DEBUG_ASM
 257         if( penalty == 1<<SHIFT ){
 258                 score = c->compare_reference( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
 259                 int score2 = c->compare_optimized( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
 260                 if ( score != score2 )
 261                         fprintf(stderr, "Your assembly doesn't work! Reference: %d Asm: %d\n", score, score2);
 262         }
 263         else
 264         #endif
 265
 266         score = cmp( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
 267
 268         return ( score * penalty ) >> SHIFT;                    // The extra precision is no longer wanted
 269 }
 270
 271 static inline void check_candidates (   struct yuv_data *from, struct yuv_data *to,
 272                                         int from_x, int from_y,
 273                                         motion_vector *candidates, int count, int unique,
 274                                         motion_vector *result,
 275                                         struct motion_est_context_s *c )
 276 {
 277                 int score, i, j;
 278                 /* Scan for the best candidate */
 279                 for ( i = 0; i < count; i++ )
 280                 {
 281                         // this little dohicky ignores duplicate candidates, if they are possible
 282                         if ( unique == 0 ) {
 283                                 j = 0;
 284                                 while ( j < i )
 285                                 {
 286                                         if ( candidates[j].dx == candidates[i].dx &&
 287                                              candidates[j].dy == candidates[i].dy )
 288                                                         goto next_for_loop;
 289
 290                                         j++;
 291                                 }
 292                         }
 293
 294                         // Luma
 295                         score = compare( from->y, to->y, from_x, from_y,
 296                                          from_x + candidates[i].dx,     /* to x */
 297                                          from_y + candidates[i].dy,     /* to y */
 298                                          c);
 299
 300                         if ( c->check_chroma ) {
 301                                 if ( score >= result->msad )    // Early term
 302                                         continue;
 303
 304                                 // Chroma - U
 305                                 score += compare( from->u, to->u, from_x, from_y,
 306                                                  from_x + candidates[i].dx,     /* to x */
 307                                                  from_y + candidates[i].dy,     /* to y */
 308                                                  c);
 309
 310                                 if ( score >= result->msad )    // Early term
 311                                         continue;
 312
 313                                 // Chroma - V
 314                                 score += compare( from->v, to->v, from_x, from_y,
 315                                                  from_x + candidates[i].dx,     /* to x */
 316                                                  from_y + candidates[i].dy,     /* to y */
 317                                                  c);
 318                         }
 319
 320                         if ( score < result->msad ) {   // New minimum
 321                                 result->dx = candidates[i].dx;
 322                                 result->dy = candidates[i].dy;
 323                                 result->msad = score;
 324                         }
 325                         next_for_loop:;
 326                 }
 327 }
 328
 329 /* /brief Diamond search
 330 * Operates on a single macroblock
 331 */
 332 static inline void diamond_search(
 333                         struct yuv_data *from,                  //<! Image data from previous frame
 334                         struct yuv_data *to,                    //<! Image data in current frame
 335                         int mb_x,                       //<! X upper left corner of macroblock
 336                         int mb_y,                       //<! U upper left corner of macroblock
 337                         struct motion_vector_s *result, //<! Best predicted mv and eventual result
 338                         struct motion_est_context_s *c) //<! motion estimation context
 339 {
 340
 341         // diamond search pattern
 342         motion_vector candidates[4];
 343
 344         // Keep track of best and former best candidates
 345         motion_vector best, former;
 346
 347         // The direction of the refinement needs to be known
 348         motion_vector current;
 349
 350         int i, first = 1;
 351
 352         // Loop through the search pattern
 353         while( 1 ) {
 354
 355                 current.dx = result->dx;
 356                 current.dy = result->dy;
 357
 358                 if ( first == 1 )       // Set the initial pattern
 359                 {
 360                         candidates[0].dx = result->dx + 1; candidates[0].dy = result->dy + 0;
 361                         candidates[1].dx = result->dx + 0; candidates[1].dy = result->dy + 1;
 362                         candidates[2].dx = result->dx - 1; candidates[2].dy = result->dy + 0;
 363                         candidates[3].dx = result->dx + 0; candidates[3].dy = result->dy - 1;
 364                         i = 4;
 365                 }
 366                 else     // Construct the next portion of the search pattern
 367                 {
 368                         candidates[0].dx = result->dx + best.dx;
 369                         candidates[0].dy = result->dy + best.dy;
 370                         if (best.dx == former.dx && best.dy == former.dy) {
 371                                 candidates[1].dx = result->dx + best.dy;
 372                                 candidates[1].dy = result->dy + best.dx;                //      Yes, the wires
 373                                 candidates[2].dx = result->dx - best.dy;                //      are crossed
 374                                 candidates[2].dy = result->dy - best.dx;
 375                                 i = 3;
 376                         } else {
 377                                 candidates[1].dx = result->dx + former.dx;
 378                                 candidates[1].dy = result->dy + former.dy;
 379                                 i = 2;
 380                         }
 381
 382                         former.dx = best.dx; former.dy = best.dy;       // Keep track of new former best
 383                 }
 384
 385                 check_candidates ( from, to, mb_x, mb_y, candidates, i, 1, result, c );
 386                 best.dx = result->dx - current.dx;
 387                 best.dy = result->dy - current.dy;
 388
 389                 if ( best.dx == 0 && best.dy == 0 )
 390                         return;
 391
 392                 if ( first == 1 ){
 393                         first = 0;
 394                         former.dx = best.dx; former.dy = best.dy; // First iteration, sensible value for former_d*
 395                 }
 396         }
 397 }
 398
 399 /* /brief Full (brute) search
 400 * Operates on a single macroblock
 401 */
 402 static void full_search(
 403                         struct yuv_data *from,                  //<! Image data from previous frame
 404                         struct yuv_data *to,                    //<! Image data in current frame
 405                         int mb_x,                       //<! X upper left corner of macroblock
 406                         int mb_y,                       //<! U upper left corner of macroblock
 407                         struct motion_vector_s *result, //<! Best predicted mv and eventual result
 408                         struct motion_est_context_s *c) //<! motion estimation context
 409 {
 410         // Keep track of best candidate
 411         int i,j,score;
 412
 413         // Go loopy
 414         for( i = -c->macroblock_width; i <= c->macroblock_width; i++ ){
 415                 for( j = -c->macroblock_height; j <=  c->macroblock_height; j++ ){
 416
 417                         score = compare( from->y, to->y,
 418                                          mb_x,          /* from x */
 419                                          mb_y,          /* from y */
 420                                          mb_x + i,      /* to x */
 421                                          mb_y + j,      /* to y */
 422                                          c);            /* context */
 423
 424                         if ( score < result->msad ) {
 425                                 result->dx = i;
 426                                 result->dy = j;
 427                                 result->msad = score;
 428                         }
 429                 }
 430         }
 431 }
 432
 433 // Credits: ffmpeg
 434 // return the median
 435 static inline int median_predictor(int a, int b, int c) {
 436         if ( a > b ){
 437                 if ( c > b ){
 438                         if ( c > a  ) b = a;
 439                         else      b = c;
 440                 }
 441         } else {
 442                 if ( b > c ){
 443                         if ( c > a ) b = c;
 444                         else     b = a;
 445                 }
 446         }
 447         return b;
 448 }
 449
 450 #if 0
 451 inline static int vertical_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
 452 {
 453         int i, j, average, deviation = 0;
 454         for ( i = 0; i < w; i++ ){
 455                 average = 0;
 456                 for ( j = 0; j < h; j++ ){
 457                         average += *(block + i*xstride + j*ystride);
 458                 }
 459                 average /= h;
 460                 for ( j = 0; j < h; j++ ){
 461                         deviation += ABS(average - block[i*xstride + j*ystride]);
 462                 }
 463         }
 464
 465         return deviation;
 466 }
 467 #endif
 468
 469 #if 0
 470 inline static int horizontal_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
 471 {
 472         int i, j, average, deviation = 0;
 473         for ( j = 0; j < h; j++ ){
 474                 average = 0;
 475                 for ( i = 0; i < w; i++ ){
 476                         average += block[i*xstride + j*ystride];
 477                 }
 478                 average /= w;
 479                 for ( i = 0; i < w; i++ ){
 480                         deviation += ABS(average - block[i*xstride + j*ystride]);
 481                 }
 482         }
 483
 484         return deviation;
 485 }
 486 #endif
 487
 488 // Macros for pointer calculations
 489 #define CURRENT(i,j)    ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
 490 #define FORMER(i,j)     ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
 491
 492 #if 0
 493 void collect_pre_statistics( struct motion_est_context_s *c, uint8_t *image ) {
 494
 495         int i, j, count = 0;
 496         uint8_t *p;
 497
 498         for ( i = c->left_mb; i <= c->right_mb; i++ ){
 499          for ( j = c->top_mb; j <= c->bottom_mb; j++ ){
 500                 count++;
 501                 p = image + i * c->macroblock_width * c->xstride + j * c->macroblock_height * c->ystride;
 502                 CURRENT(i,j)->vert_dev = c->vert_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
 503                 CURRENT(i,j)->horiz_dev = c->horiz_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
 504          }
 505         }
 506 }
 507 #endif
 508
 509 static void median_denoise( motion_vector *v, struct motion_est_context_s *c )
 510 {
 511 //      for ( int i = 0; i++
 512
 513 }
 514
 515 /** /brief Motion search
 516 *
 517 *
 518 * Search for the Vector that best represents the motion *from the last frame *to the current frame
 519 * Vocab: Colocated - the pixel in the previous frame at the current position
 520 *
 521 * Based on enhanced predictive zonal search. [Tourapis 2002]
 522 */
 523 static void search( struct yuv_data from,                       //<! Image data. Motion vector source in previous frame
 524                     struct yuv_data to,                 //<! Image data. Motion vector destination current
 525                     struct motion_est_context_s *c)     //<! The context
 526 {
 527
 528 #ifdef COUNT_COMPARES
 529         compares = 0;
 530 #endif
 531
 532         motion_vector candidates[10];
 533         motion_vector *here;            // This one gets used alot (about 30 times per macroblock)
 534         int n = 0;
 535
 536         int i, j, count=0;
 537
 538         // For every macroblock, perform motion vector estimation
 539         for( i = c->left_mb; i <= c->right_mb; i++ ){
 540          for( j = c->top_mb; j <= c->bottom_mb; j++ ){
 541
 542                 here = CURRENT(i,j);
 543                 here->valid = 1;
 544                 here->color = 100;
 545                 here->msad = MAX_MSAD;
 546                 count++;
 547                 n = 0;
 548
 549
 550                 /* Stack the predictors [i.e. checked in reverse order] */
 551
 552                 /* Adjacent to collocated */
 553                 if( c->former_vectors_valid )
 554                 {
 555                         // Top of colocated
 556                         if( j > c->prev_top_mb ){// && COL_TOP->valid ){
 557                                 candidates[n  ].dx = FORMER(i,j-1)->dx;
 558                                 candidates[n++].dy = FORMER(i,j-1)->dy;
 559                         }
 560
 561                         // Left of colocated
 562                         if( i > c->prev_left_mb ){// && COL_LEFT->valid ){
 563                                 candidates[n  ].dx = FORMER(i-1,j)->dx;
 564                                 candidates[n++].dy = FORMER(i-1,j)->dy;
 565                         }
 566
 567                         // Right of colocated
 568                         if( i < c->prev_right_mb ){// && COL_RIGHT->valid ){
 569                                 candidates[n  ].dx = FORMER(i+1,j)->dx;
 570                                 candidates[n++].dy = FORMER(i+1,j)->dy;
 571                         }
 572
 573                         // Bottom of colocated
 574                         if( j < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
 575                                 candidates[n  ].dx = FORMER(i,j+1)->dx;
 576                                 candidates[n++].dy = FORMER(i,j+1)->dy;
 577                         }
 578
 579                         // And finally, colocated
 580                         candidates[n  ].dx = FORMER(i,j)->dx;
 581                         candidates[n++].dy = FORMER(i,j)->dy;
 582                 }
 583
 584                 // For macroblocks not in the top row
 585                 if ( j > c->top_mb) {
 586
 587                         // Top if ( TOP->valid ) {
 588                                 candidates[n  ].dx = CURRENT(i,j-1)->dx;
 589                                 candidates[n++].dy = CURRENT(i,j-1)->dy;
 590                         //}
 591
 592                         // Top-Right, macroblocks not in the right row
 593                         if ( i < c->right_mb ){// && TOP_RIGHT->valid ) {
 594                                 candidates[n  ].dx = CURRENT(i+1,j-1)->dx;
 595                                 candidates[n++].dy = CURRENT(i+1,j-1)->dy;
 596                         }
 597                 }
 598
 599                 // Left, Macroblocks not in the left column
 600                 if ( i > c->left_mb ){// && LEFT->valid ) {
 601                         candidates[n  ].dx = CURRENT(i-1,j)->dx;
 602                         candidates[n++].dy = CURRENT(i-1,j)->dy;
 603                 }
 604
 605                 /* Median predictor vector (median of left, top, and top right adjacent vectors) */
 606                 if ( i > c->left_mb && j > c->top_mb && i < c->right_mb
 607                          )//&& LEFT->valid && TOP->valid && TOP_RIGHT->valid )
 608                 {
 609                         candidates[n  ].dx = median_predictor( CURRENT(i-1,j)->dx, CURRENT(i,j-1)->dx, CURRENT(i+1,j-1)->dx);
 610                         candidates[n++].dy = median_predictor( CURRENT(i-1,j)->dy, CURRENT(i,j-1)->dy, CURRENT(i+1,j-1)->dy);
 611                 }
 612
 613                 // Zero vector
 614                 candidates[n  ].dx = 0;
 615                 candidates[n++].dy = 0;
 616
 617                 int from_x = i * c->macroblock_width;
 618                 int from_y = j * c->macroblock_height;
 619                 check_candidates ( &from, &to, from_x, from_y, candidates, n, 0, here, c );
 620
 621
 622 #ifndef FULLSEARCH
 623                 diamond_search( &from, &to, from_x, from_y, here, c);
 624 #else
 625                 full_search( from, to, from_x, from_y, here, c);
 626 #endif
 627
 628
 629                 /* Do things in Reverse
 630                  * Check for occlusions. A block from last frame becomes obscured this frame.
 631                  * A bogus motion vector will result. To look for this, run the search in reverse
 632                  * and see if the vector is good backwards and forwards. Most occlusions won't be.
 633                  * The new source block may not correspond exactly to blocks in the vector buffer
 634                  * The opposite case, a block being revealed is inherently ignored.
 635                  */
 636 #if 0
 637                 if ( here->msad < c->initial_thresh )           // The vector is probably good.
 638                         continue;
 639
 640                 struct motion_vector_s reverse;
 641                 reverse.dx = -here->dx;
 642                 reverse.dy = -here->dy;
 643                 reverse.msad = here->msad;
 644
 645                 // calculate psuedo block coordinates
 646                 from_x += here->dx;
 647                 from_y += here->dy;
 648
 649                 n = 0;
 650 #endif
 651
 652                 // Calculate the real block closest to our psuedo block
 653 #if 0
 654                 int ri = ( ABS( here->dx ) + c->macroblock_width/2 ) / c->macroblock_width;
 655                 if ( ri != 0 ) ri *= here->dx / ABS(here->dx);  // Recover sign
 656                 ri += i;
 657                 if ( ri < 0 ) ri = 0;
 658                 else if ( ri >= c->mv_buffer_width ) ri = c->mv_buffer_width;
 659
 660                 int rj = ( ABS( here->dy ) + c->macroblock_height/2 ) / c->macroblock_height;
 661                 if ( rj != 0 ) rj *= here->dy / ABS(here->dy);  // Recover sign
 662                 rj += j;
 663                 if ( rj < 0 ) rj = 0;
 664                 else if ( rj >= c->mv_buffer_height ) rj = c->mv_buffer_height;
 665
 666                 /* Adjacent to collocated */
 667                 if( c->former_vectors_valid )
 668                 {
 669                         // Top of colocated
 670                         if( rj > c->prev_top_mb ){// && COL_TOP->valid ){
 671                                 candidates[n  ].dx = -FORMER(ri,rj-1)->dx;
 672                                 candidates[n++].dy = -FORMER(ri,rj-1)->dy;
 673                         }
 674
 675                         // Left of colocated
 676                         if( ri > c->prev_left_mb ){// && COL_LEFT->valid ){
 677                                 candidates[n  ].dx = -FORMER(ri-1,rj)->dx;
 678                                 candidates[n++].dy = -FORMER(ri-1,rj)->dy;
 679                         }
 680
 681                         // Right of colocated
 682                         if( ri < c->prev_right_mb ){// && COL_RIGHT->valid ){
 683                                 candidates[n  ].dx = -FORMER(ri+1,rj)->dx;
 684                                 candidates[n++].dy = -FORMER(ri+1,rj)->dy;
 685                         }
 686
 687                         // Bottom of colocated
 688                         if( rj < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
 689                                 candidates[n  ].dx = -FORMER(ri,rj+1)->dx;
 690                                 candidates[n++].dy = -FORMER(ri,rj+1)->dy;
 691                         }
 692
 693                         // And finally, colocated
 694                         candidates[n  ].dx = -FORMER(ri,rj)->dx;
 695                         candidates[n++].dy = -FORMER(ri,rj)->dy;
 696                 }
 697 #endif
 698 #if 0
 699                 // Zero vector
 700                 candidates[n].dx = 0;
 701                 candidates[n++].dy = 0;
 702
 703                 check_candidates ( &to, &from, from_x, from_y, candidates, 1, 1, &reverse, c );
 704
 705                 /* Scan for the best candidate */
 706                 while( n ) {
 707                         n--;
 708
 709                         score = compare( to, from, from_x, from_y,      /* to and from are reversed */
 710                                          from_x + candidates[n].dx,     /* to x */
 711                                          from_y + candidates[n].dy,     /* to y */
 712                                          c);                            /* context */
 713
 714                         if ( score < reverse.msad ) {
 715                                 reverse.dx = candidates[n].dx;
 716                                 reverse.dy = candidates[n].dy;
 717                                 reverse.msad = score;
 718                                 if ( score < c->initial_thresh )
 719                                         n=0;            // Simplified version of early termination thresh
 720                         }
 721                 }
 722
 723 //              if ( reverse.msad == here->msad)        // If nothing better was found
 724 //              {                                       // this is an opportunity
 725 //                                                      // to skip 4 block comparisons
 726 //                      continue;                       // in the diamond search
 727 //              }
 728
 729
 730                 diamond_search( &to, &from, from_x, from_y, &reverse, c); /* to and from are reversed */
 731
 732                 if ( ABS( reverse.dx + here->dx ) + ABS( reverse.dy + here->dy ) > 5  )
 733 //              if ( here->msad > reverse.msad + c->initial_thresh*10   )
 734                 {
 735                         here->valid = 2;
 736                 }
 737
 738 #endif
 739          } /* End column loop */
 740         } /* End row loop */
 741
 742         asm volatile ( "emms" );
 743
 744 #ifdef COUNT_COMPARES
 745         fprintf(stderr, "%d comparisons per block were made", compares/count);
 746 #endif
 747         return;
 748 }
 749
 750 void collect_post_statistics( struct motion_est_context_s *c ) {
 751
 752         c->comparison_average = 0;
 753         c->average_length = 0;
 754         c->average_x = 0;
 755         c->average_y = 0;
 756
 757         int i, j, count = 0;
 758
 759         for ( i = c->left_mb; i <= c->right_mb; i++ ){
 760          for ( j = c->top_mb; j <= c->bottom_mb; j++ ){
 761
 762                 count++;
 763                 c->comparison_average += CURRENT(i,j)->msad;
 764                 c->average_x += CURRENT(i,j)->dx;
 765                 c->average_y += CURRENT(i,j)->dy;
 766
 767
 768          }
 769         }
 770
 771         if ( count > 0 )
 772         {
 773                 c->comparison_average /= count;
 774                 c->average_x /= count;
 775                 c->average_y /= count;
 776                 c->average_length = sqrt( c->average_x * c->average_x + c->average_y * c->average_y );
 777         }
 778
 779 }
 780
 781 static void init_optimizations( struct motion_est_context_s *c )
 782 {
 783         if ( c->check_chroma ) {
 784                 switch(c->macroblock_width){
 785                         case 8:  if(c->macroblock_height == 8)  c->compare_optimized = sad_sse_8x8;
 786                                  else                           c->compare_optimized = sad_sse_8w;
 787                                  break;
 788                         case 16: if(c->macroblock_height == 16) c->compare_optimized = sad_sse_16x16;
 789                                  else                           c->compare_optimized = sad_sse_16w;
 790                                  break;
 791                         case 32: if(c->macroblock_height == 32) c->compare_optimized = sad_sse_32x32;
 792                                  else                           c->compare_optimized = sad_sse_32w;
 793                                  break;
 794                         case 64: c->compare_optimized = sad_sse_64w;
 795                                  break;
 796                         default: c->compare_optimized = sad_reference;
 797                                  break;
 798                 }
 799         }
 800         else
 801         {
 802                 switch(c->macroblock_width){
 803                         case 4:  if(c->macroblock_height == 4)  c->compare_optimized = sad_sse_422_luma_4x4;
 804                                  else                           c->compare_optimized = sad_sse_422_luma_4w;
 805                                  break;
 806                         case 8:  if(c->macroblock_height == 8)  c->compare_optimized = sad_sse_422_luma_8x8;
 807                                  else                           c->compare_optimized = sad_sse_422_luma_8w;
 808                                  break;
 809                         case 16: if(c->macroblock_height == 16) c->compare_optimized = sad_sse_422_luma_16x16;
 810                                  else                           c->compare_optimized = sad_sse_422_luma_16w;
 811                                  break;
 812                         case 32: if(c->macroblock_height == 32) c->compare_optimized = sad_sse_422_luma_32x32;
 813                                  else                           c->compare_optimized = sad_sse_422_luma_32w;
 814                                  break;
 815                         case 64: c->compare_optimized = sad_sse_422_luma_64w;
 816                                  break;
 817                         default: c->compare_optimized = sad_reference;
 818                                  break;
 819                 }
 820         }
 821 }
 822
 823 // Image stack(able) method
 824 static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
 825 {
 826         // Get the filter
 827         mlt_filter filter = mlt_frame_pop_service( frame );
 828
 829         // Get the motion_est context object
 830         struct motion_est_context_s *c = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL);
 831
 832         // Get the new image and frame number
 833         int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
 834
 835         if( error != 0 )
 836                 mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr );
 837
 838         c->current_frame_position = mlt_frame_get_position( frame );
 839
 840         /* Context Initialization */
 841         if ( c->initialized == 0 ) {
 842
 843                 // Get the filter properties object
 844                 mlt_properties properties = mlt_filter_properties( filter );
 845
 846                 c->width = *width;
 847                 c->height = *height;
 848
 849                 /* Get parameters that may have been overridden */
 850                 if( mlt_properties_get( properties, "macroblock_width") != NULL )
 851                         c->macroblock_width = mlt_properties_get_int( properties, "macroblock_width");
 852
 853                 if( mlt_properties_get( properties, "macroblock_height") != NULL )
 854                         c->macroblock_height = mlt_properties_get_int( properties, "macroblock_height");
 855
 856                 if( mlt_properties_get( properties, "prediction_thresh") != NULL )
 857                         c->initial_thresh = mlt_properties_get_int( properties, "prediction_thresh" );
 858                 else
 859                         c->initial_thresh = c->macroblock_width * c->macroblock_height;
 860
 861                 if( mlt_properties_get( properties, "search_method") != NULL )
 862                         c->search_method = mlt_properties_get_int( properties, "search_method");
 863
 864                 if( mlt_properties_get( properties, "skip_prediction") != NULL )
 865                         c->skip_prediction = mlt_properties_get_int( properties, "skip_prediction");
 866
 867                 if( mlt_properties_get( properties, "limit_x") != NULL )
 868                         c->limit_x = mlt_properties_get_int( properties, "limit_x");
 869
 870                 if( mlt_properties_get( properties, "limit_y") != NULL )
 871                         c->limit_y = mlt_properties_get_int( properties, "limit_y");
 872
 873                 if( mlt_properties_get( properties, "check_chroma" ) != NULL )
 874                         c->check_chroma = mlt_properties_get_int( properties, "check_chroma" );
 875
 876                 init_optimizations( c );
 877
 878                 // Calculate the dimensions in macroblock units
 879                 c->mv_buffer_width = (*width / c->macroblock_width);
 880                 c->mv_buffer_height = (*height / c->macroblock_height);
 881
 882                 // Size of the motion vector buffer
 883                 c->mv_size =  c->mv_buffer_width * c->mv_buffer_height * sizeof(struct motion_vector_s);
 884
 885                 // Allocate the motion vector buffers
 886                 c->former_vectors = mlt_pool_alloc( c->mv_size );
 887                 c->current_vectors = mlt_pool_alloc( c->mv_size );
 888                 c->denoise_vectors = mlt_pool_alloc( c->mv_size );
 889
 890                 // Register motion buffers for destruction
 891                 mlt_properties_set_data( properties, "current_motion_vectors", (void *)c->current_vectors, 0, mlt_pool_release, NULL );
 892                 mlt_properties_set_data( properties, "former_motion_vectors", (void *)c->former_vectors, 0, mlt_pool_release, NULL );
 893                 mlt_properties_set_data( properties, "denoise_motion_vectors", (void *)c->denoise_vectors, 0, mlt_pool_release, NULL );
 894
 895
 896                 c->former_vectors_valid = 0;
 897                 memset( c->former_vectors, 0, c->mv_size );
 898
 899                 // Figure out how many blocks should be considered edge blocks
 900                 c->edge_blocks_x = (c->limit_x + c->macroblock_width - 1) / c->macroblock_width;
 901                 c->edge_blocks_y = (c->limit_y + c->macroblock_height - 1) / c->macroblock_height;
 902
 903                 // Calculate the size of our steps (the number of bytes that seperate adjacent pixels in X and Y direction)
 904                 switch( *format ) {
 905                         case mlt_image_yuv422:
 906                                 if ( c->check_chroma )
 907                                         c->xstride = 1;
 908                                 else
 909                                         c->xstride = 2;
 910                                 c->ystride = c->xstride * *width;
 911                                 break;
 912 /*                      case mlt_image_yuv420p:
 913                                 c->xstride = 1;
 914                                 c->ystride = c->xstride * *width;
 915                                 break;
 916 */                      default:
 917                                 // I don't know
 918                                 fprintf(stderr, "\"I am unfamiliar with your new fangled pixel format!\" -filter_motion_est\n");
 919                                 return -1;
 920                 }
 921
 922                 if ( c->check_chroma ) {
 923                         // Allocate memory for the 444 images
 924                         c->former_image.y = mlt_pool_alloc( *width * *height * 3 );
 925                         c->current_image.y = mlt_pool_alloc( *width * *height * 3 );
 926                         c->current_image.u = c->current_image.y + *width * *height;
 927                         c->current_image.v = c->current_image.u + *width * *height;
 928                         c->former_image.u = c->former_image.y + *width * *height;
 929                         c->former_image.v = c->former_image.u + *width * *height;
 930                         // Register for destruction
 931                         mlt_properties_set_data( properties, "current_image", (void *)c->current_image.y, 0, mlt_pool_release, NULL );
 932                 }
 933                 else
 934                 {
 935                         c->former_image.y = mlt_pool_alloc( *width * *height * 2 );
 936                 }
 937                 // Register for destruction
 938                 mlt_properties_set_data( properties, "former_image", (void *)c->former_image.y, 0, mlt_pool_release, NULL );
 939
 940
 941                 c->former_frame_position = c->current_frame_position;
 942
 943                 c->initialized = 1;
 944         }
 945
 946         /* Check to see if somebody else has given us bounds */
 947         c->bounds = mlt_properties_get_data( MLT_FRAME_PROPERTIES( frame ), "bounds", NULL );
 948
 949         /* no bounds were given, they won't change next frame, so use a convient storage place */
 950         if( c->bounds == NULL ) {
 951                 c->bounds = &c->prev_bounds;
 952                 c->bounds->x = 0;
 953                 c->bounds->y = 0;
 954                 c->bounds->w = *width - 1;      // Zero indexed
 955                 c->bounds->h = *height - 1;     // Zero indexed
 956         }
 957
 958         // translate pixel units (from bounds) to macroblock units
 959         // make sure whole macroblock stays within bounds
 960         c->left_mb = ( c->bounds->x + c->macroblock_width - 1 ) / c->macroblock_width;
 961         c->top_mb = ( c->bounds->y + c->macroblock_height - 1 ) / c->macroblock_height;
 962         c->right_mb = ( c->bounds->x + c->bounds->w ) / c->macroblock_width - 1;
 963         c->bottom_mb = ( c->bounds->y + c->bounds->h ) / c->macroblock_height - 1;
 964
 965         // Do the same thing for the previous frame's geometry
 966         // This will be used for determining validity of predictors
 967         c->prev_left_mb = ( c->prev_bounds.x + c->macroblock_width - 1) / c->macroblock_width;
 968         c->prev_top_mb = ( c->prev_bounds.y + c->macroblock_height - 1) / c->macroblock_height;
 969         c->prev_right_mb = ( c->prev_bounds.x + c->prev_bounds.w ) / c->macroblock_width - 1;
 970         c->prev_bottom_mb = ( c->prev_bounds.y + c->prev_bounds.h ) / c->macroblock_height - 1;
 971
 972
 973         // If video is advancing, run motion vector algorithm and etc...
 974         if( c->former_frame_position + 1 == c->current_frame_position )
 975         {
 976                 #ifdef BENCHMARK
 977                 struct timeval start; gettimeofday(&start, NULL );
 978                 #endif
 979
 980                 // Swap the motion vector buffers and reuse allocated memory
 981                 struct motion_vector_s *temp = c->current_vectors;
 982                 c->current_vectors = c->former_vectors;
 983                 c->former_vectors = temp;
 984
 985                 // Swap the image buffers
 986                 if ( c->check_chroma ) {
 987                         uint8_t *temp_yuv;
 988                         temp_yuv = c->current_image.y;
 989                         c->current_image.y = c->former_image.y;
 990                         c->former_image.y = temp_yuv;
 991                         temp_yuv = c->current_image.u;
 992                         c->current_image.u = c->former_image.u;
 993                         c->former_image.u = temp_yuv;
 994                         temp_yuv = c->current_image.v;
 995                         c->current_image.v = c->former_image.v;
 996                         c->former_image.v = temp_yuv;
 997
 998                         switch ( *format ) {
 999                                 case mlt_image_yuv422:
1000                                         change_422_to_444_planar_rep( *image, c->current_image, c );
1001                                         break;
1002                                 case mlt_image_yuv420p:
1003                                         change_420p_to_444_planar_rep( *image, c->current_image, c );
1004                                         break;
1005                                 default:
1006                                         break;
1007                         }
1008                 }
1009                 else
1010                         c->current_image.y = *image;
1011
1012                 // Find a better place for this
1013                 memset( c->current_vectors, 0, c->mv_size );
1014
1015                 // Perform the motion search
1016
1017                 //collect_pre_statistics( context, *image );
1018                 search( c->current_image, c->former_image, c );
1019
1020                 //median_denoise( c->current_vectors, c );
1021
1022                 collect_post_statistics( c );
1023
1024                 #ifdef BENCHMARK
1025                 struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
1026                 fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference);
1027                 #endif
1028
1029
1030
1031                 // Detect shot changes
1032                 if( c->comparison_average > 12 * c->macroblock_width * c->macroblock_height ) {
1033                         //fprintf(stderr, " - SAD: %d   <<Shot change>>\n", c->comparison_average);
1034                         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1);
1035                 //      c->former_vectors_valid = 0; // Invalidate the previous frame's predictors
1036                         c->shot_change = 1;
1037                 }
1038                 else {
1039                         c->former_vectors_valid = 1;
1040                         c->shot_change = 0;
1041                         //fprintf(stderr, " - SAD: %d\n", c->comparison_average);
1042                 }
1043
1044                 if( c->comparison_average != 0 ) {
1045                         // Pass the new vector data into the frame
1046                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1047                                          (void*)c->current_vectors, c->mv_size, NULL, NULL );
1048
1049                 }
1050                 else {
1051                         // This fixes the ugliness caused by a duplicate frame
1052                         temp = c->current_vectors;
1053                         c->current_vectors = c->former_vectors;
1054                         c->former_vectors = temp;
1055                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1056                                          (void*)c->former_vectors, c->mv_size, NULL, NULL );
1057                 }
1058
1059         }
1060         // paused
1061         else if( c->former_frame_position == c->current_frame_position )
1062         {
1063                 // Pass the old vector data into the frame if it's valid
1064                 if( c->former_vectors_valid == 1 )
1065                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1066                                          (void*)c->current_vectors, c->mv_size, NULL, NULL );
1067
1068                 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", c->shot_change);
1069         }
1070         // there was jump in frame number
1071         else
1072                 c->former_vectors_valid = 0;
1073
1074
1075         // Cache our bounding geometry for the next frame's processing
1076         if( c->bounds != &c->prev_bounds )
1077                 memcpy( &c->prev_bounds, c->bounds, sizeof( struct mlt_geometry_item_s ) );
1078
1079         // Remember which frame this is
1080         c->former_frame_position = c->current_frame_position;
1081
1082         if ( c->check_chroma == 0 )
1083                 memcpy( c->former_image.y, *image, *width * *height * c->xstride );
1084
1085         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_width", c->macroblock_width );
1086         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_height", c->macroblock_height );
1087
1088         return error;
1089 }
1090
1091
1092
1093 /** filter processing.
1094 */
1095
1096 static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
1097 {
1098
1099         // Keeps tabs on the filter object
1100         mlt_frame_push_service( frame, this);
1101
1102         // Push the frame filter
1103         mlt_frame_push_get_image( frame, filter_get_image );
1104
1105         return frame;
1106 }
1107
1108 /** Constructor for the filter.
1109 */
1110 mlt_filter filter_motion_est_init( char *arg )
1111 {
1112         mlt_filter this = mlt_filter_new( );
1113         if ( this != NULL )
1114         {
1115                 // Get the properties object
1116                 mlt_properties properties = MLT_FILTER_PROPERTIES( this );
1117
1118                 // Initialize the motion estimation context
1119                 struct motion_est_context_s *context;
1120                 context = mlt_pool_alloc( sizeof(struct motion_est_context_s) );
1121                 mlt_properties_set_data( properties, "context", (void *)context, sizeof( struct motion_est_context_s ),
1122                                         mlt_pool_release, NULL );
1123
1124
1125                 // Register the filter
1126                 this->process = filter_process;
1127
1128                 /* defaults that may be overridden */
1129                 context->macroblock_width = 16;
1130                 context->macroblock_height = 16;
1131                 context->skip_prediction = 0;
1132                 context->limit_x = 64;
1133                 context->limit_y = 64;
1134                 context->search_method = DIAMOND_SEARCH;
1135                 context->check_chroma = 0;
1136
1137                 /* reference functions that may have optimized versions */
1138                 context->compare_reference = sad_reference;
1139                 //context->vert_deviation_reference = vertical_gradient_reference;
1140                 //context->horiz_deviation_reference = horizontal_gradient_reference;
1141
1142                 // The rest of the buffers will be initialized when the filter is first processed
1143                 context->initialized = 0;
1144         }
1145         return this;
1146 }
1147
1148 /** This source code will self destruct in 5...4...3... */