Initial import of the motion estimation filter.
[melted] / src / modules / motion_est / filter_motion_est.c
1 /*
2 * \brief fast motion estimation filter
3 * \author Zachary Drew, Copyright 2005
4 *
5 * Currently only uses Gamma data for comparison (bug or feature?)
6 * Vector optimization coming soon.
7 *
8 * Vector orientation: The vector data that is generated for the current frame specifies
9 * the motion from the previous frame to the current frame. Thus, to know how a macroblock
10 * in the current frame will move in the future, the next frame is needed.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software Foundation,
24 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 */
26
27
28 #include "filter_motion_est.h"
29 #include <framework/mlt.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <math.h>
33 #include <string.h>
34 #include <sys/time.h>
35 #include <assert.h>
36
37 #include "sad_sse.h"
38
39
40 #undef DEBUG
41 #undef DEBUG_ASM
42 #undef BENCHMARK
43 #undef COUNT_COMPARES
44
45 #define DIAMOND_SEARCH 0x0
46 #define FULL_SEARCH 0x1
47 #define SHIFT 8
48 #define MIN(a,b) ((a) > (b) ? (b) : (a))
49 #define ABS(a) ((a) >= 0 ? (a) : (-(a)))
50
51 #ifdef COUNT_COMPARES
52 int compares;
53 #endif
54
55 typedef struct motion_vector_s motion_vector;
56
/** Base pointers of the three component planes of one image. */
struct yuv_data
{
	uint8_t *y;	/* luma plane */
	uint8_t *u;	/* first chroma plane */
	uint8_t *v;	/* second chroma plane */
};
64
65 struct motion_est_context_s
66 {
67 int initialized; //<! true if filter has been initialized
68
69 /* same as mlt_frame's parameters */
70 int width, height;
71
72 /* Operational details */
73 int macroblock_width, macroblock_height;
74 int xstride, ystride;
75 //uint8_t *former_image; //<! Copy of the previous frame's image
76 struct yuv_data former_image, current_image;
77 int search_method, skip_prediction, shot_change;
78 int limit_x, limit_y; //<! max x and y of a motion vector
79 int edge_blocks_x, edge_blocks_y;
80 int initial_thresh;
81 int check_chroma; // if check_chroma == 1 then compare chroma
82
83 /* bounds */
84 struct mlt_geometry_item_s prev_bounds; // Cache last frame's bounds (needed for predictor vectors validity)
85 struct mlt_geometry_item_s *bounds; // Current bounds
86
87 /* bounds in macroblock units */
88 int left_mb, prev_left_mb, right_mb, prev_right_mb;
89 int top_mb, prev_top_mb, bottom_mb, prev_bottom_mb;
90
91 /* size of our vector buffers */
92 int mv_buffer_height, mv_buffer_width, mv_size;
93
94 /* vector buffers */
95 int former_vectors_valid; //<! true if the previous frame's buffered motion vector data is valid
96 motion_vector *former_vectors, *current_vectors;
97 motion_vector *bizarro_vectors;
98 mlt_position former_frame_position, current_frame_position;
99
100 /* two metrics for diagnostics. lower is a better estimation but beware of local minima */
101 float predictive_misses; // How often do the prediction metion vectors fail?
102 int comparison_average; // How far does the best estimation deviate from a perfect comparison?
103 int bad_comparisons;
104 int average_length;
105 int average_x, average_y;
106
107 /* run-time configurable comparison functions */
108 int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int);
109 int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int);
110 int (*vert_deviation_reference)(uint8_t *, int, int, int, int);
111 int (*horiz_deviation_reference)(uint8_t *, int, int, int, int);
112
113 };
114
115
116 // Clip the macroblocks as required. Only used for blocks at the edge of the picture
117 // "from" is assumed to be unclipped
118 inline static int clip( int *from_x,
119 int *from_y,
120 int *to_x,
121 int *to_y,
122 int *w, //<! macroblock width
123 int *h, //<! macroblock height
124 int width, //<! image width
125 int height) //<! image height
126 {
127
128 uint32_t penalty = 1 << SHIFT; // Retain a few extra bits of precision minus floating-point's blemishes
129 int diff;
130
131 // Origin of macroblock moves left of absolute boundy
132 if( *to_x < 0 ) {
133 if( *to_x + *w <= 0) return 0; // Clipped out of existance
134 penalty = (*w * penalty) / (*w + *to_x); // Recipricol of the fraction of the block that remains
135 *from_x -= *to_x;
136 *w += *to_x;
137 *to_x = 0;
138 }
139 // Portion of macroblock moves right of absolute boundry
140 else if( *to_x + *w > width ) {
141 if(*to_x >= width) return 0; // Clipped out of existance
142 diff = *to_x + *w - width; // Width of area clipped (0 < diff < macroblock width)
143 penalty = (*w * penalty) / (*w - diff); // Recipricol of the fraction of the block that remains
144 *w -= diff;
145 }
146 // Origin of macroblock moves above absolute boundy
147 if( *to_y < 0 ) {
148 if( *to_y + *h <= 0) return 0; // Clipped out of existance
149 penalty = (*h * penalty) / (*h + *to_y); // Recipricol of the fraction of the block that remains
150 *from_y -= *to_y;
151 *h += *to_y;
152 *to_y = 0;
153 }
154 // Portion of macroblock moves bellow absolute boundry
155 else if( *to_y + *h > height ) {
156 if(*to_y >= height) return 0; // Clipped out of existance
157 diff = *to_y + *h - height; // Height of area clipped (0 < diff < macroblock height)
158 penalty = (*h * penalty) / (*h - diff); // Recipricol of the fraction of the block that is clipped
159 *h -= diff;
160 }
161 return penalty;
162 }
163
164
/**
 * \brief Reference Sum of Absolute Differences comparison function
 *
 * \param block1, block2  first sample of each block
 * \param xstride         bytes between horizontally adjacent samples
 * \param ystride         bytes between vertically adjacent samples
 * \param w, h            block size in samples
 * \return the sum over the block of |block1 - block2|
 */
inline static int sad_reference( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
{
	const uint8_t *row_a = block1;
	const uint8_t *row_b = block2;
	int total = 0;

	for ( int row = 0; row < h; row++ ) {
		for ( int col = 0; col < w; col++ ) {
			const int delta = row_a[col * xstride] - row_b[col * xstride];
			total += ( delta < 0 ) ? -delta : delta;
		}
		row_a += ystride;
		row_b += ystride;
	}

	return total;
}
181
182 inline static void change_422_to_444_planar_rep( uint8_t *image, struct yuv_data yuv, struct motion_est_context_s *c )
183 {
184 register uint8_t *p = image;
185 register uint8_t *q = image + c->width * c->height * 2;
186 while ( *p != *q ) {
187 *(yuv.y++) = *(p ++);
188 *(yuv.u++) = *p;
189 *(yuv.u++) = *(p ++);
190 *(yuv.y++) = *(p ++);
191 *(yuv.v++) = *p;
192 *(yuv.v++) = *(p ++);
193 }
194 }
195
196 // broken
197 inline static void change_420p_to_444_planar_rep( uint8_t *image, struct yuv_data yuv, struct motion_est_context_s *c )
198 {
199 uint8_t *p = image + c->width * c->height;
200 uint8_t *q = p + c->width*c->height/2;
201 uint8_t *u2, *v2;
202 while( *p != *q ) {
203 u2 = yuv.u + c->width;
204 *yuv.u ++ = *p;
205 *yuv.u ++ = *p;
206 *u2 ++ = *p;
207 *u2 ++ = *p ++;
208 }
209
210 *q += c->width*c->height/2;
211 while( *p != *q ) {
212 v2 = yuv.v + c->width;
213 *yuv.v ++ = *p;
214 *yuv.v ++ = *p;
215 *v2 ++ = *p;
216 *v2 ++ = *p ++;
217 }
218
219 }
220
221 /** /brief Abstracted block comparison function
222 */
223 inline static int compare( uint8_t *from,
224 uint8_t *to,
225 int from_x,
226 int from_y,
227 int to_x,
228 int to_y,
229 struct motion_est_context_s *c)
230 {
231 #ifdef COUNT_COMPARES
232 compares++;
233 #endif
234
235 if( ABS(from_x - to_x) >= c->limit_x || ABS(from_y - to_y) >= c->limit_y )
236 return MAX_MSAD;
237
238 int score;
239 int (*cmp)(uint8_t *, uint8_t *, int, int, int, int) = c->compare_optimized;
240
241 int mb_w = c->macroblock_width;
242 int mb_h = c->macroblock_height;
243
244 int penalty = clip(&from_x, &from_y, &to_x, &to_y, &mb_w, &mb_h, c->width, c->height);
245 if ( penalty == 1<<SHIFT)
246 penalty = clip(&to_x, &to_y, &from_x, &from_y, &mb_w, &mb_h, c->width, c->height);
247
248 if( penalty == 0 ) // Clipped out of existance
249 return MAX_MSAD;
250 else if( penalty != 1<<SHIFT ) // SIMD optimized comparison won't work
251 cmp = c->compare_reference;
252
253 uint8_t *from_block = from + from_x * c->xstride + from_y * c->ystride;
254 uint8_t *to_block = to + to_x * c->xstride + to_y * c->ystride;
255
256 #ifdef DEBUG_ASM
257 if( penalty == 1<<SHIFT ){
258 score = c->compare_reference( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
259 int score2 = c->compare_optimized( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
260 if ( score != score2 )
261 fprintf(stderr, "Your assembly doesn't work! Reference: %d Asm: %d\n", score, score2);
262 }
263 else
264 #endif
265
266 score = cmp( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
267
268 return ( score * penalty ) >> SHIFT; // The extra precision is no longer wanted
269 }
270
271 static inline void check_candidates ( struct yuv_data *from, struct yuv_data *to,
272 int from_x, int from_y,
273 motion_vector *candidates, int count, int unique,
274 motion_vector *result,
275 struct motion_est_context_s *c )
276 {
277 int score, i, j;
278 /* Scan for the best candidate */
279 for ( i = 0; i < count; i++ )
280 {
281 // this little dohicky ignores duplicate candidates, if they are possible
282 if ( unique == 0 ) {
283 j = 0;
284 while ( j < i )
285 {
286 if ( candidates[j].dx == candidates[i].dx &&
287 candidates[j].dy == candidates[i].dy )
288 goto next_for_loop;
289
290 j++;
291 }
292 }
293
294 // Luma
295 score = compare( from->y, to->y, from_x, from_y,
296 from_x + candidates[i].dx, /* to x */
297 from_y + candidates[i].dy, /* to y */
298 c);
299
300 if ( c->check_chroma ) {
301 if ( score >= result->msad ) // Early term
302 continue;
303
304 // Chroma - U
305 score += compare( from->u, to->u, from_x, from_y,
306 from_x + candidates[i].dx, /* to x */
307 from_y + candidates[i].dy, /* to y */
308 c);
309
310 if ( score >= result->msad ) // Early term
311 continue;
312
313 // Chroma - V
314 score += compare( from->v, to->v, from_x, from_y,
315 from_x + candidates[i].dx, /* to x */
316 from_y + candidates[i].dy, /* to y */
317 c);
318 }
319
320 if ( score < result->msad ) { // New minimum
321 result->dx = candidates[i].dx;
322 result->dy = candidates[i].dy;
323 result->msad = score;
324 }
325 next_for_loop:;
326 }
327 }
328
329 /* /brief Diamond search
330 * Operates on a single macroblock
331 */
332 static inline void diamond_search(
333 struct yuv_data *from, //<! Image data from previous frame
334 struct yuv_data *to, //<! Image data in current frame
335 int mb_x, //<! X upper left corner of macroblock
336 int mb_y, //<! U upper left corner of macroblock
337 struct motion_vector_s *result, //<! Best predicted mv and eventual result
338 struct motion_est_context_s *c) //<! motion estimation context
339 {
340
341 // diamond search pattern
342 motion_vector candidates[4];
343
344 // Keep track of best and former best candidates
345 motion_vector best, former;
346
347 // The direction of the refinement needs to be known
348 motion_vector current;
349
350 int i, first = 1;
351
352 // Loop through the search pattern
353 while( 1 ) {
354
355 current.dx = result->dx;
356 current.dy = result->dy;
357
358 if ( first == 1 ) // Set the initial pattern
359 {
360 candidates[0].dx = result->dx + 1; candidates[0].dy = result->dy + 0;
361 candidates[1].dx = result->dx + 0; candidates[1].dy = result->dy + 1;
362 candidates[2].dx = result->dx - 1; candidates[2].dy = result->dy + 0;
363 candidates[3].dx = result->dx + 0; candidates[3].dy = result->dy - 1;
364 i = 4;
365 }
366 else // Construct the next portion of the search pattern
367 {
368 candidates[0].dx = result->dx + best.dx;
369 candidates[0].dy = result->dy + best.dy;
370 if (best.dx == former.dx && best.dy == former.dy) {
371 candidates[1].dx = result->dx + best.dy;
372 candidates[1].dy = result->dy + best.dx; // Yes, the wires
373 candidates[2].dx = result->dx - best.dy; // are crossed
374 candidates[2].dy = result->dy - best.dx;
375 i = 3;
376 } else {
377 candidates[1].dx = result->dx + former.dx;
378 candidates[1].dy = result->dy + former.dy;
379 i = 2;
380 }
381
382 former.dx = best.dx; former.dy = best.dy; // Keep track of new former best
383 }
384
385 check_candidates ( from, to, mb_x, mb_y, candidates, i, 1, result, c );
386 best.dx = result->dx - current.dx;
387 best.dy = result->dy - current.dy;
388
389 if ( best.dx == 0 && best.dy == 0 )
390 return;
391
392 if ( first == 1 ){
393 first = 0;
394 former.dx = best.dx; former.dy = best.dy; // First iteration, sensible value for former_d*
395 }
396 }
397 }
398
399 /* /brief Full (brute) search
400 * Operates on a single macroblock
401 */
402 static void full_search(
403 struct yuv_data *from, //<! Image data from previous frame
404 struct yuv_data *to, //<! Image data in current frame
405 int mb_x, //<! X upper left corner of macroblock
406 int mb_y, //<! U upper left corner of macroblock
407 struct motion_vector_s *result, //<! Best predicted mv and eventual result
408 struct motion_est_context_s *c) //<! motion estimation context
409 {
410 // Keep track of best candidate
411 int i,j,score;
412
413 // Go loopy
414 for( i = -c->macroblock_width; i <= c->macroblock_width; i++ ){
415 for( j = -c->macroblock_height; j <= c->macroblock_height; j++ ){
416
417 score = compare( from->y, to->y,
418 mb_x, /* from x */
419 mb_y, /* from y */
420 mb_x + i, /* to x */
421 mb_y + j, /* to y */
422 c); /* context */
423
424 if ( score < result->msad ) {
425 result->dx = i;
426 result->dy = j;
427 result->msad = score;
428 }
429 }
430 }
431 }
432
// Credits: ffmpeg
// Return the median of the three arguments, i.e. c clamped to the
// range spanned by a and b.
static inline int median_predictor(int a, int b, int c) {
	const int lo = ( a < b ) ? a : b;
	const int hi = ( a < b ) ? b : a;

	if ( c < lo )
		return lo;
	if ( c > hi )
		return hi;
	return c;
}
449
/**
 * Sum, over every column of the block, of the absolute deviations of the
 * column's samples from that column's (integer) mean.
 *
 * \param block    first sample of the block
 * \param xstride  bytes between horizontally adjacent samples
 * \param ystride  bytes between vertically adjacent samples
 * \param w, h     block size in samples
 */
inline static int vertical_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
{
	int total = 0;

	for ( int col = 0; col < w; col++ ) {
		const uint8_t *column = block + col * xstride;
		int mean = 0;

		for ( int row = 0; row < h; row++ )
			mean += column[row * ystride];
		mean /= h;

		for ( int row = 0; row < h; row++ ) {
			const int delta = mean - column[row * ystride];
			total += ( delta < 0 ) ? -delta : delta;
		}
	}

	return total;
}
466
/**
 * Sum, over every row of the block, of the absolute deviations of the row's
 * samples from that row's (integer) mean.
 *
 * \param block    first sample of the block
 * \param xstride  bytes between horizontally adjacent samples
 * \param ystride  bytes between vertically adjacent samples
 * \param w, h     block size in samples
 */
inline static int horizontal_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
{
	int total = 0;

	for ( int row = 0; row < h; row++ ) {
		const uint8_t *line = block + row * ystride;
		int mean = 0;

		for ( int col = 0; col < w; col++ )
			mean += line[col * xstride];
		mean /= w;

		for ( int col = 0; col < w; col++ ) {
			const int delta = mean - line[col * xstride];
			total += ( delta < 0 ) ? -delta : delta;
		}
	}

	return total;
}
483
484 // Macros for pointer calculations
485 #define CURRENT(i,j) ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
486 #define FORMER(i,j) ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
487
488 void collect_pre_statistics( struct motion_est_context_s *c, uint8_t *image ) {
489
490 int i, j, count = 0;
491 uint8_t *p;
492
493 for ( i = c->left_mb; i <= c->right_mb; i++ ){
494 for ( j = c->top_mb; j <= c->bottom_mb; j++ ){
495 count++;
496 p = image + i * c->macroblock_width * c->xstride + j * c->macroblock_height * c->ystride;
497 CURRENT(i,j)->vert_dev = c->vert_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
498 CURRENT(i,j)->horiz_dev = c->horiz_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
499 }
500 }
501 }
502
503
504
505 /** /brief Motion search
506 *
507 *
508 * Search for the Vector that best represents the motion *from the last frame *to the current frame
509 * Vocab: Colocated - the pixel in the previous frame at the current position
510 *
511 * Based on enhanced predictive zonal search. [Tourapis 2002]
512 */
513 static void search( struct yuv_data from, //<! Image data. Motion vector source in previous frame
514 struct yuv_data to, //<! Image data. Motion vector destination current
515 struct motion_est_context_s *c) //<! The context
516 {
517
518 #ifdef COUNT_COMPARES
519 compares = 0;
520 #endif
521
522 motion_vector candidates[10];
523 motion_vector *here; // This one gets used alot (about 30 times per macroblock)
524 int n = 0;
525
526 int i, j, count=0;
527
528 // For every macroblock, perform motion vector estimation
529 for( i = c->left_mb; i <= c->right_mb; i++ ){
530 for( j = c->top_mb; j <= c->bottom_mb; j++ ){
531
532 here = CURRENT(i,j);
533 here->valid = 1;
534 here->color = 100;
535 here->msad = MAX_MSAD;
536 count++;
537 n = 0;
538
539 /* Stack the predictors [i.e. checked in reverse order] */
540
541 /* Adjacent to collocated */
542 if( c->former_vectors_valid )
543 {
544 // Top of colocated
545 if( j > c->prev_top_mb ){// && COL_TOP->valid ){
546 candidates[n ].dx = FORMER(i,j-1)->dx;
547 candidates[n++].dy = FORMER(i,j-1)->dy;
548 }
549
550 // Left of colocated
551 if( i > c->prev_left_mb ){// && COL_LEFT->valid ){
552 candidates[n ].dx = FORMER(i-1,j)->dx;
553 candidates[n++].dy = FORMER(i-1,j)->dy;
554 }
555
556 // Right of colocated
557 if( i < c->prev_right_mb ){// && COL_RIGHT->valid ){
558 candidates[n ].dx = FORMER(i+1,j)->dx;
559 candidates[n++].dy = FORMER(i+1,j)->dy;
560 }
561
562 // Bottom of colocated
563 if( j < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
564 candidates[n ].dx = FORMER(i,j+1)->dx;
565 candidates[n++].dy = FORMER(i,j+1)->dy;
566 }
567
568 // And finally, colocated
569 candidates[n ].dx = FORMER(i,j)->dx;
570 candidates[n++].dy = FORMER(i,j)->dy;
571 }
572
573 // For macroblocks not in the top row
574 if ( j > c->top_mb) {
575
576 // Top if ( TOP->valid ) {
577 candidates[n ].dx = CURRENT(i,j-1)->dx;
578 candidates[n++].dy = CURRENT(i,j-1)->dy;
579 //}
580
581 // Top-Right, macroblocks not in the right row
582 if ( i < c->right_mb ){// && TOP_RIGHT->valid ) {
583 candidates[n ].dx = CURRENT(i+1,j-1)->dx;
584 candidates[n++].dy = CURRENT(i+1,j-1)->dy;
585 }
586 }
587
588 // Left, Macroblocks not in the left column
589 if ( i > c->left_mb ){// && LEFT->valid ) {
590 candidates[n ].dx = CURRENT(i-1,j)->dx;
591 candidates[n++].dy = CURRENT(i-1,j)->dy;
592 }
593
594 /* Median predictor vector (median of left, top, and top right adjacent vectors) */
595 if ( i > c->left_mb && j > c->top_mb && i < c->right_mb
596 )//&& LEFT->valid && TOP->valid && TOP_RIGHT->valid )
597 {
598 candidates[n ].dx = median_predictor( CURRENT(i-1,j)->dx, CURRENT(i,j-1)->dx, CURRENT(i+1,j-1)->dx);
599 candidates[n++].dy = median_predictor( CURRENT(i-1,j)->dy, CURRENT(i,j-1)->dy, CURRENT(i+1,j-1)->dy);
600 }
601
602 // Zero vector
603 candidates[n ].dx = 0;
604 candidates[n++].dy = 0;
605
606 int from_x = i * c->macroblock_width;
607 int from_y = j * c->macroblock_height;
608 check_candidates ( &from, &to, from_x, from_y, candidates, n, 0, here, c );
609
610
611 #ifndef FULLSEARCH
612 diamond_search( &from, &to, from_x, from_y, here, c);
613 #else
614 full_search( from, to, from_x, from_y, here, c);
615 #endif
616
617
618 /* Do things in Reverse
619 * Check for occlusions. A block from last frame becomes obscured this frame.
620 * A bogus motion vector will result. To look for this, run the search in reverse
621 * and see if the vector is good backwards and forwards. Most occlusions won't be.
622 * The new source block may not correspond exactly to blocks in the vector buffer
623 * The opposite case, a block being revealed is inherently ignored.
624 */
625 #if 0
626 if ( here->msad < c->initial_thresh ) // The vector is probably good.
627 continue;
628
629 struct motion_vector_s reverse;
630 reverse.dx = -here->dx;
631 reverse.dy = -here->dy;
632 reverse.msad = here->msad;
633
634 // calculate psuedo block coordinates
635 from_x += here->dx;
636 from_y += here->dy;
637
638 n = 0;
639 #endif
640
641 // Calculate the real block closest to our psuedo block
642 #if 0
643 int ri = ( ABS( here->dx ) + c->macroblock_width/2 ) / c->macroblock_width;
644 if ( ri != 0 ) ri *= here->dx / ABS(here->dx); // Recover sign
645 ri += i;
646 if ( ri < 0 ) ri = 0;
647 else if ( ri >= c->mv_buffer_width ) ri = c->mv_buffer_width;
648
649 int rj = ( ABS( here->dy ) + c->macroblock_height/2 ) / c->macroblock_height;
650 if ( rj != 0 ) rj *= here->dy / ABS(here->dy); // Recover sign
651 rj += j;
652 if ( rj < 0 ) rj = 0;
653 else if ( rj >= c->mv_buffer_height ) rj = c->mv_buffer_height;
654
655 /* Adjacent to collocated */
656 if( c->former_vectors_valid )
657 {
658 // Top of colocated
659 if( rj > c->prev_top_mb ){// && COL_TOP->valid ){
660 candidates[n ].dx = -FORMER(ri,rj-1)->dx;
661 candidates[n++].dy = -FORMER(ri,rj-1)->dy;
662 }
663
664 // Left of colocated
665 if( ri > c->prev_left_mb ){// && COL_LEFT->valid ){
666 candidates[n ].dx = -FORMER(ri-1,rj)->dx;
667 candidates[n++].dy = -FORMER(ri-1,rj)->dy;
668 }
669
670 // Right of colocated
671 if( ri < c->prev_right_mb ){// && COL_RIGHT->valid ){
672 candidates[n ].dx = -FORMER(ri+1,rj)->dx;
673 candidates[n++].dy = -FORMER(ri+1,rj)->dy;
674 }
675
676 // Bottom of colocated
677 if( rj < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
678 candidates[n ].dx = -FORMER(ri,rj+1)->dx;
679 candidates[n++].dy = -FORMER(ri,rj+1)->dy;
680 }
681
682 // And finally, colocated
683 candidates[n ].dx = -FORMER(ri,rj)->dx;
684 candidates[n++].dy = -FORMER(ri,rj)->dy;
685 }
686 #endif
687 #if 0
688 // Zero vector
689 candidates[n].dx = 0;
690 candidates[n++].dy = 0;
691
692 check_candidates ( &to, &from, from_x, from_y, candidates, 1, 1, &reverse, c );
693
694 /* Scan for the best candidate */
695 while( n ) {
696 n--;
697
698 score = compare( to, from, from_x, from_y, /* to and from are reversed */
699 from_x + candidates[n].dx, /* to x */
700 from_y + candidates[n].dy, /* to y */
701 c); /* context */
702
703 if ( score < reverse.msad ) {
704 reverse.dx = candidates[n].dx;
705 reverse.dy = candidates[n].dy;
706 reverse.msad = score;
707 if ( score < c->initial_thresh )
708 n=0; // Simplified version of early termination thresh
709 }
710 }
711
712 // if ( reverse.msad == here->msad) // If nothing better was found
713 // { // this is an opportunity
714 // // to skip 4 block comparisons
715 // continue; // in the diamond search
716 // }
717
718
719 diamond_search( &to, &from, from_x, from_y, &reverse, c); /* to and from are reversed */
720
721 if ( ABS( reverse.dx + here->dx ) + ABS( reverse.dy + here->dy ) > 5 )
722 // if ( here->msad > reverse.msad + c->initial_thresh*10 )
723 {
724 here->valid = 2;
725 }
726
727 #endif
728 } /* End column loop */
729 } /* End row loop */
730
731 asm volatile ( "emms" );
732
733 #ifdef COUNT_COMPARES
734 fprintf(stderr, "%d comparisons per block were made", compares/count);
735 #endif
736 return;
737 }
738
739 void collect_post_statistics( struct motion_est_context_s *c ) {
740
741 c->comparison_average = 0;
742 c->average_length = 0;
743 c->average_x = 0;
744 c->average_y = 0;
745
746 int i, j, count = 0;
747
748 for ( i = c->left_mb; i <= c->right_mb; i++ ){
749 for ( j = c->top_mb; j <= c->bottom_mb; j++ ){
750
751 count++;
752 c->comparison_average += CURRENT(i,j)->msad;
753 c->average_x += CURRENT(i,j)->dx;
754 c->average_y += CURRENT(i,j)->dy;
755
756
757 }
758 }
759
760 if ( count > 0 )
761 {
762 c->comparison_average /= count;
763 c->average_x /= count;
764 c->average_y /= count;
765 c->average_length = sqrt( c->average_x * c->average_x + c->average_y * c->average_y );
766 }
767
768 }
769
770 static void init_optimizations( struct motion_est_context_s *c )
771 {
772 if ( c->check_chroma ) {
773 switch(c->macroblock_width){
774 case 8: if(c->macroblock_height == 8) c->compare_optimized = sad_sse_8x8;
775 else c->compare_optimized = sad_sse_8w;
776 break;
777 case 16: if(c->macroblock_height == 16) c->compare_optimized = sad_sse_16x16;
778 else c->compare_optimized = sad_sse_16w;
779 break;
780 case 32: if(c->macroblock_height == 32) c->compare_optimized = sad_sse_32x32;
781 else c->compare_optimized = sad_sse_32w;
782 break;
783 case 64: c->compare_optimized = sad_sse_64w;
784 break;
785 default: c->compare_optimized = sad_reference;
786 break;
787 }
788 }
789 else
790 {
791 switch(c->macroblock_width){
792 case 4: if(c->macroblock_height == 4) c->compare_optimized = sad_sse_422_luma_4x4;
793 else c->compare_optimized = sad_sse_422_luma_4w;
794 break;
795 case 8: if(c->macroblock_height == 8) c->compare_optimized = sad_sse_422_luma_8x8;
796 else c->compare_optimized = sad_sse_422_luma_8w;
797 break;
798 case 16: if(c->macroblock_height == 16) c->compare_optimized = sad_sse_422_luma_16x16;
799 else c->compare_optimized = sad_sse_422_luma_16w;
800 break;
801 case 32: if(c->macroblock_height == 32) c->compare_optimized = sad_sse_422_luma_32x32;
802 else c->compare_optimized = sad_sse_422_luma_32w;
803 break;
804 case 64: c->compare_optimized = sad_sse_422_luma_64w;
805 break;
806 default: c->compare_optimized = sad_reference;
807 break;
808 }
809 }
810 }
811
812 // Image stack(able) method
813 static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
814 {
815 // Get the filter
816 mlt_filter filter = mlt_frame_pop_service( frame );
817
818 // Get the motion_est context object
819 struct motion_est_context_s *context = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL);
820
821 // Get the new image and frame number
822 int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
823
824 if( error != 0 )
825 mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr );
826
827 context->current_frame_position = mlt_frame_get_position( frame );
828
829 /* Context Initialization */
830 if ( context->initialized == 0 ) {
831
832 // Get the filter properties object
833 mlt_properties properties = mlt_filter_properties( filter );
834
835 context->width = *width;
836 context->height = *height;
837
838 /* Get parameters that may have been overridden */
839 if( mlt_properties_get( properties, "macroblock_width") != NULL )
840 context->macroblock_width = mlt_properties_get_int( properties, "macroblock_width");
841
842 if( mlt_properties_get( properties, "macroblock_height") != NULL )
843 context->macroblock_height = mlt_properties_get_int( properties, "macroblock_height");
844
845 if( mlt_properties_get( properties, "prediction_thresh") != NULL )
846 context->initial_thresh = mlt_properties_get_int( properties, "prediction_thresh" );
847 else
848 context->initial_thresh = context->macroblock_width * context->macroblock_height;
849
850 if( mlt_properties_get( properties, "search_method") != NULL )
851 context->search_method = mlt_properties_get_int( properties, "search_method");
852
853 if( mlt_properties_get( properties, "skip_prediction") != NULL )
854 context->skip_prediction = mlt_properties_get_int( properties, "skip_prediction");
855
856 if( mlt_properties_get( properties, "limit_x") != NULL )
857 context->limit_x = mlt_properties_get_int( properties, "limit_x");
858
859 if( mlt_properties_get( properties, "limit_y") != NULL )
860 context->limit_y = mlt_properties_get_int( properties, "limit_y");
861
862 if( mlt_properties_get( properties, "check_chroma" ) != NULL )
863 context->check_chroma = mlt_properties_get_int( properties, "check_chroma" );
864
865 init_optimizations( context );
866
867 // Calculate the dimensions in macroblock units
868 context->mv_buffer_width = (*width / context->macroblock_width);
869 context->mv_buffer_height = (*height / context->macroblock_height);
870
871 // Size of the motion vector buffer
872 context->mv_size = context->mv_buffer_width * context->mv_buffer_height * sizeof(struct motion_vector_s);
873
874 // Allocate the motion vector buffers
875 context->former_vectors = mlt_pool_alloc( context->mv_size );
876 context->current_vectors = mlt_pool_alloc( context->mv_size );
877
878 // Register motion buffers for destruction
879 mlt_properties_set_data( properties, "current_motion_vectors", (void *)context->current_vectors, 0, mlt_pool_release, NULL );
880 mlt_properties_set_data( properties, "former_motion_vectors", (void *)context->former_vectors, 0, mlt_pool_release, NULL );
881
882
883 context->former_vectors_valid = 0;
884 memset( context->former_vectors, 0, context->mv_size );
885
886 // Figure out how many blocks should be considered edge blocks
887 context->edge_blocks_x = (context->limit_x + context->macroblock_width - 1) / context->macroblock_width;
888 context->edge_blocks_y = (context->limit_y + context->macroblock_height - 1) / context->macroblock_height;
889
890 // Calculate the size of our steps (the number of bytes that seperate adjacent pixels in X and Y direction)
891 switch( *format ) {
892 case mlt_image_yuv422:
893 if ( context->check_chroma )
894 context->xstride = 1;
895 else
896 context->xstride = 2;
897 context->ystride = context->xstride * *width;
898 break;
899 /* case mlt_image_yuv420p:
900 context->xstride = 1;
901 context->ystride = context->xstride * *width;
902 break;
903 */ default:
904 // I don't know
905 fprintf(stderr, "\"I am unfamiliar with your new fangled pixel format!\" -filter_motion_est\n");
906 return -1;
907 }
908
909 if ( context->check_chroma ) {
910 // Allocate memory for the 444 images
911 context->former_image.y = mlt_pool_alloc( *width * *height * 3 );
912 context->current_image.y = mlt_pool_alloc( *width * *height * 3 );
913 context->current_image.u = context->current_image.y + *width * *height;
914 context->current_image.v = context->current_image.u + *width * *height;
915 context->former_image.u = context->former_image.y + *width * *height;
916 context->former_image.v = context->former_image.u + *width * *height;
917 // Register for destruction
918 mlt_properties_set_data( properties, "current_image", (void *)context->current_image.y, 0, mlt_pool_release, NULL );
919 }
920 else
921 {
922 context->former_image.y = mlt_pool_alloc( *width * *height * 2 );
923 }
924 // Register for destruction
925 mlt_properties_set_data( properties, "former_image", (void *)context->former_image.y, 0, mlt_pool_release, NULL );
926
927
928 context->former_frame_position = context->current_frame_position;
929
930 context->initialized = 1;
931 }
932
933 /* Check to see if somebody else has given us bounds */
934 context->bounds = mlt_properties_get_data( MLT_FRAME_PROPERTIES( frame ), "bounds", NULL );
935
936 /* no bounds were given, they won't change next frame, so use a convient storage place */
937 if( context->bounds == NULL ) {
938 context->bounds = &context->prev_bounds;
939 context->bounds->x = 0;
940 context->bounds->y = 0;
941 context->bounds->w = *width - 1; // Zero indexed
942 context->bounds->h = *height - 1; // Zero indexed
943 }
944
945 // translate pixel units (from bounds) to macroblock units
946 // make sure whole macroblock stays within bounds
947 context->left_mb = (context->bounds->x + context->macroblock_width - 1) / context->macroblock_width;
948 context->top_mb = (context->bounds->y + context->macroblock_height - 1) / context->macroblock_height;
949 context->right_mb = (context->bounds->x + context->bounds->w - context->macroblock_width + 1) / context->macroblock_width;
950 context->bottom_mb = (context->bounds->y + context->bounds->h - context->macroblock_height + 1) / context->macroblock_height;
951
952 // Do the same thing for the previous frame's geometry
953 // This will be used for determining validity of predictors
954 context->prev_left_mb = (context->prev_bounds.x + context->macroblock_width - 1) / context->macroblock_width;
955 context->prev_top_mb = (context->prev_bounds.y + context->macroblock_height - 1) / context->macroblock_height;
956 context->prev_right_mb = (context->prev_bounds.x + context->prev_bounds.w - context->macroblock_width - 1)
957 / context->macroblock_width;
958 context->prev_bottom_mb = (context->prev_bounds.y + context->prev_bounds.h - context->macroblock_height - 1)
959 / context->macroblock_height;
960
961
962 // If video is advancing, run motion vector algorithm and etc...
963 if( context->former_frame_position + 1 == context->current_frame_position )
964 {
965 #ifdef BENCHMARK
966 struct timeval start; gettimeofday(&start, NULL );
967 #endif
968
969 // Swap the motion vector buffers and reuse allocated memory
970 struct motion_vector_s *temp = context->current_vectors;
971 context->current_vectors = context->former_vectors;
972 context->former_vectors = temp;
973
974 // Swap the image buffers
975 if ( context->check_chroma ) {
976 uint8_t *temp_yuv;
977 temp_yuv = context->current_image.y;
978 context->current_image.y = context->former_image.y;
979 context->former_image.y = temp_yuv;
980 temp_yuv = context->current_image.u;
981 context->current_image.u = context->former_image.u;
982 context->former_image.u = temp_yuv;
983 temp_yuv = context->current_image.v;
984 context->current_image.v = context->former_image.v;
985 context->former_image.v = temp_yuv;
986
987 switch ( *format ) {
988 case mlt_image_yuv422:
989 change_422_to_444_planar_rep( *image, context->current_image, context );
990 break;
991 case mlt_image_yuv420p:
992 change_420p_to_444_planar_rep( *image, context->current_image, context );
993 break;
994 default:
995 break;
996 }
997 }
998 else
999 context->current_image.y = *image;
1000
1001 // Find a better place for this
1002 memset( context->current_vectors, 0, context->mv_size );
1003
1004 // Perform the motion search
1005
1006 //collect_pre_statistics( context, *image );
1007 search( context->current_image, context->former_image, context );
1008 collect_post_statistics( context );
1009
1010 #ifdef BENCHMARK
1011 struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
1012 fprintf(stderr, " in frame %d:%d usec\n", context->current_frame_position, difference);
1013 #endif
1014
1015
1016
1017 // Detect shot changes
1018 if( context->comparison_average > 12 * context->macroblock_width * context->macroblock_height ) {
1019 //fprintf(stderr, " - SAD: %d <<Shot change>>\n", context->comparison_average);
1020 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1);
1021 // context->former_vectors_valid = 0; // Invalidate the previous frame's predictors
1022 context->shot_change = 1;
1023 }
1024 else {
1025 context->former_vectors_valid = 1;
1026 context->shot_change = 0;
1027 //fprintf(stderr, " - SAD: %d\n", context->comparison_average);
1028 }
1029
1030 if( context->comparison_average != 0 ) {
1031 // Pass the new vector data into the frame
1032 mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1033 (void*)context->current_vectors, context->mv_size, NULL, NULL );
1034
1035 }
1036 else {
1037 // This fixes the ugliness caused by a duplicate frame
1038 temp = context->current_vectors;
1039 context->current_vectors = context->former_vectors;
1040 context->former_vectors = temp;
1041 mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1042 (void*)context->former_vectors, context->mv_size, NULL, NULL );
1043 }
1044
1045 }
1046 // paused
1047 else if( context->former_frame_position == context->current_frame_position )
1048 {
1049 // Pass the old vector data into the frame if it's valid
1050 if( context->former_vectors_valid == 1 )
1051 mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
1052 (void*)context->current_vectors, context->mv_size, NULL, NULL );
1053
1054 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", context->shot_change);
1055 }
1056 // there was jump in frame number
1057 else
1058 context->former_vectors_valid = 0;
1059
1060
1061 // Cache our bounding geometry for the next frame's processing
1062 if( context->bounds != &context->prev_bounds )
1063 memcpy( &context->prev_bounds, context->bounds, sizeof( struct mlt_geometry_item_s ) );
1064
1065 // Remember which frame this is
1066 context->former_frame_position = context->current_frame_position;
1067
1068 if ( context->check_chroma == 0 )
1069 memcpy( context->former_image.y, *image, *width * *height * context->xstride );
1070
1071 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_width", context->macroblock_width );
1072 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_height", context->macroblock_height );
1073
1074 return error;
1075 }
1076
1077
1078
1079 /** filter processing.
1080 */
1081
1082 static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
1083 {
1084
1085 // Keeps tabs on the filter object
1086 mlt_frame_push_service( frame, this);
1087
1088 // Push the frame filter
1089 mlt_frame_push_get_image( frame, filter_get_image );
1090
1091 return frame;
1092 }
1093
1094 /** Constructor for the filter.
1095 */
1096 mlt_filter filter_motion_est_init( char *arg )
1097 {
1098 mlt_filter this = mlt_filter_new( );
1099 if ( this != NULL )
1100 {
1101 // Get the properties object
1102 mlt_properties properties = MLT_FILTER_PROPERTIES( this );
1103
1104 // Initialize the motion estimation context
1105 struct motion_est_context_s *context;
1106 context = mlt_pool_alloc( sizeof(struct motion_est_context_s) );
1107 mlt_properties_set_data( properties, "context", (void *)context, sizeof( struct motion_est_context_s ),
1108 mlt_pool_release, NULL );
1109
1110
1111 // Register the filter
1112 this->process = filter_process;
1113
1114 /* defaults that may be overridden */
1115 context->macroblock_width = 16;
1116 context->macroblock_height = 16;
1117 context->skip_prediction = 0;
1118 context->limit_x = 64;
1119 context->limit_y = 64;
1120 context->search_method = DIAMOND_SEARCH;
1121 context->check_chroma = 0;
1122
1123 /* reference functions that may have optimized versions */
1124 context->compare_reference = sad_reference;
1125 context->vert_deviation_reference = vertical_gradient_reference;
1126 context->horiz_deviation_reference = horizontal_gradient_reference;
1127
1128 // The rest of the buffers will be initialized when the filter is first processed
1129 context->initialized = 0;
1130 }
1131 return this;
1132 }
1133
1134 /** This source code will self destruct in 5...4...3... */