Initial import of the motion estimation filter.
authordezeroex <dezeroex@d19143bc-622f-0410-bfdd-b5b2a6649095>
Fri, 8 Jul 2005 23:37:42 +0000 (23:37 +0000)
committerdezeroex <dezeroex@d19143bc-622f-0410-bfdd-b5b2a6649095>
Fri, 8 Jul 2005 23:37:42 +0000 (23:37 +0000)
git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@751 d19143bc-622f-0410-bfdd-b5b2a6649095

src/modules/motion_est/Makefile [new file with mode: 0644]
src/modules/motion_est/arrow_code.c [new file with mode: 0644]
src/modules/motion_est/arrow_code.h [new file with mode: 0644]
src/modules/motion_est/configure [new file with mode: 0755]
src/modules/motion_est/factory.c [new file with mode: 0644]
src/modules/motion_est/filter_autotrack_rectangle.c [new file with mode: 0644]
src/modules/motion_est/filter_crop_detect.c [new file with mode: 0644]
src/modules/motion_est/filter_motion_est.c [new file with mode: 0644]
src/modules/motion_est/filter_motion_est.h [new file with mode: 0644]
src/modules/motion_est/filter_vismv.c [new file with mode: 0644]
src/modules/motion_est/sad_sse.h [new file with mode: 0644]

diff --git a/src/modules/motion_est/Makefile b/src/modules/motion_est/Makefile
new file mode 100644 (file)
index 0000000..5b26b4f
--- /dev/null
@@ -0,0 +1,57 @@
+include ../../../config.mak
+
+TARGET = ../libmltmotion_est.so
+
+OBJS = factory.o \
+          filter_motion_est.o \
+          filter_benchmark.o \
+          filter_crop_detect.o \
+          filter_autotrack_rectangle.o \
+          arrow_code.o \
+          filter_vismv.o
+
+CFLAGS += -I../.. 
+
+LDFLAGS += -rdynamic
+
+LDFLAGS+=-L../../framework -lmlt
+
+SRCS := $(OBJS:.o=.c)
+
+all:   $(TARGET)
+
+$(TARGET): $(OBJS)
+               $(CC) -shared -o $@ $(OBJS) $(LDFLAGS)
+
+depend:        $(SRCS)
+               $(CC) -MM $(CFLAGS) $^ 1>.depend
+
+dist-clean:    clean
+               rm -f .depend
+
+clean: 
+               rm -f $(OBJS) $(TARGET)
+
+install: all
+       install -m 755 $(TARGET) "$(prefix)/share/mlt/modules"
+
+test: $(TARGET)
+       ~/mlt-devel/mlt/src/inigo/inigo -filter motion_est -filter vismv -filter benchmark -consumer sdl rescale=none real_time=0 audio_off=1 silent=1 /media/cdrecorder/BBC.The.Private.Life.Of.Plants.Pt5.Living.Together.DivX505.AC3.www.MVGroup.org.uk.avi in=50000
+
+hist: $(TARGET)
+       ~/mlt-devel/mlt/src/inigo/inigo -filter motion_est -filter histogram -consumer sdl rescale=none real_time=0 audio_off=1 silent=1 /media/cdrecorder/BBC.The.Private.Life.Of.Plants.Pt5.Living.Together.DivX505.AC3.www.MVGroup.org.uk.avi in=40000
+
+
+test2: $(TARGET)
+       inigo colour:black -filter watermark:"+mello.txt" composite.geometry="0,0:10%x10%;99=90%,90%" composite.out=99 -filter crop_detect -filter motion_est -filter vismv
+
+realtime: $(TARGET)
+       ~/mlt-devel/mlt/src/inigo/inigo -filter motion_est -filter vismv -consumer sdl rescale=none /media/cdrecorder/BBC.The.Private.Life.Of.Plants.Pt5.Living.Together.DivX505.AC3.www.MVGroup.org.uk.avi in=30000
+
+testhist: $(TARGET)
+       ~/mlt-devel/mlt/src/inigo/inigo -consumer sdl rescale=none silent=1 -filter motion_est -filter histogram  -filter vismv /media/cdrecorder/BBC.The.Private.Life.Of.Plants.Pt5.Living.Together.DivX505.AC3.www.MVGroup.org.uk.avi in=10000
+
+
+ifneq ($(wildcard .depend),)
+include .depend
+endif
diff --git a/src/modules/motion_est/arrow_code.c b/src/modules/motion_est/arrow_code.c
new file mode 100644 (file)
index 0000000..08c0f69
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ *     /brief Draw arrows
+ *     /author Zachary Drew, Copyright 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <framework/mlt_frame.h>
+#include "arrow_code.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define MIN(a,b) ((a) > (b) ? (b) : (a))
+
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+
+
+static int w;
+static int h;
+static int xstride;
+static int ystride;
+static mlt_image_format format;
+
+int init_arrows( mlt_image_format *image_format, int width, int height )
+{
+       w = width;
+       h = height;
+       format = *image_format;
+       switch( *image_format ) {
+               case mlt_image_yuv422:
+                       xstride = 2;
+                       ystride = xstride * w;
+                       break; 
+               default:
+                       // I don't know
+                       return 0;
+       }
+       return 1;
+
+}
+
+// ffmpeg borrowed
+static inline int clip(int a, int amin, int amax)
+{
+    if (a < amin)
+        return amin;
+    else if (a > amax)
+        return amax;
+    else
+        return a;
+}
+
+
+/**
+ * draws an line from (ex, ey) -> (sx, sy).
+ * Credits: modified from ffmpeg project
+ * @param ystride stride/linesize of the image
+ * @param xstride stride/element size of the image
+ * @param color color of the arrow
+ */
+void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int color)
+{
+    int t, x, y, fr, f;
+
+
+    sx= clip(sx, 0, w-1);
+    sy= clip(sy, 0, h-1);
+    ex= clip(ex, 0, w-1);
+    ey= clip(ey, 0, h-1);
+
+    buf[sy*ystride + sx*xstride]+= color;
+
+    if(ABS(ex - sx) > ABS(ey - sy)){
+        if(sx > ex){
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx*xstride + sy*ystride;
+        ex-= sx;
+        f= ((ey-sy)<<16)/ex;
+        for(x= 0; x <= ex; x++){
+            y = (x*f)>>16;
+            fr= (x*f)&0xFFFF;
+            buf[ y   *ystride + x*xstride]+= (color*(0x10000-fr))>>16;
+            buf[(y+1)*ystride + x*xstride]+= (color*         fr )>>16;
+        }
+    }else{
+        if(sy > ey){
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx*xstride + sy*ystride;
+        ey-= sy;
+        if(ey) f= ((ex-sx)<<16)/ey;
+        else   f= 0;
+        for(y= 0; y <= ey; y++){
+            x = (y*f)>>16;
+            fr= (y*f)&0xFFFF;
+            buf[y*ystride + x    *xstride]+= (color*(0x10000-fr))>>16;;
+            buf[y*ystride + (x+1)*xstride]+= (color*         fr )>>16;;
+        }
+    }
+}
+
+void draw_rectangle_fill(uint8_t *buf, int x, int y, int w, int h, int color)
+{
+       int i,j;
+       for ( i = 0; i < w; i++ ) 
+               for ( j = 0; j < h; j++ )
+                       buf[ (y+j)*ystride + (x+i)*xstride] = color; 
+}
+
+void draw_rectangle_outline(uint8_t *buf, int x, int y, int w, int h, int color)
+{
+       int i,j;
+       for ( i = 0; i < w; i++ ) {
+               buf[ y*ystride + (x+i)*xstride ] += color; 
+               buf[ (y+h)*ystride + (x+i)*xstride ] += color; 
+       }
+       for ( j = 1; j < h+1; j++ ) {
+               buf[ (y+j)*ystride + x*xstride ] += color;
+               buf[ (y+j)*ystride + (x+w)*xstride ] += color; 
+       }
+}
+/**
+ * draws an arrow from (ex, ey) -> (sx, sy).
+ * Credits: modified from ffmpeg project
+ * @param stride stride/linesize of the image
+ * @param color color of the arrow
+ */
+void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int color){
+
+       int dx,dy;
+       dx= ex - sx;
+       dy= ey - sy;
+
+       if(dx*dx + dy*dy > 3*3){
+               int rx=  dx + dy;
+               int ry= -dx + dy;
+               int length= sqrt((rx*rx + ry*ry)<<8);
+
+               rx= ROUNDED_DIV(rx*3<<4, length);
+               ry= ROUNDED_DIV(ry*3<<4, length);
+
+               draw_line(buf, sx, sy, sx + rx, sy + ry, color);
+               draw_line(buf, sx, sy, sx - ry, sy + rx, color);
+       }
+       draw_line(buf, sx, sy, ex, ey, color);
+}
diff --git a/src/modules/motion_est/arrow_code.h b/src/modules/motion_est/arrow_code.h
new file mode 100644 (file)
index 0000000..0fe5219
--- /dev/null
@@ -0,0 +1,26 @@
+/**
+ *     file: arrow_code.h
+ *
+ *     /brief Misc functions to draw arrows
+ *     /author Zachary Drew, Copyright 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+extern int init_arrows( mlt_image_format *image_format, int width, int height );
+extern void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int color);
+extern void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int color);
+extern void draw_rectangle_fill(uint8_t *buf, int x, int y, int w, int h, int color);
+extern void draw_rectangle_outline(uint8_t *buf, int x, int y, int w, int h, int color);
diff --git a/src/modules/motion_est/configure b/src/modules/motion_est/configure
new file mode 100755 (executable)
index 0000000..62bd832
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+if [ "$help" != "1" ]
+then
+
+cat << EOF >> ../filters.dat
+motion_est             libmltmotion_est.so
+vismv                  libmltmotion_est.so
+benchmark              libmltmotion_est.so
+crop_detect            libmltmotion_est.so
+autotrack_rectangle    libmltmotion_est.so
+EOF
+
+fi
diff --git a/src/modules/motion_est/factory.c b/src/modules/motion_est/factory.c
new file mode 100644 (file)
index 0000000..742367d
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <string.h>
+
+#include "filter_motion_est.h"
+
+extern mlt_filter filter_motion_est_init(char *);
+extern mlt_filter filter_vismv_init(char *);
+extern mlt_filter filter_crop_detect_init(char *);
+extern mlt_filter filter_autotrack_rectangle_init(char *);
+
+void *mlt_create_filter( char *id, void *arg )
+{
+       if ( !strcmp( id, "motion_est" ) )
+               return filter_motion_est_init( arg );
+       if ( !strcmp( id, "vismv" ) )
+               return filter_vismv_init( arg );
+       if ( !strcmp( id, "crop_detect" ) )
+               return filter_crop_detect_init( arg );
+       if ( !strcmp( id, "autotrack_rectangle" ) )
+               return filter_autotrack_rectangle_init( arg );
+       return NULL;
+}
diff --git a/src/modules/motion_est/filter_autotrack_rectangle.c b/src/modules/motion_est/filter_autotrack_rectangle.c
new file mode 100644 (file)
index 0000000..90efadb
--- /dev/null
@@ -0,0 +1,401 @@
+/*
+ *     filter_autotrack_rectangle.c
+ *
+ *     /brief 
+ *     /author Zachary Drew, Copyright 2005
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "filter_motion_est.h"
+
+#include <framework/mlt.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define MIN(a,b) ((a) > (b) ? (b) : (a))
+
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+
+// ffmpeg borrowed
+static inline int clip(int a, int amin, int amax)
+{
+    if (a < amin)
+        return amin;
+    else if (a > amax)
+        return amax;
+    else
+        return a;
+}
+
+
+/**
+ * draws an line from (ex, ey) -> (sx, sy).
+ * Credits: modified from ffmpeg project
+ * @param ystride stride/linesize of the image
+ * @param xstride stride/element size of the image
+ * @param color color of the arrow
+ */
+static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int xstride, int ystride, int color){
+    int t, x, y, fr, f;
+
+//    buf[sy*ystride + sx*xstride]= color;
+    buf[sy*ystride + sx]+= color;
+
+    sx= clip(sx, 0, w-1);
+    sy= clip(sy, 0, h-1);
+    ex= clip(ex, 0, w-1);
+    ey= clip(ey, 0, h-1);
+
+    if(ABS(ex - sx) > ABS(ey - sy)){
+        if(sx > ex){
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx*xstride + sy*ystride;
+        ex-= sx;
+        f= ((ey-sy)<<16)/ex;
+        for(x= 0; x <= ex; x++){
+            y = (x*f)>>16;
+            fr= (x*f)&0xFFFF;
+            buf[ y   *ystride + x*xstride]= (color*(0x10000-fr))>>16;
+            buf[(y+1)*ystride + x*xstride]= (color*         fr )>>16;
+        }
+    }else{
+        if(sy > ey){
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx*xstride + sy*ystride;
+        ey-= sy;
+        if(ey) f= ((ex-sx)<<16)/ey;
+        else   f= 0;
+        for(y= 0; y <= ey; y++){
+            x = (y*f)>>16;
+            fr= (y*f)&0xFFFF;
+            buf[y*ystride + x    *xstride]= (color*(0x10000-fr))>>16;;
+            buf[y*ystride + (x+1)*xstride]= (color*         fr )>>16;;
+        }
+    }
+}
+
+/**
+ * draws an arrow from (ex, ey) -> (sx, sy).
+ * Credits: modified from ffmpeg project
+ * @param stride stride/linesize of the image
+ * @param color color of the arrow
+ */
+static __attribute__((used)) void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int xstride, int ystride, int color){
+    int dx,dy;
+
+//    sx= clip(sx, -100, w+100);
+//    sy= clip(sy, -100, h+100);
+//    ex= clip(ex, -100, w+100);
+//    ey= clip(ey, -100, h+100);
+
+       dx= ex - sx;
+       dy= ey - sy;
+
+       if(dx*dx + dy*dy > 3*3){
+               int rx=  dx + dy;
+               int ry= -dx + dy;
+               int length= sqrt((rx*rx + ry*ry)<<8);
+
+               //FIXME subpixel accuracy
+               rx= ROUNDED_DIV(rx*3<<4, length);
+               ry= ROUNDED_DIV(ry*3<<4, length);
+
+               draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, xstride, ystride, color);
+               draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, xstride, ystride, color);
+       }
+       draw_line(buf, sx, sy, ex, ey, w, h, xstride, ystride, color);
+}
+
+void caculate_motion( struct motion_vector_s *vectors,
+                     mlt_geometry_item boundry,
+                     int macroblock_width,
+                     int macroblock_height,
+                     int mv_buffer_width,
+                     int method )
+{
+
+
+       // translate pixel units (from bounds) to macroblock units
+       // make sure whole macroblock stay within bounds
+       // I know; it hurts.
+       int left_mb = boundry->x / macroblock_width;
+           left_mb += ( (int)boundry->x % macroblock_width == 0 ) ? 0 : 1 ;
+       int top_mb = boundry->y / macroblock_height;
+           top_mb += ( (int)boundry->y % macroblock_height == 0 ) ? 0 : 1 ;
+
+       int right_mb = (boundry->x + boundry->w + 1) / macroblock_width;
+           right_mb -= ( (int)(boundry->x + boundry->w + 1) % macroblock_width == 0 ) ? 0 : 1 ;
+       int bottom_mb = (boundry->y + boundry->h + 1) / macroblock_height;
+           bottom_mb -= ( (int)(boundry->y + boundry->h + 1) % macroblock_height == 0 ) ? 0 : 1 ;
+
+       int i, j, n = 0;
+
+       int average_x = 0, average_y = 0;
+
+       #define CURRENT         ( vectors + j*mv_buffer_width + i )
+
+       for( i = left_mb; i <= right_mb; i++ ){
+               for( j = top_mb; j <= bottom_mb; j++ ){
+
+                       n++;
+
+                       average_x += CURRENT->dx;
+                       average_y += CURRENT->dy;
+               }
+       }
+
+       if ( n == 0 )
+               return;
+
+       average_x /= n;
+       average_y /= n;
+
+       int average2_x = 0, average2_y = 0;
+       for( i = left_mb; i <= right_mb; i++ ){
+               for( j = top_mb; j <= bottom_mb; j++ ){
+
+                       if( ABS(CURRENT->dx - average_x) < 5 &&
+                           ABS(CURRENT->dy - average_y) < 5 )
+                       {
+                               average2_x += CURRENT->dx;
+                               average2_y += CURRENT->dy;
+                       }
+               }
+       }
+       
+       boundry->x -= average2_x/n;
+       boundry->y -= average2_y/n;
+
+
+}
+
+// Image stack(able) method
+static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
+{
+
+       // Get the filter object
+       mlt_filter filter = mlt_frame_pop_service( frame );
+
+       // Get the filter's property object
+       mlt_properties filter_properties = MLT_FILTER_PROPERTIES(filter);
+
+       // Get the frame properties
+       mlt_properties frame_properties = MLT_FRAME_PROPERTIES(frame);
+
+       // Get the frame position
+       mlt_position position = mlt_frame_get_position( frame );
+
+       // Get the new image
+       int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
+
+       if( error != 0 )
+               mlt_properties_debug( frame_properties, "error after mlt_frame_get_image() in autotrack_rectangle", stderr );
+
+       // Get the geometry object
+       mlt_geometry geometry = mlt_properties_get_data(filter_properties, "geometry", NULL);
+
+       // Get the current geometry item
+       struct mlt_geometry_item_s boundry;
+       mlt_geometry_fetch(geometry, &boundry, position);
+//fprintf(stderr, "process %d\n", position);
+
+       // Get the motion vectors
+       struct motion_vector_s *vectors = mlt_properties_get_data( frame_properties, "motion_est.vectors", NULL );
+                
+       // How did the rectangle move?
+       if( vectors != NULL ) {
+
+               int method = mlt_properties_get_int( filter_properties, "method" );
+
+               // Get the size of macroblocks in pixel units
+               int macroblock_height = mlt_properties_get_int( frame_properties, "motion_est.macroblock_height" );
+               int macroblock_width = mlt_properties_get_int( frame_properties, "motion_est.macroblock_width" );
+               int mv_buffer_width = *width / macroblock_width;
+
+               caculate_motion( vectors, &boundry, macroblock_width, macroblock_height, mv_buffer_width, method );
+
+       }
+
+       boundry.key = 1;
+
+       boundry.f[0] = 1;
+       boundry.f[1] = 1;
+       boundry.f[2] = 1;
+       boundry.f[3] = 1;
+       boundry.f[4] = 1;
+
+//     boundry.frame = position;
+       
+       mlt_geometry_insert(geometry, &boundry);
+
+
+       if( mlt_properties_get_int( filter_properties, "debug" ) == 1 )
+       {
+               int xstep, ystep;
+
+               // Calculate the size of our steps (the number of bytes that seperate adjacent pixels in X and Y direction)
+               switch( *format ) {
+                       case mlt_image_yuv422:
+                               xstep = 2;
+                               ystep = xstep * *width;
+                               break; 
+                       default:
+                               // I don't know
+                               return -1;
+                               break;
+               }
+
+               draw_line(*image, boundry.x, boundry.y, boundry.x, boundry.y + boundry.h, *width, *height, xstep, ystep, 0xff);
+               draw_line(*image, boundry.x, boundry.y + boundry.h, boundry.x + boundry.w, boundry.y + boundry.h, *width, *height, xstep, ystep, 0xff);
+               draw_line(*image, boundry.x + boundry.w, boundry.y + boundry.h, boundry.x + boundry.w, boundry.y, *width, *height, xstep, ystep, 0xff);
+               draw_line(*image, boundry.x + boundry.w, boundry.y, boundry.x, boundry.y, *width, *height, xstep, ystep, 0xff);
+
+       }
+       return error;
+}
+
+static int attach_boundry_to_frame( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
+{
+       // Get the filter object
+       mlt_filter filter = mlt_frame_pop_service( frame );
+
+       // Get the filter's property object
+       mlt_properties filter_properties = MLT_FILTER_PROPERTIES(filter);
+
+       // Get the frame properties
+       mlt_properties frame_properties = MLT_FRAME_PROPERTIES(frame);
+
+       // Get the frame position
+       mlt_position position = mlt_frame_get_position( frame );
+
+       // gEt the geometry object
+       mlt_geometry geometry = mlt_properties_get_data(filter_properties, "geometry", NULL);
+
+       // Get the current geometry item
+       mlt_geometry_item geometry_item = mlt_pool_alloc( sizeof( struct mlt_geometry_item_s ) );
+       mlt_geometry_fetch(geometry, geometry_item, position);
+//fprintf(stderr, "attach %d\n", position);
+
+       mlt_properties_set_data( frame_properties, "bounds", geometry_item, sizeof( struct mlt_geometry_item_s ), mlt_pool_release, NULL );
+
+       // Get the new image
+       int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
+
+       if( error != 0 )
+               mlt_properties_debug( frame_properties, "error after mlt_frame_get_image() in autotrack_rectangle attach_boundry_to_frame", stderr );
+
+       return error;
+}
+
+/** Filter processing.
+*/
+
+static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
+{
+
+       //mlt_properties_debug(MLT_SERVICE_PROPERTIES(mlt_service_consumer(mlt_filter_service(this))), "consumer!", stderr);
+
+
+        /* modify the frame with the current geometry */
+       mlt_frame_push_service( frame, this);
+       mlt_frame_push_get_image( frame, attach_boundry_to_frame );
+
+
+
+       /* apply the motion estimation filter */
+       mlt_filter motion_est = mlt_properties_get_data( MLT_FILTER_PROPERTIES(this), "_motion_est", NULL ); 
+       mlt_filter_process( motion_est, frame);
+
+
+
+       /* calculate the new geometry based on the motion */
+       mlt_frame_push_service( frame, this);
+       mlt_frame_push_get_image( frame, filter_get_image );
+
+
+       /* visualize the motion vectors */
+       if( mlt_properties_get_int( MLT_FILTER_PROPERTIES(this), "debug" ) == 1 )
+       {
+               mlt_filter vismv = mlt_properties_get_data( MLT_FILTER_PROPERTIES(this), "_vismv", NULL );
+               if( vismv == NULL ) {
+                       vismv = mlt_factory_filter( "vismv", NULL );
+                       mlt_properties_set_data( MLT_FILTER_PROPERTIES(this), "_vismv", vismv, 0, (mlt_destructor)mlt_filter_close, NULL );
+               }
+
+               mlt_filter_process( vismv, frame );
+       }
+
+
+       return frame;
+}
+
+/** Constructor for the filter.
+*/
+
+
+mlt_filter filter_autotrack_rectangle_init( char *arg )
+{
+       mlt_filter this = mlt_filter_new( );
+       if ( this != NULL )
+       {
+               this->process = filter_process;
+
+
+               mlt_geometry geometry = mlt_geometry_init();
+
+               // Initialize with the supplied geometry
+               if( arg != NULL ) {
+
+                       struct mlt_geometry_item_s item;
+
+                       mlt_geometry_parse_item( geometry, &item, arg  );
+
+                       item.frame = 0;
+                       item.key = 1;
+                       item.mix = 100;
+
+                       mlt_geometry_insert( geometry, &item );
+
+               }
+
+               mlt_properties_set_data( MLT_FILTER_PROPERTIES(this), "geometry", geometry, 0, (mlt_destructor)mlt_geometry_close, (mlt_serialiser)mlt_geometry_serialise );
+
+               mlt_filter motion_est = mlt_factory_filter("motion_est", NULL);
+               if( motion_est != NULL )
+                       mlt_properties_set_data( MLT_FILTER_PROPERTIES(this), "_motion_est", motion_est, 0, (mlt_destructor)mlt_filter_close, NULL );
+               else {
+                       mlt_filter_close( this );
+                       return NULL;
+               }
+
+               //mlt_events_init( this );
+               //mlt_events_listen(mlt_service_consumer(mlt_filter_service(this)
+       }
+
+       return this;
+}
+
+/** This source code will self destruct in 5...4...3...
+*/
diff --git a/src/modules/motion_est/filter_crop_detect.c b/src/modules/motion_est/filter_crop_detect.c
new file mode 100644 (file)
index 0000000..4ec5ed9
--- /dev/null
@@ -0,0 +1,329 @@
+/**
+ *     /brief Crop Detection filter
+ *
+ *     /author Zachary Drew, Copyright 2005
+ *
+ *     inspired by mplayer's cropdetect filter
+ *
+ *     Note: The goemetry generated is zero-indexed and is inclusive of the end values 
+ *
+ *     Options:
+ *     -filter crop_detect debug=1                     // Visualize crop
+ *     -filter crop_detect frequency=25                // Detect the crop once a second
+ *     -filter crop_detect frequency=0                 // Never detect unless the producer changes
+ *     -filter crop_detect thresh=100                  // Changes the threshold (default = 25)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define DEBUG
+#define DEFAULT_THRESH 20
+
+#include <framework/mlt.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+
+#ifdef DEBUG
+// ffmpeg borrowed
+static inline int clip(int a, int amin, int amax)
+{
+    if (a < amin)
+        return amin;
+    else if (a > amax)
+       return amax;
+    else
+        return a;
+}
+
+
+/**
+ * draws an line from (ex, ey) -> (sx, sy).
+ * Credits: modified from ffmpeg project
+ * @param ystride stride/linesize of the image
+ * @param xstride stride/element size of the image
+ * @param color color of the arrow
+ */
+static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int xstride, int ystride, int color){
+    int t, x, y, fr, f;
+
+//    buf[sy*ystride + sx*xstride]= color;
+    buf[sy*ystride + sx]+= color;
+
+    sx= clip(sx, 0, w-1);
+    sy= clip(sy, 0, h-1);
+    ex= clip(ex, 0, w-1);
+    ey= clip(ey, 0, h-1);
+
+    if(ABS(ex - sx) > ABS(ey - sy)){
+        if(sx > ex){
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx*xstride + sy*ystride;
+        ex-= sx;
+        f= ((ey-sy)<<16)/ex;
+        for(x= 0; x <= ex; x++){
+            y = (x*f)>>16;
+            fr= (x*f)&0xFFFF;
+            buf[ y   *ystride + x*xstride]= (color*(0x10000-fr))>>16;
+            buf[(y+1)*ystride + x*xstride]= (color*         fr )>>16;
+        }
+    }else{
+        if(sy > ey){
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx*xstride + sy*ystride;
+        ey-= sy;
+        if(ey) f= ((ex-sx)<<16)/ey;
+        else   f= 0;
+        for(y= 0; y <= ey; y++){
+            x = (y*f)>>16;
+            fr= (y*f)&0xFFFF;
+            buf[y*ystride + x    *xstride]= (color*(0x10000-fr))>>16;;
+            buf[y*ystride + (x+1)*xstride]= (color*         fr )>>16;;
+        }
+    }
+}
+
+/**
+ * draws an arrow from (ex, ey) -> (sx, sy).
+ * Credits: modified from ffmpeg project
+ * @param stride stride/linesize of the image
+ * @param color color of the arrow
+ */
+static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int xstride, int ystride, int color){
+    int dx,dy;
+
+       dx= ex - sx;
+       dy= ey - sy;
+
+       if(dx*dx + dy*dy > 3*3){
+               int rx=  dx + dy;
+               int ry= -dx + dy;
+               int length= sqrt((rx*rx + ry*ry)<<4);
+
+               //FIXME subpixel accuracy
+               rx= ROUNDED_DIV(rx*3<<4, length);
+               ry= ROUNDED_DIV(ry*3<<4, length);
+
+               draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, xstride, ystride, color);
+               draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, xstride, ystride, color);
+       }
+       draw_line(buf, sx, sy, ex, ey, w, h, xstride, ystride, color);
+}
+#endif
+
+// Image stack(able) method
+static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
+{
+
+       // Get the filter object and properties
+       mlt_filter filter = mlt_frame_pop_service( this );
+       mlt_properties properties = MLT_FILTER_PROPERTIES( filter );
+
+       // Get the new image
+       int error = mlt_frame_get_image( this, image, format, width, height, 1 );
+
+       if( error != 0 ) {
+               mlt_properties_debug( MLT_FRAME_PROPERTIES(this), "error after mlt_frame_get_image()", stderr );
+               return error;
+       }
+
+       // Parameter that describes how often to check for the crop
+       int frequency = mlt_properties_get_int( properties, "frequency");
+
+       // Producers may start with blank footage, by default we will skip, oh, 5 frames unless overridden
+       int skip = mlt_properties_get_int( properties, "skip");
+
+       // The result
+       mlt_geometry_item bounds = mlt_properties_get_data( properties, "bounds", NULL );
+
+       // Initialize if needed
+       if( bounds == NULL ) {
+               bounds = calloc( 1, sizeof( struct mlt_geometry_item_s ) );
+               bounds->w = *width;
+               bounds->h = *height;
+               mlt_properties_set_data( properties, "bounds", bounds, sizeof( struct mlt_geometry_item_s ), free, NULL );
+       }
+
+//     mlt_properties first = (mlt_properties)  mlt_deque_peek_front( MLT_FRAME_SERVICE_STACK(this) );
+//     int current_producer_id = mlt_properties_get_int( first, "_unique_id");
+//     int former_producer_id = mlt_properties_get_int(properties, "_former_unique_id");
+//     mlt_properties_set_int(properties, "_former_unique_id", current_producer_id);
+
+       // For periodic detection (with offset of 'skip')
+       if( frequency == 0 || (mlt_frame_get_position(this)+skip) % frequency != 0)
+       {
+
+               // Inject in stream 
+               mlt_properties_set_data( MLT_FRAME_PROPERTIES(this), "bounds", bounds, sizeof( struct mlt_geometry_item_s ), NULL, NULL );
+
+               return 0;
+       }
+       
+
+       // There is no way to detect a crop for sure, so make up an arbitrary one
+       int thresh = mlt_properties_get_int( properties, "thresh" );
+
+       int xstride, ystride;
+
+       switch( *format ) {
+               case mlt_image_yuv422:
+                       xstride = 2;
+                       ystride = 2 * *width;
+                       break;
+               default:
+                       fprintf(stderr, "image format not supported by filter_crop_detect\n");
+                       return -1;
+       }
+
+       int x, y, average_brightness, deviation; // Scratch variables
+
+       // Top crop
+       for( y = 0; y < *height/2; y++ ) {
+               average_brightness = 0;
+               deviation = 0;
+               bounds->y = y;
+               for( x = 0; x < *width; x++ )
+                       average_brightness += *(*image + y*ystride + x*xstride);
+
+               average_brightness /= *width;
+
+               for( x = 0; x < *width; x++ )
+                       deviation += abs(average_brightness - *(*image + y*ystride + x*xstride));
+
+               if( deviation >= thresh )
+                       break;
+       }
+
+       // Bottom crop
+       for( y = *height - 1; y >= *height/2; y-- ) {
+               average_brightness = 0;
+               deviation = 0;
+               bounds->h = y;
+               for( x = 0; x < *width; x++ )
+                       average_brightness += *(*image + y*ystride + x*xstride);
+
+               average_brightness /= *width;
+
+               for( x = 0; x < *width; x++ )
+                       deviation += abs(average_brightness - *(*image + y*ystride + x*xstride));
+
+               if( deviation >= thresh )
+                       break;
+       }
+
+       // Left crop    
+       for( x = 0; x < *width/2; x++ ) {
+               average_brightness = 0;
+               deviation = 0;
+               bounds->x = x;
+               for( y = 0; y < *height; y++ )
+                       average_brightness += *(*image + y*ystride + x*xstride);
+
+               average_brightness /= *height;
+
+               for( y = 0; y < *height; y++ )
+                       deviation += abs(average_brightness - *(*image + y*ystride + x*xstride));
+
+               if( deviation >= thresh )
+                       break;
+       }
+
+       // Right crop
+       for( x = *width - 1; x >= *width/2; x-- ) {
+               average_brightness = 0;
+               deviation = 0;
+               bounds->w = x;
+               for( y = 0; y < *height; y++ )
+                       average_brightness += *(*image + y*ystride + x*xstride);
+
+               average_brightness /= *height;
+
+               for( y = 0; y < *height; y++ )
+                       deviation += abs(average_brightness - *(*image + y*ystride + x*xstride));
+
+               if( deviation >= thresh )
+                       break;
+       }
+
+       /* Debug: Draw arrows to show crop */
+       if( mlt_properties_get_int( properties, "debug") == 1 )
+       {
+               draw_arrow(*image, bounds->x, *height/2, bounds->x+40, *height/2, *width, *height, xstride, ystride, 0xff);
+               draw_arrow(*image, *width/2, bounds->y, *width/2, bounds->y+40, *width, *height, xstride, ystride, 0xff);
+               draw_arrow(*image, bounds->w, *height/2, bounds->w-40, *height/2, *width, *height, xstride, ystride, 0xff);
+               draw_arrow(*image, *width/2, bounds->h, *width/2, bounds->h-40, *width, *height, xstride, ystride, 0xff);
+
+               fprintf(stderr, "Top:%f Left:%f Right:%f Bottom:%f\n", bounds->y, bounds->x, bounds->w, bounds->h);
+       }
+
+       bounds->w -= bounds->x;
+       bounds->h -= bounds->y;
+
+       /* inject into frame */
+       mlt_properties_set_data( MLT_FRAME_PROPERTIES(this), "bounds", bounds, sizeof( struct mlt_geometry_item_s ), NULL, NULL );
+
+
+       return error;
+}
+
+
+
+/** Filter processing.
+*/
+
+static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
+{
+
+       // Put the filter object somewhere we can find it
+       mlt_frame_push_service( frame, this);
+
+       // Push the frame filter
+       mlt_frame_push_get_image( frame, filter_get_image );
+
+       return frame;
+}
+
+/** Constructor for the filter.
+*/
+mlt_filter filter_crop_detect_init( char *arg )
+{
+       mlt_filter this = mlt_filter_new( );
+       if ( this != NULL )
+       {
+               this->process = filter_process;
+
+               /* defaults */
+               mlt_properties_set_int( MLT_FILTER_PROPERTIES(this), "frequency", 1);
+               mlt_properties_set_int( MLT_FILTER_PROPERTIES(this), "thresh", 25);
+               mlt_properties_set_int( MLT_FILTER_PROPERTIES(this), "clip", 5);
+               mlt_properties_set_int( MLT_FILTER_PROPERTIES(this), "former_producer_id", -1);
+
+       }
+
+       return this;
+}
+
+/** This source code will self destruct in 5...4...3...
+*/
+
diff --git a/src/modules/motion_est/filter_motion_est.c b/src/modules/motion_est/filter_motion_est.c
new file mode 100644 (file)
index 0000000..98c0898
--- /dev/null
@@ -0,0 +1,1134 @@
+/*
+ *     /brief fast motion estimation filter
+ *     /author Zachary Drew, Copyright 2005
+ *
+ *     Currently only uses Gamma data for comparisonon (bug or feature?)
+ *     Vector optimization coming soon. 
+ *
+ *     Vector orientation: The vector data that is generated for the current frame specifies
+ *     the motion from the previous frame to the current frame. Thus, to know how a macroblock
+ *     in the current frame will move in the future, the next frame is needed.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+
+#include "filter_motion_est.h"
+#include <framework/mlt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <sys/time.h>
+#include <assert.h>
+
+#include "sad_sse.h"
+
+
+#undef DEBUG
+#undef DEBUG_ASM
+#undef BENCHMARK
+#undef COUNT_COMPARES
+
+#define DIAMOND_SEARCH 0x0
+#define FULL_SEARCH 0x1
+#define SHIFT 8 
+#define MIN(a,b) ((a) > (b) ? (b) : (a))
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+
+#ifdef COUNT_COMPARES
+       int compares;
+#endif
+
+typedef struct motion_vector_s motion_vector;
+
+struct yuv_data
+{
+       uint8_t *y;
+       uint8_t *u;
+       uint8_t *v;
+
+};
+
+struct motion_est_context_s
+{
+       int initialized;                                //<! true if filter has been initialized
+
+       /* same as mlt_frame's parameters */
+       int width, height;
+
+       /* Operational details */
+       int macroblock_width, macroblock_height;
+       int xstride, ystride;
+       //uint8_t *former_image;                        //<! Copy of the previous frame's image
+       struct yuv_data former_image, current_image;
+       int search_method, skip_prediction, shot_change;
+       int limit_x, limit_y;                   //<! max x and y of a motion vector
+       int edge_blocks_x, edge_blocks_y;
+       int initial_thresh;
+       int check_chroma;                       // if check_chroma == 1 then compare chroma
+
+       /* bounds */
+       struct mlt_geometry_item_s prev_bounds; // Cache last frame's bounds (needed for predictor vectors validity)
+       struct mlt_geometry_item_s *bounds;     // Current bounds
+
+       /* bounds in macroblock units */
+       int left_mb, prev_left_mb, right_mb, prev_right_mb;
+       int top_mb, prev_top_mb, bottom_mb, prev_bottom_mb;
+
+       /* size of our vector buffers */
+       int mv_buffer_height, mv_buffer_width, mv_size;
+
+       /* vector buffers */
+       int former_vectors_valid;               //<! true if the previous frame's buffered motion vector data is valid
+       motion_vector *former_vectors, *current_vectors;
+       motion_vector *bizarro_vectors;
+       mlt_position former_frame_position, current_frame_position;
+
+       /* two metrics for diagnostics. lower is a better estimation but beware of local minima  */
+       float predictive_misses;                // How often do the prediction metion vectors fail?
+       int comparison_average;                 // How far does the best estimation deviate from a perfect comparison?
+       int bad_comparisons;
+       int average_length;
+       int average_x, average_y;
+
+       /* run-time configurable comparison functions */
+       int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int);
+       int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int);
+       int (*vert_deviation_reference)(uint8_t *, int, int, int, int);
+       int (*horiz_deviation_reference)(uint8_t *, int, int, int, int);
+
+};
+
+
+// Clip the macroblocks as required. Only used for blocks at the edge of the picture
+// "from" is assumed to be unclipped
+inline static int clip( int *from_x,
+                       int *from_y,
+                       int *to_x,
+                       int *to_y,
+                       int *w,                 //<! macroblock width
+                       int *h,                 //<! macroblock height
+                       int width,              //<! image width
+                       int height)             //<! image height
+{
+
+       uint32_t penalty = 1 << SHIFT;  // Retain a few extra bits of precision minus floating-point's blemishes
+       int diff;
+
+       // Origin of macroblock moves left of absolute boundy
+       if( *to_x < 0 ) {
+               if( *to_x + *w <= 0) return 0;                  // Clipped out of existance
+               penalty = (*w * penalty) / (*w + *to_x);         // Recipricol of the fraction of the block that remains
+               *from_x -= *to_x;
+               *w += *to_x; 
+               *to_x = 0;
+       }
+       // Portion of macroblock moves right of absolute boundry
+       else if( *to_x + *w > width ) {
+               if(*to_x >= width) return 0;                    // Clipped out of existance
+               diff  = *to_x + *w - width;                     // Width of area clipped (0 < diff <  macroblock width)
+               penalty = (*w * penalty) / (*w - diff);         // Recipricol of the fraction of the block that remains
+               *w -= diff;
+       }
+       // Origin of macroblock moves above absolute boundy
+       if( *to_y < 0 ) {
+               if( *to_y + *h <= 0) return 0;                  // Clipped out of existance
+               penalty = (*h * penalty) / (*h + *to_y);        // Recipricol of the fraction of the block that remains
+               *from_y -= *to_y;
+               *h += *to_y;
+               *to_y = 0;
+       }
+       // Portion of macroblock moves bellow absolute boundry
+       else if( *to_y + *h > height ) {
+               if(*to_y >= height) return 0;                   // Clipped out of existance
+               diff = *to_y + *h - height;                     // Height of area clipped (0 < diff < macroblock height)
+               penalty = (*h * penalty) / (*h - diff);         // Recipricol of the fraction of the block that is clipped
+               *h -= diff;
+       }
+       return penalty;
+}
+
+
+/** /brief Reference Sum of Absolute Differences comparison function
+*
+*/
+inline static int sad_reference( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int i, j, score = 0;
+       for ( j = 0; j < h; j++ ){
+               for ( i = 0; i < w; i++ ){
+                       score += ABS( block1[i*xstride] - block2[i*xstride] );
+               }
+               block1 += ystride;
+               block2 += ystride;
+       }
+
+       return score;
+}
+
+inline static void change_422_to_444_planar_rep( uint8_t *image, struct yuv_data yuv, struct motion_est_context_s *c )
+{
+       register uint8_t *p = image;
+       register uint8_t *q = image + c->width * c->height * 2;
+       while ( *p != *q ) {
+               *(yuv.y++) = *(p ++);
+               *(yuv.u++) = *p;
+               *(yuv.u++) = *(p ++);
+               *(yuv.y++) = *(p ++);
+               *(yuv.v++) = *p;
+               *(yuv.v++) = *(p ++);
+       }
+}
+
+// broken
+inline static void change_420p_to_444_planar_rep( uint8_t *image, struct yuv_data yuv, struct motion_est_context_s *c )
+{
+       uint8_t *p = image + c->width * c->height;
+       uint8_t *q = p + c->width*c->height/2;
+       uint8_t *u2, *v2;
+       while( *p != *q ) {
+               u2 = yuv.u + c->width;
+               *yuv.u  ++ = *p;
+               *yuv.u  ++ = *p;
+               *u2 ++ = *p;
+               *u2 ++ = *p ++;
+       }
+
+       *q += c->width*c->height/2;
+       while( *p != *q ) {
+               v2 = yuv.v + c->width;
+               *yuv.v  ++ = *p;
+               *yuv.v  ++ = *p;
+               *v2 ++ = *p;
+               *v2 ++ = *p ++;
+       }
+
+}
+
+/** /brief Abstracted block comparison function
+*/
+inline static int compare( uint8_t *from,
+                          uint8_t *to,
+                          int from_x,
+                          int from_y,
+                          int to_x,
+                          int to_y,
+                          struct motion_est_context_s *c)
+{
+#ifdef COUNT_COMPARES
+       compares++;
+#endif
+
+       if( ABS(from_x - to_x) >= c->limit_x || ABS(from_y - to_y) >= c->limit_y )
+               return MAX_MSAD;
+
+       int score;
+       int (*cmp)(uint8_t *, uint8_t *, int, int, int, int) = c->compare_optimized;
+
+       int mb_w = c->macroblock_width;
+       int mb_h = c->macroblock_height;
+
+       int penalty = clip(&from_x, &from_y, &to_x, &to_y, &mb_w, &mb_h, c->width, c->height);
+       if ( penalty == 1<<SHIFT)
+           penalty = clip(&to_x, &to_y, &from_x, &from_y, &mb_w, &mb_h, c->width, c->height);
+
+       if( penalty == 0 )                      // Clipped out of existance
+               return MAX_MSAD;
+       else if( penalty != 1<<SHIFT )          // SIMD optimized comparison won't work
+               cmp = c->compare_reference;
+
+       uint8_t *from_block = from + from_x * c->xstride + from_y * c->ystride; 
+       uint8_t *to_block = to + to_x * c->xstride + to_y * c->ystride; 
+
+       #ifdef DEBUG_ASM        
+       if( penalty == 1<<SHIFT ){
+               score = c->compare_reference( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
+               int score2 = c->compare_optimized( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
+               if ( score != score2 )
+                       fprintf(stderr, "Your assembly doesn't work! Reference: %d Asm: %d\n", score, score2);
+       }
+       else
+       #endif
+
+       score = cmp( from_block, to_block, c->xstride, c->ystride, mb_w, mb_h );
+
+       return ( score * penalty ) >> SHIFT;                    // The extra precision is no longer wanted
+}
+
+static inline void check_candidates (   struct yuv_data *from, struct yuv_data *to,
+                                       int from_x, int from_y,
+                                       motion_vector *candidates, int count, int unique,
+                                       motion_vector *result,
+                                       struct motion_est_context_s *c )
+{
+               int score, i, j;
+               /* Scan for the best candidate */
+               for ( i = 0; i < count; i++ )
+               {
+                       // this little dohicky ignores duplicate candidates, if they are possible
+                       if ( unique == 0 ) {
+                               j = 0;
+                               while ( j < i )
+                               {
+                                       if ( candidates[j].dx == candidates[i].dx &&
+                                            candidates[j].dy == candidates[i].dy )
+                                                       goto next_for_loop;
+
+                                       j++;
+                               }
+                       }
+
+                       // Luma
+                       score = compare( from->y, to->y, from_x, from_y,
+                                        from_x + candidates[i].dx,     /* to x */
+                                        from_y + candidates[i].dy,     /* to y */
+                                        c);
+
+                       if ( c->check_chroma ) {
+                               if ( score >= result->msad )    // Early term
+                                       continue;
+
+                               // Chroma - U
+                               score += compare( from->u, to->u, from_x, from_y,
+                                                from_x + candidates[i].dx,     /* to x */
+                                                from_y + candidates[i].dy,     /* to y */
+                                                c);
+       
+                               if ( score >= result->msad )    // Early term
+                                       continue;
+       
+                               // Chroma - V
+                               score += compare( from->v, to->v, from_x, from_y,
+                                                from_x + candidates[i].dx,     /* to x */
+                                                from_y + candidates[i].dy,     /* to y */
+                                                c);
+                       }
+
+                       if ( score < result->msad ) {   // New minimum
+                               result->dx = candidates[i].dx;
+                               result->dy = candidates[i].dy;
+                               result->msad = score;
+                       }
+                       next_for_loop:;
+               }
+}
+
+/* /brief Diamond search
+* Operates on a single macroblock
+*/
+static inline void diamond_search(
+                       struct yuv_data *from,                  //<! Image data from previous frame
+                       struct yuv_data *to,                    //<! Image data in current frame
+                       int mb_x,                       //<! X upper left corner of macroblock
+                       int mb_y,                       //<! U upper left corner of macroblock
+                       struct motion_vector_s *result, //<! Best predicted mv and eventual result
+                       struct motion_est_context_s *c) //<! motion estimation context
+{
+
+       // diamond search pattern
+       motion_vector candidates[4];
+
+       // Keep track of best and former best candidates
+       motion_vector best, former;
+
+       // The direction of the refinement needs to be known
+       motion_vector current;
+
+       int i, first = 1;
+
+       // Loop through the search pattern
+       while( 1 ) {
+       
+               current.dx = result->dx;
+               current.dy = result->dy;
+
+               if ( first == 1 )       // Set the initial pattern
+               {
+                       candidates[0].dx = result->dx + 1; candidates[0].dy = result->dy + 0;
+                       candidates[1].dx = result->dx + 0; candidates[1].dy = result->dy + 1;
+                       candidates[2].dx = result->dx - 1; candidates[2].dy = result->dy + 0;
+                       candidates[3].dx = result->dx + 0; candidates[3].dy = result->dy - 1;
+                       i = 4;
+               }
+               else     // Construct the next portion of the search pattern
+               {
+                       candidates[0].dx = result->dx + best.dx;
+                       candidates[0].dy = result->dy + best.dy;
+                       if (best.dx == former.dx && best.dy == former.dy) {
+                               candidates[1].dx = result->dx + best.dy;
+                               candidates[1].dy = result->dy + best.dx;                //      Yes, the wires
+                               candidates[2].dx = result->dx - best.dy;                //      are crossed
+                               candidates[2].dy = result->dy - best.dx;
+                               i = 3;
+                       } else {
+                               candidates[1].dx = result->dx + former.dx;
+                               candidates[1].dy = result->dy + former.dy;
+                               i = 2;
+                       }
+       
+                       former.dx = best.dx; former.dy = best.dy;       // Keep track of new former best
+               }
+       
+               check_candidates ( from, to, mb_x, mb_y, candidates, i, 1, result, c ); 
+               best.dx = result->dx - current.dx;
+               best.dy = result->dy - current.dy;
+
+               if ( best.dx == 0 && best.dy == 0 )
+                       return;
+
+               if ( first == 1 ){
+                       first = 0;
+                       former.dx = best.dx; former.dy = best.dy; // First iteration, sensible value for former_d*
+               }
+       }
+}
+
+/* /brief Full (brute) search 
+* Operates on a single macroblock
+*/
+static void full_search(
+                       struct yuv_data *from,                  //<! Image data from previous frame
+                       struct yuv_data *to,                    //<! Image data in current frame
+                       int mb_x,                       //<! X upper left corner of macroblock
+                       int mb_y,                       //<! U upper left corner of macroblock
+                       struct motion_vector_s *result, //<! Best predicted mv and eventual result
+                       struct motion_est_context_s *c) //<! motion estimation context
+{
+       // Keep track of best candidate
+       int i,j,score;
+
+       // Go loopy
+       for( i = -c->macroblock_width; i <= c->macroblock_width; i++ ){
+               for( j = -c->macroblock_height; j <=  c->macroblock_height; j++ ){
+
+                       score = compare( from->y, to->y,
+                                        mb_x,          /* from x */
+                                        mb_y,          /* from y */
+                                        mb_x + i,      /* to x */
+                                        mb_y + j,      /* to y */
+                                        c);            /* context */
+
+                       if ( score < result->msad ) {
+                               result->dx = i;
+                               result->dy = j;
+                               result->msad = score;
+                       }
+               }
+       }
+}
+
+// Credits: ffmpeg
+// return the median
+static inline int median_predictor(int a, int b, int c) {
+       if ( a > b ){
+               if ( c > b ){
+                       if ( c > a  ) b = a;
+                       else      b = c;
+               }
+       } else {
+               if ( b > c ){
+                       if ( c > a ) b = c;
+                       else     b = a;
+               }
+       }
+       return b;
+}
+
+inline static int vertical_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
+{
+       int i, j, average, deviation = 0;
+       for ( i = 0; i < w; i++ ){
+               average = 0;
+               for ( j = 0; j < h; j++ ){
+                       average += *(block + i*xstride + j*ystride);
+               }
+               average /= h;
+               for ( j = 0; j < h; j++ ){
+                       deviation += ABS(average - block[i*xstride + j*ystride]);
+               }
+       }
+
+       return deviation;
+}
+
+inline static int horizontal_gradient_reference( uint8_t *block, int xstride, int ystride, int w, int h )
+{
+       int i, j, average, deviation = 0;
+       for ( j = 0; j < h; j++ ){
+               average = 0;
+               for ( i = 0; i < w; i++ ){
+                       average += block[i*xstride + j*ystride];
+               }
+               average /= w;
+               for ( i = 0; i < w; i++ ){
+                       deviation += ABS(average - block[i*xstride + j*ystride]);
+               }
+       }
+
+       return deviation;
+}
+
+// Macros for pointer calculations
+#define CURRENT(i,j)   ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
+#define FORMER(i,j)    ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
+
+void collect_pre_statistics( struct motion_est_context_s *c, uint8_t *image ) {
+
+       int i, j, count = 0;
+       uint8_t *p;
+
+       for ( i = c->left_mb; i <= c->right_mb; i++ ){
+        for ( j = c->top_mb; j <= c->bottom_mb; j++ ){  
+               count++;
+               p = image + i * c->macroblock_width * c->xstride + j * c->macroblock_height * c->ystride;
+               CURRENT(i,j)->vert_dev = c->vert_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
+               CURRENT(i,j)->horiz_dev = c->horiz_deviation_reference( p, c->xstride, c->ystride, c->macroblock_width, c->macroblock_height );
+        }
+       }
+}
+
+
+
+/** /brief Motion search
+*
+*
+* Search for the Vector that best represents the motion *from the last frame *to the current frame
+* Vocab: Colocated - the pixel in the previous frame at the current position
+*
+* Based on enhanced predictive zonal search. [Tourapis 2002]
+*/
+static void search( struct yuv_data from,                      //<! Image data. Motion vector source in previous frame
+                   struct yuv_data to,                 //<! Image data. Motion vector destination current
+                   struct motion_est_context_s *c)     //<! The context
+{
+
+#ifdef COUNT_COMPARES
+       compares = 0;
+#endif
+
+       motion_vector candidates[10];
+       motion_vector *here;            // This one gets used alot (about 30 times per macroblock)
+       int n = 0;
+
+       int i, j, count=0;
+
+       // For every macroblock, perform motion vector estimation
+       for( i = c->left_mb; i <= c->right_mb; i++ ){
+        for( j = c->top_mb; j <= c->bottom_mb; j++ ){  
+
+               here = CURRENT(i,j);
+               here->valid = 1;
+               here->color = 100;
+               here->msad = MAX_MSAD;
+               count++;
+               n = 0;
+
+               /* Stack the predictors [i.e. checked in reverse order] */
+
+               /* Adjacent to collocated */
+               if( c->former_vectors_valid )
+               {
+                       // Top of colocated
+                       if( j > c->prev_top_mb ){// && COL_TOP->valid ){
+                               candidates[n  ].dx = FORMER(i,j-1)->dx;
+                               candidates[n++].dy = FORMER(i,j-1)->dy;
+                       }
+       
+                       // Left of colocated
+                       if( i > c->prev_left_mb ){// && COL_LEFT->valid ){
+                               candidates[n  ].dx = FORMER(i-1,j)->dx;
+                               candidates[n++].dy = FORMER(i-1,j)->dy;
+                       }
+       
+                       // Right of colocated
+                       if( i < c->prev_right_mb ){// && COL_RIGHT->valid ){
+                               candidates[n  ].dx = FORMER(i+1,j)->dx;
+                               candidates[n++].dy = FORMER(i+1,j)->dy;
+                       }
+       
+                       // Bottom of colocated
+                       if( j < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
+                               candidates[n  ].dx = FORMER(i,j+1)->dx;
+                               candidates[n++].dy = FORMER(i,j+1)->dy;
+                       }
+
+                       // And finally, colocated
+                       candidates[n  ].dx = FORMER(i,j)->dx;
+                       candidates[n++].dy = FORMER(i,j)->dy;
+               }
+
+               // For macroblocks not in the top row
+               if ( j > c->top_mb) {
+
+                       // Top if ( TOP->valid ) {
+                               candidates[n  ].dx = CURRENT(i,j-1)->dx;
+                               candidates[n++].dy = CURRENT(i,j-1)->dy;
+                       //}
+
+                       // Top-Right, macroblocks not in the right row
+                       if ( i < c->right_mb ){// && TOP_RIGHT->valid ) {
+                               candidates[n  ].dx = CURRENT(i+1,j-1)->dx;
+                               candidates[n++].dy = CURRENT(i+1,j-1)->dy;      
+                       }
+               }
+
+               // Left, Macroblocks not in the left column
+               if ( i > c->left_mb ){// && LEFT->valid ) {
+                       candidates[n  ].dx = CURRENT(i-1,j)->dx;
+                       candidates[n++].dy = CURRENT(i-1,j)->dy;
+               }
+
+               /* Median predictor vector (median of left, top, and top right adjacent vectors) */
+               if ( i > c->left_mb && j > c->top_mb && i < c->right_mb
+                        )//&& LEFT->valid && TOP->valid && TOP_RIGHT->valid )
+               { 
+                       candidates[n  ].dx = median_predictor( CURRENT(i-1,j)->dx, CURRENT(i,j-1)->dx, CURRENT(i+1,j-1)->dx);
+                       candidates[n++].dy = median_predictor( CURRENT(i-1,j)->dy, CURRENT(i,j-1)->dy, CURRENT(i+1,j-1)->dy);
+               }
+
+               // Zero vector
+               candidates[n  ].dx = 0;
+               candidates[n++].dy = 0;
+
+               int from_x = i * c->macroblock_width;
+               int from_y = j * c->macroblock_height;
+               check_candidates ( &from, &to, from_x, from_y, candidates, n, 0, here, c ); 
+
+
+#ifndef FULLSEARCH
+               diamond_search( &from, &to, from_x, from_y, here, c); 
+#else
+               full_search( from, to, from_x, from_y, here, c); 
+#endif
+
+
+               /* Do things in Reverse
+                * Check for occlusions. A block from last frame becomes obscured this frame.
+                * A bogus motion vector will result. To look for this, run the search in reverse
+                * and see if the vector is good backwards and forwards. Most occlusions won't be.
+                * The new source block may not correspond exactly to blocks in the vector buffer
+                * The opposite case, a block being revealed is inherently ignored.
+                */
+#if 0
+               if ( here->msad < c->initial_thresh )           // The vector is probably good.
+                       continue;
+
+               struct motion_vector_s reverse;
+               reverse.dx = -here->dx; 
+               reverse.dy = -here->dy;
+               reverse.msad = here->msad;
+
+               // calculate psuedo block coordinates
+               from_x += here->dx;
+               from_y += here->dy;
+
+               n = 0;
+#endif
+
+               // Calculate the real block closest to our psuedo block
+#if 0
+               int ri = ( ABS( here->dx ) + c->macroblock_width/2 ) / c->macroblock_width;
+               if ( ri != 0 ) ri *= here->dx / ABS(here->dx);  // Recover sign
+               ri += i;
+               if ( ri < 0 ) ri = 0;
+               else if ( ri >= c->mv_buffer_width ) ri = c->mv_buffer_width;
+
+               int rj = ( ABS( here->dy ) + c->macroblock_height/2 ) / c->macroblock_height;
+               if ( rj != 0 ) rj *= here->dy / ABS(here->dy);  // Recover sign
+               rj += j;
+               if ( rj < 0 ) rj = 0;
+               else if ( rj >= c->mv_buffer_height ) rj = c->mv_buffer_height;
+
+               /* Adjacent to collocated */
+               if( c->former_vectors_valid )
+               {
+                       // Top of colocated
+                       if( rj > c->prev_top_mb ){// && COL_TOP->valid ){
+                               candidates[n  ].dx = -FORMER(ri,rj-1)->dx;
+                               candidates[n++].dy = -FORMER(ri,rj-1)->dy;
+                       }
+       
+                       // Left of colocated
+                       if( ri > c->prev_left_mb ){// && COL_LEFT->valid ){
+                               candidates[n  ].dx = -FORMER(ri-1,rj)->dx;
+                               candidates[n++].dy = -FORMER(ri-1,rj)->dy;
+                       }
+       
+                       // Right of colocated
+                       if( ri < c->prev_right_mb ){// && COL_RIGHT->valid ){
+                               candidates[n  ].dx = -FORMER(ri+1,rj)->dx;
+                               candidates[n++].dy = -FORMER(ri+1,rj)->dy;
+                       }
+       
+                       // Bottom of colocated
+                       if( rj < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
+                               candidates[n  ].dx = -FORMER(ri,rj+1)->dx;
+                               candidates[n++].dy = -FORMER(ri,rj+1)->dy;
+                       }
+
+                       // And finally, colocated
+                       candidates[n  ].dx = -FORMER(ri,rj)->dx;
+                       candidates[n++].dy = -FORMER(ri,rj)->dy;
+               }
+#endif
+#if 0
+               // Zero vector
+               candidates[n].dx = 0;
+               candidates[n++].dy = 0;
+
+               check_candidates ( &to, &from, from_x, from_y, candidates, 1, 1, &reverse, c ); 
+
+               /* Scan for the best candidate */
+               while( n ) {
+                       n--;
+
+                       score = compare( to, from, from_x, from_y,      /* to and from are reversed */
+                                        from_x + candidates[n].dx,     /* to x */
+                                        from_y + candidates[n].dy,     /* to y */
+                                        c);                            /* context */
+
+                       if ( score < reverse.msad ) {
+                               reverse.dx = candidates[n].dx;
+                               reverse.dy = candidates[n].dy;
+                               reverse.msad = score;
+                               if ( score < c->initial_thresh )
+                                       n=0;            // Simplified version of early termination thresh
+                       }
+               }
+
+//             if ( reverse.msad == here->msad)        // If nothing better was found
+//             {                                       // this is an opportunity
+//                                                     // to skip 4 block comparisons
+//                     continue;                       // in the diamond search
+//             }
+
+
+               diamond_search( &to, &from, from_x, from_y, &reverse, c); /* to and from are reversed */
+
+               if ( ABS( reverse.dx + here->dx ) + ABS( reverse.dy + here->dy ) > 5  )
+//             if ( here->msad > reverse.msad + c->initial_thresh*10   )
+               {
+                       here->valid = 2;
+               }
+
+#endif
+        } /* End column loop */
+       } /* End row loop */
+
+       asm volatile ( "emms" );
+
+#ifdef COUNT_COMPARES
+       fprintf(stderr, "%d comparisons per block were made", compares/count);
+#endif
+       return;
+}
+
+void collect_post_statistics( struct motion_est_context_s *c ) {
+
+       c->comparison_average = 0;
+       c->average_length = 0;
+       c->average_x = 0;
+       c->average_y = 0;
+
+       int i, j, count = 0;
+
+       for ( i = c->left_mb; i <= c->right_mb; i++ ){
+        for ( j = c->top_mb; j <= c->bottom_mb; j++ ){  
+
+               count++;
+               c->comparison_average += CURRENT(i,j)->msad;
+               c->average_x += CURRENT(i,j)->dx;
+               c->average_y += CURRENT(i,j)->dy;
+
+
+        }
+       }
+
+       if ( count > 0 )
+       {
+               c->comparison_average /= count;
+               c->average_x /= count;
+               c->average_y /= count;
+               c->average_length = sqrt( c->average_x * c->average_x + c->average_y * c->average_y );
+       }
+
+}
+
+static void init_optimizations( struct motion_est_context_s *c )
+{
+       if ( c->check_chroma ) {
+               switch(c->macroblock_width){
+                       case 8:  if(c->macroblock_height == 8)  c->compare_optimized = sad_sse_8x8;
+                                else                           c->compare_optimized = sad_sse_8w;
+                                break;
+                       case 16: if(c->macroblock_height == 16) c->compare_optimized = sad_sse_16x16;
+                                else                           c->compare_optimized = sad_sse_16w;
+                                break;
+                       case 32: if(c->macroblock_height == 32) c->compare_optimized = sad_sse_32x32;
+                                else                           c->compare_optimized = sad_sse_32w;
+                                break;
+                       case 64: c->compare_optimized = sad_sse_64w;
+                                break;
+                       default: c->compare_optimized = sad_reference;
+                                break;
+               }
+       }
+       else
+       {
+               switch(c->macroblock_width){
+                       case 4:  if(c->macroblock_height == 4)  c->compare_optimized = sad_sse_422_luma_4x4;
+                                else                           c->compare_optimized = sad_sse_422_luma_4w;
+                                break;
+                       case 8:  if(c->macroblock_height == 8)  c->compare_optimized = sad_sse_422_luma_8x8;
+                                else                           c->compare_optimized = sad_sse_422_luma_8w;
+                                break;
+                       case 16: if(c->macroblock_height == 16) c->compare_optimized = sad_sse_422_luma_16x16;
+                                else                           c->compare_optimized = sad_sse_422_luma_16w;
+                                break;
+                       case 32: if(c->macroblock_height == 32) c->compare_optimized = sad_sse_422_luma_32x32;
+                                else                           c->compare_optimized = sad_sse_422_luma_32w;
+                                break;
+                       case 64: c->compare_optimized = sad_sse_422_luma_64w;
+                                break;
+                       default: c->compare_optimized = sad_reference;
+                                break;
+               }
+       }
+}
+       
+// Image stack(able) method
+static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
+{
+       // Get the filter
+       mlt_filter filter = mlt_frame_pop_service( frame );
+
+       // Get the motion_est context object
+       struct motion_est_context_s *context = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL);
+
+       // Get the new image and frame number
+       int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
+
+       if( error != 0 )
+               mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr );
+
+       context->current_frame_position = mlt_frame_get_position( frame );
+
+       /* Context Initialization */
+       if ( context->initialized == 0 ) {
+
+               // Get the filter properties object
+               mlt_properties properties = mlt_filter_properties( filter );
+
+               context->width = *width;
+               context->height = *height;
+
+               /* Get parameters that may have been overridden */
+               if( mlt_properties_get( properties, "macroblock_width") != NULL )
+                       context->macroblock_width = mlt_properties_get_int( properties, "macroblock_width");
+
+               if( mlt_properties_get( properties, "macroblock_height") != NULL )
+                       context->macroblock_height = mlt_properties_get_int( properties, "macroblock_height");
+
+               if( mlt_properties_get( properties, "prediction_thresh") != NULL )
+                       context->initial_thresh = mlt_properties_get_int( properties, "prediction_thresh" );
+               else
+                       context->initial_thresh = context->macroblock_width * context->macroblock_height;
+
+               if( mlt_properties_get( properties, "search_method") != NULL )
+                       context->search_method = mlt_properties_get_int( properties, "search_method");
+
+               if( mlt_properties_get( properties, "skip_prediction") != NULL )
+                       context->skip_prediction = mlt_properties_get_int( properties, "skip_prediction");
+
+               if( mlt_properties_get( properties, "limit_x") != NULL )
+                       context->limit_x = mlt_properties_get_int( properties, "limit_x");
+
+               if( mlt_properties_get( properties, "limit_y") != NULL )
+                       context->limit_y = mlt_properties_get_int( properties, "limit_y");
+
+               if( mlt_properties_get( properties, "check_chroma" ) != NULL )
+                       context->check_chroma = mlt_properties_get_int( properties, "check_chroma" );
+
+               init_optimizations( context );
+
+               // Calculate the dimensions in macroblock units
+               context->mv_buffer_width = (*width / context->macroblock_width);
+               context->mv_buffer_height = (*height / context->macroblock_height);
+
+               // Size of the motion vector buffer
+               context->mv_size =  context->mv_buffer_width * context->mv_buffer_height * sizeof(struct motion_vector_s);
+
+               // Allocate the motion vector buffers
+               context->former_vectors = mlt_pool_alloc( context->mv_size ); 
+               context->current_vectors = mlt_pool_alloc( context->mv_size ); 
+
+               // Register motion buffers for destruction
+               mlt_properties_set_data( properties, "current_motion_vectors", (void *)context->current_vectors, 0, mlt_pool_release, NULL );
+               mlt_properties_set_data( properties, "former_motion_vectors", (void *)context->former_vectors, 0, mlt_pool_release, NULL );
+
+
+               context->former_vectors_valid = 0;
+               memset( context->former_vectors, 0, context->mv_size );
+
+               // Figure out how many blocks should be considered edge blocks
+               context->edge_blocks_x = (context->limit_x + context->macroblock_width - 1) / context->macroblock_width;
+               context->edge_blocks_y = (context->limit_y + context->macroblock_height - 1) / context->macroblock_height;
+
+               // Calculate the size of our steps (the number of bytes that seperate adjacent pixels in X and Y direction)
+               switch( *format ) {
+                       case mlt_image_yuv422:
+                               if ( context->check_chroma )
+                                       context->xstride = 1;
+                               else
+                                       context->xstride = 2;
+                               context->ystride = context->xstride * *width;
+                               break; 
+/*                     case mlt_image_yuv420p:
+                               context->xstride = 1;
+                               context->ystride = context->xstride * *width;
+                               break;
+*/                     default:
+                               // I don't know
+                               fprintf(stderr, "\"I am unfamiliar with your new fangled pixel format!\" -filter_motion_est\n");
+                               return -1;
+               }
+
+               if ( context->check_chroma ) {
+                       // Allocate memory for the 444 images
+                       context->former_image.y = mlt_pool_alloc( *width * *height * 3 );
+                       context->current_image.y = mlt_pool_alloc( *width * *height * 3 );
+                       context->current_image.u = context->current_image.y + *width * *height;
+                       context->current_image.v = context->current_image.u + *width * *height;
+                       context->former_image.u = context->former_image.y + *width * *height;
+                       context->former_image.v = context->former_image.u + *width * *height;
+                       // Register for destruction
+                       mlt_properties_set_data( properties, "current_image", (void *)context->current_image.y, 0, mlt_pool_release, NULL );
+               }
+               else
+               {
+                       context->former_image.y = mlt_pool_alloc( *width * *height * 2 );
+               }
+               // Register for destruction
+               mlt_properties_set_data( properties, "former_image", (void *)context->former_image.y, 0, mlt_pool_release, NULL );
+
+
+               context->former_frame_position = context->current_frame_position;
+
+               context->initialized = 1;
+       }
+
+       /* Check to see if somebody else has given us bounds */
+       context->bounds = mlt_properties_get_data( MLT_FRAME_PROPERTIES( frame ), "bounds", NULL );
+
+       /* no bounds were given, they won't change next frame, so use a convient storage place */
+       if( context->bounds == NULL ) {
+               context->bounds = &context->prev_bounds;
+               context->bounds->x = 0;
+               context->bounds->y = 0;
+               context->bounds->w = *width - 1;        // Zero indexed
+               context->bounds->h = *height - 1;       // Zero indexed
+       }
+
+       // translate pixel units (from bounds) to macroblock units
+       // make sure whole macroblock stays within bounds
+       context->left_mb = (context->bounds->x + context->macroblock_width - 1) / context->macroblock_width;
+       context->top_mb = (context->bounds->y + context->macroblock_height - 1) / context->macroblock_height;
+       context->right_mb = (context->bounds->x + context->bounds->w - context->macroblock_width + 1) / context->macroblock_width;
+       context->bottom_mb = (context->bounds->y + context->bounds->h - context->macroblock_height + 1) / context->macroblock_height;
+
+       // Do the same thing for the previous frame's geometry
+       // This will be used for determining validity of predictors
+       context->prev_left_mb = (context->prev_bounds.x + context->macroblock_width - 1) / context->macroblock_width;
+       context->prev_top_mb = (context->prev_bounds.y + context->macroblock_height - 1) / context->macroblock_height;
+       context->prev_right_mb = (context->prev_bounds.x + context->prev_bounds.w - context->macroblock_width - 1)
+                                                                       / context->macroblock_width;
+       context->prev_bottom_mb = (context->prev_bounds.y + context->prev_bounds.h - context->macroblock_height - 1)
+                                                                       / context->macroblock_height;
+
+       
+       // If video is advancing, run motion vector algorithm and etc...        
+       if( context->former_frame_position + 1 == context->current_frame_position )
+       {
+               #ifdef BENCHMARK
+               struct timeval start; gettimeofday(&start, NULL );
+               #endif
+
+               // Swap the motion vector buffers and reuse allocated memory
+               struct motion_vector_s *temp = context->current_vectors;
+               context->current_vectors = context->former_vectors;
+               context->former_vectors = temp;
+
+               // Swap the image buffers
+               if ( context->check_chroma ) {
+                       uint8_t *temp_yuv;
+                       temp_yuv = context->current_image.y;
+                       context->current_image.y = context->former_image.y;
+                       context->former_image.y = temp_yuv;
+                       temp_yuv = context->current_image.u;
+                       context->current_image.u = context->former_image.u;
+                       context->former_image.u = temp_yuv;
+                       temp_yuv = context->current_image.v;
+                       context->current_image.v = context->former_image.v;
+                       context->former_image.v = temp_yuv;
+
+                       switch ( *format ) {
+                               case mlt_image_yuv422:
+                                       change_422_to_444_planar_rep( *image, context->current_image, context );
+                                       break;
+                               case mlt_image_yuv420p:
+                                       change_420p_to_444_planar_rep( *image, context->current_image, context );
+                                       break;
+                               default:
+                                       break;
+                       }
+               }
+               else
+                       context->current_image.y = *image;
+
+               // Find a better place for this
+               memset( context->current_vectors, 0, context->mv_size );
+
+               // Perform the motion search
+
+               //collect_pre_statistics( context, *image );
+               search( context->current_image, context->former_image, context );
+               collect_post_statistics( context );
+
+               #ifdef BENCHMARK
+               struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
+               fprintf(stderr, " in frame %d:%d usec\n", context->current_frame_position, difference);
+               #endif
+
+
+
+               // Detect shot changes
+               if( context->comparison_average > 12 * context->macroblock_width * context->macroblock_height ) {
+                       //fprintf(stderr, " - SAD: %d   <<Shot change>>\n", context->comparison_average);
+                       mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1);
+               //      context->former_vectors_valid = 0; // Invalidate the previous frame's predictors
+                       context->shot_change = 1;
+               }
+               else {
+                       context->former_vectors_valid = 1;
+                       context->shot_change = 0;
+                       //fprintf(stderr, " - SAD: %d\n", context->comparison_average);
+               }
+
+               if( context->comparison_average != 0 ) {
+                       // Pass the new vector data into the frame
+                       mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
+                                        (void*)context->current_vectors, context->mv_size, NULL, NULL );
+
+               }
+               else {
+                       // This fixes the ugliness caused by a duplicate frame
+                       temp = context->current_vectors;
+                       context->current_vectors = context->former_vectors;
+                       context->former_vectors = temp;
+                       mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
+                                        (void*)context->former_vectors, context->mv_size, NULL, NULL );
+               }
+
+       }
+       // paused
+       else if( context->former_frame_position == context->current_frame_position )
+       {
+               // Pass the old vector data into the frame if it's valid
+               if( context->former_vectors_valid == 1 )
+                       mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
+                                        (void*)context->current_vectors, context->mv_size, NULL, NULL );
+
+               mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", context->shot_change);
+       }
+       // there was jump in frame number
+       else
+               context->former_vectors_valid = 0;
+
+
+       // Cache our bounding geometry for the next frame's processing
+       if( context->bounds != &context->prev_bounds )
+               memcpy( &context->prev_bounds, context->bounds, sizeof( struct mlt_geometry_item_s ) );
+
+       // Remember which frame this is
+       context->former_frame_position = context->current_frame_position;
+
+       if ( context->check_chroma == 0 )
+               memcpy( context->former_image.y, *image, *width * *height * context->xstride );
+
+       mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_width", context->macroblock_width );
+       mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_height", context->macroblock_height );
+
+       return error;
+}
+
+
+
+/** filter processing.
+*/
+
+static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
+{
+
+       // Keeps tabs on the filter object
+       mlt_frame_push_service( frame, this);
+
+       // Push the frame filter
+       mlt_frame_push_get_image( frame, filter_get_image );
+
+       return frame;
+}
+
+/** Constructor for the filter.
+*/
+mlt_filter filter_motion_est_init( char *arg )
+{
+       mlt_filter this = mlt_filter_new( );
+       if ( this != NULL )
+       {
+               // Get the properties object
+               mlt_properties properties = MLT_FILTER_PROPERTIES( this );
+
+               // Initialize the motion estimation context
+               struct motion_est_context_s *context;
+               context = mlt_pool_alloc( sizeof(struct motion_est_context_s) );
+               mlt_properties_set_data( properties, "context", (void *)context, sizeof( struct motion_est_context_s ),
+                                       mlt_pool_release, NULL );
+
+
+               // Register the filter
+               this->process = filter_process;
+
+               /* defaults that may be overridden */
+               context->macroblock_width = 16;
+               context->macroblock_height = 16;
+               context->skip_prediction = 0;
+               context->limit_x = 64;
+               context->limit_y = 64;
+               context->search_method = DIAMOND_SEARCH;
+               context->check_chroma = 0;
+
+               /* reference functions that may have optimized versions */
+               context->compare_reference = sad_reference;
+               context->vert_deviation_reference = vertical_gradient_reference;
+               context->horiz_deviation_reference = horizontal_gradient_reference;
+
+               // The rest of the buffers will be initialized when the filter is first processed
+               context->initialized = 0;
+       }
+       return this;
+}
+
+/** This source code will self destruct in 5...4...3... */
diff --git a/src/modules/motion_est/filter_motion_est.h b/src/modules/motion_est/filter_motion_est.h
new file mode 100644 (file)
index 0000000..75aa029
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Perform motion estimation
+ * Zachary K Drew, Copyright 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _FILTER_MOTION_EST_H_
+#define _FILTER_MOTION_EST_H_
+
+#include <framework/mlt_filter.h>
+
+extern mlt_filter filter_motion_est_init( char *arg );
+
+#define MAX_MSAD 0xffff
+
+struct motion_vector_s
+{
+       int msad;       //<! Sum of Absolute Differences for this vector
+       int dx;         //<! integer X displacement
+       int dy;         //<! integer Y displacement
+       int vert_dev;   //<! a measure of vertical color deviation
+       int horiz_dev;  //<! a measure of horizontal color deviation
+       int valid;
+       int quality;
+       uint8_t color;  //<! The color
+};
+
+#endif
diff --git a/src/modules/motion_est/filter_vismv.c b/src/modules/motion_est/filter_vismv.c
new file mode 100644 (file)
index 0000000..4bfd5df
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ *     /brief Draw motion vectors
+ *     /author Zachary Drew, Copyright 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "filter_motion_est.h"
+#include "arrow_code.h"
+
+#include <framework/mlt_frame.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+
+static void paint_arrows( uint8_t *image, struct motion_vector_s *vectors, int w, int h, int mb_w, int mb_h )
+{
+       int i, j, x, y;
+       struct motion_vector_s *p;
+       for( i = 0; i < w/mb_w; i++ ){
+               for( j = 0; j < h/mb_h; j++ ){
+                       x = i*mb_w;
+                       y = j*mb_h;
+                       p = vectors + (w/mb_w)*j + i;
+#if 0
+                       if( p->color == 0 )
+                               continue;
+                       if( p->quality > 10 ){
+                               draw_line(image, x, y, x + mb_w, y, 100);
+                               draw_line(image, x, y, x, y + mb_h, 100);
+                               draw_line(image, x + mb_w, y, x + mb_w, y + mb_h, 100);
+                               draw_line(image, x + mb_w, y + mb_h, x, y + mb_w, 100);
+                       }
+                       else if ( p->color == 18 ) {
+                               draw_line(image, x, y, x + mb_w, y + mb_h, 100);
+                               draw_line(image, x, y + mb_h, x + mb_w, y, 100);
+                               continue;
+                       }
+                       else if( p->vert_dev < 150 ){
+                               x += mb_w/2;
+                               draw_line(image, x, y, x, y + mb_h, 100);
+                               continue;
+                       }
+                       else if( p->horiz_dev < 150 ){
+                               y += mb_w/2;
+                               draw_line(image, x, y, x+mb_w, y, 100);
+                               continue;
+                       }
+                       else
+#endif
+                       /*if ( p->valid == 1 ){
+                               x += mb_w/2;
+                               y += mb_h/2;
+                               draw_arrow(image, x + p->dx, y + p->dy, x, y, 100);
+                       } else */
+                       if ( p->valid == 3 ) {
+                               draw_rectangle_fill(image, x, y, mb_w, mb_h,0);
+                       }
+                       if ( p->valid == 1 ) {
+                               //draw_rectangle_outline(image, x, y, mb_w, mb_h,100);
+                               //x += mb_w/4;
+                               //y += mb_h/4;
+                               //draw_rectangle_outline(image, x + p->dx, y + p->dy, mb_w, mb_h,100);
+                               x += mb_w/2;
+                               y += mb_h/2;
+                               draw_arrow(image, x, y, x + p->dx, y + p->dy, 100);
+                               //draw_rectangle_fill(image, x + p->dx, y + p->dy, mb_w, mb_h, 100);
+                       }
+               }
+       }
+       //if (count > 300)
+       //      fprintf(stderr, "%d mbs above %d\n", count, mb_w * mb_h * 12);
+}
+
+#if 0
+static void paint_mbs( uint8_t *image, struct motion_vector_s *vectors, int w, int h, int mb_w, int mb_h, int xstep, int ystep )
+{
+       int i, j, x, y;
+       struct motion_vector_s *p;
+       for( i = 0; i < w/mb_w; i++ ){
+               for( j = 0; j < h/mb_h; j++ ){
+                       x = i * mb_w;
+                       y = j * mb_h;
+                       p = vectors + (w/mb_w)*j + i;
+                       if( p->color == 0 )
+                               continue;
+                       draw_line(image, x, y, x + mb_w, y, 100);
+                       draw_line(image, x, y, x, y + mb_h, 100);
+                       draw_line(image, x + mb_w, y, x + mb_w, y + mb_h, 100);
+                       draw_line(image, x + mb_w, y + mb_h, x, y + mb_w, 100);
+               }
+       }
+}
+#endif
+
+// Image stack(able) method
+static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
+{
+       // Get the frame properties
+       mlt_properties properties = MLT_FRAME_PROPERTIES(frame);
+
+       // Get the new image
+       int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
+
+       if( error != 0 )
+               mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image()", stderr );
+
+
+       // Get the size of macroblocks in pixel units
+       int macroblock_height = mlt_properties_get_int( properties, "motion_est.macroblock_height" );
+       int macroblock_width = mlt_properties_get_int( properties, "motion_est.macroblock_width" );
+
+       // Get the motion vectors
+       struct motion_vectors_s *current_vectors = mlt_properties_get_data( properties, "motion_est.vectors", NULL );
+
+       init_arrows( format, *width, *height );
+
+       if ( mlt_properties_get_int( properties, "shot_change" ) == 1 )
+       {
+               draw_line(*image, 0, 0, *width, *height, 100);
+               draw_line(*image, 0, *height, *width, 0, 100);
+       }
+       if( current_vectors != NULL ) {
+               paint_arrows( *image, current_vectors, *width, *height, macroblock_width, macroblock_height);
+               //paint_mbs( *image, current_vectors, *width, *height, macroblock_width, macroblock_height, xstep, ystep);
+       }
+
+       return error;
+}
+
+
+
+/** Filter processing.
+*/
+
+static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
+{
+       // Push the frame filter
+       mlt_frame_push_get_image( frame, filter_get_image );
+
+
+       return frame;
+}
+
+/** Constructor for the filter.
+*/
+
+
+mlt_filter filter_vismv_init( char *arg )
+{
+       mlt_filter this = mlt_filter_new( );
+       if ( this != NULL )
+       {
+               this->process = filter_process;
+
+       }
+
+       return this;
+}
+
+/** This source code will self destruct in 5...4...3...
+*/
+
+
+
+
+
+
+
+
diff --git a/src/modules/motion_est/sad_sse.h b/src/modules/motion_est/sad_sse.h
new file mode 100644 (file)
index 0000000..955a6f8
--- /dev/null
@@ -0,0 +1,429 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+
+
+#define SAD_SSE_INIT \
+       asm volatile ( "pxor %%mm6,%%mm6\n\t" ::  );\
+
+// Sum two 8x1 pixel blocks
+#define SAD_SSE_SUM_8(OFFSET) \
+                       "movq " #OFFSET "(%0),%%mm0             \n\t"\
+                       "movq " #OFFSET "(%1),%%mm1             \n\t"\
+                       "psadbw %%mm1,%%mm0                     \n\t"\
+                       "paddw %%mm0,%%mm6                      \n\t"\
+
+#define SAD_SSE_FINISH(RESULT) \
+       asm volatile( "movd %%mm6,%0" : "=r" (RESULT) : );
+
+// Advance by ystride
+#define SAD_SSE_NEXTROW \
+                       "add %2,%0                              \n\t"\
+                       "add %2,%1                              \n\t"\
+
+// BROKEN!
+inline static int sad_sse_4x4( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_INIT
+       #define ROW     SAD_SSE_SUM_8(0) SAD_SSE_NEXTROW
+       asm volatile (  ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_8x8( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_INIT
+       #define ROW     SAD_SSE_SUM_8(0) SAD_SSE_NEXTROW
+       asm volatile (  ROW ROW ROW ROW ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_16x16( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_INIT
+       #define ROW     SAD_SSE_SUM_8(0) SAD_SSE_SUM_8(8) SAD_SSE_NEXTROW
+       asm volatile (  ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_32x32( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_INIT
+       #define ROW     SAD_SSE_SUM_8(0) SAD_SSE_SUM_8(8) SAD_SSE_SUM_8(16) SAD_SSE_SUM_8(24)\
+                       SAD_SSE_NEXTROW
+
+       asm volatile (  ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+// BROKEN!
+inline static int sad_sse_4w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_SUM_8(0)
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_8w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_SUM_8(0)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_16w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_SUM_8(0)
+                       SAD_SSE_SUM_8(8)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_32w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_SUM_8(0)
+                       SAD_SSE_SUM_8(8)
+                       SAD_SSE_SUM_8(16)
+                       SAD_SSE_SUM_8(24)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_64w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_SUM_8(0)
+                       SAD_SSE_SUM_8(8)
+                       SAD_SSE_SUM_8(16)
+                       SAD_SSE_SUM_8(24)
+                       SAD_SSE_SUM_8(32)
+                       SAD_SSE_SUM_8(40)
+                       SAD_SSE_SUM_8(48)
+                       SAD_SSE_SUM_8(56)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+static __attribute__((used)) __attribute__((aligned(8))) uint64_t sad_sse_422_mask_chroma = 0x00ff00ff00ff00ffULL;
+
+#define SAD_SSE_422_LUMA_INIT \
+       asm volatile (  "movq sad_sse_422_mask_chroma,%%mm7\n\t"\
+                       "pxor %%mm6,%%mm6\n\t" ::  );\
+
+// Sum two 4x1 pixel blocks
+#define SAD_SSE_422_LUMA_SUM_4(OFFSET) \
+                       "movq " #OFFSET "(%0),%%mm0             \n\t"\
+                       "movq " #OFFSET "(%1),%%mm1             \n\t"\
+                       "pand %%mm7,%%mm0                       \n\t"\
+                       "pand %%mm7,%%mm1                       \n\t"\
+                       "psadbw %%mm1,%%mm0                     \n\t"\
+                       "paddw %%mm0,%%mm6                      \n\t"\
+
+inline static int sad_sse_422_luma_4x4( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_422_LUMA_INIT
+       #define ROW     SAD_SSE_422_LUMA_SUM_4(0) SAD_SSE_NEXTROW
+       asm volatile (  ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_422_luma_8x8( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_422_LUMA_INIT
+       #define ROW     SAD_SSE_422_LUMA_SUM_4(0) SAD_SSE_422_LUMA_SUM_4(8) SAD_SSE_NEXTROW
+       asm volatile (  ROW ROW ROW ROW ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_422_luma_16x16( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_422_LUMA_INIT
+       #define ROW     SAD_SSE_422_LUMA_SUM_4(0) SAD_SSE_422_LUMA_SUM_4(8) SAD_SSE_422_LUMA_SUM_4(16) SAD_SSE_422_LUMA_SUM_4(24) SAD_SSE_NEXTROW
+       asm volatile (  ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_422_luma_32x32( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+       SAD_SSE_422_LUMA_INIT
+       #define ROW     SAD_SSE_422_LUMA_SUM_4(0) SAD_SSE_422_LUMA_SUM_4(8) SAD_SSE_422_LUMA_SUM_4(16) SAD_SSE_422_LUMA_SUM_4(24)\
+                       SAD_SSE_422_LUMA_SUM_4(32) SAD_SSE_422_LUMA_SUM_4(40) SAD_SSE_422_LUMA_SUM_4(48) SAD_SSE_422_LUMA_SUM_4(56)\
+                       SAD_SSE_NEXTROW
+
+       asm volatile (  ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       ROW ROW ROW ROW ROW ROW ROW ROW
+                       :: "r" (block1), "r" (block2), "r" (ystride));
+       
+       SAD_SSE_FINISH(result)
+       return result;
+       #undef ROW
+
+}
+
+inline static int sad_sse_422_luma_4w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_422_LUMA_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_422_LUMA_SUM_4(0)
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_422_luma_8w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_422_LUMA_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_422_LUMA_SUM_4(0)
+                       SAD_SSE_422_LUMA_SUM_4(8)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_422_luma_16w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_422_LUMA_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_422_LUMA_SUM_4(0)
+                       SAD_SSE_422_LUMA_SUM_4(8)
+                       SAD_SSE_422_LUMA_SUM_4(16)
+                       SAD_SSE_422_LUMA_SUM_4(24)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_422_luma_32w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_422_LUMA_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_422_LUMA_SUM_4(0)
+                       SAD_SSE_422_LUMA_SUM_4(8)
+                       SAD_SSE_422_LUMA_SUM_4(16)
+                       SAD_SSE_422_LUMA_SUM_4(24)
+                       SAD_SSE_422_LUMA_SUM_4(32)
+                       SAD_SSE_422_LUMA_SUM_4(40)
+                       SAD_SSE_422_LUMA_SUM_4(48)
+                       SAD_SSE_422_LUMA_SUM_4(56)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+
+}
+
+inline static int sad_sse_422_luma_64w( uint8_t *block1, uint8_t *block2, int xstride, int ystride, int w, int h )
+{
+       int result; 
+
+       SAD_SSE_422_LUMA_INIT
+
+       while( h != 0 ) {
+               asm volatile (
+                       SAD_SSE_422_LUMA_SUM_4(0)
+                       SAD_SSE_422_LUMA_SUM_4(8)
+                       SAD_SSE_422_LUMA_SUM_4(16)
+                       SAD_SSE_422_LUMA_SUM_4(24)
+                       SAD_SSE_422_LUMA_SUM_4(32)
+                       SAD_SSE_422_LUMA_SUM_4(40)
+                       SAD_SSE_422_LUMA_SUM_4(48)
+                       SAD_SSE_422_LUMA_SUM_4(56)
+                       SAD_SSE_422_LUMA_SUM_4(64)
+                       SAD_SSE_422_LUMA_SUM_4(72)
+                       SAD_SSE_422_LUMA_SUM_4(80)
+                       SAD_SSE_422_LUMA_SUM_4(88)
+                       SAD_SSE_422_LUMA_SUM_4(96)
+                       SAD_SSE_422_LUMA_SUM_4(104)
+                       SAD_SSE_422_LUMA_SUM_4(112)
+                       SAD_SSE_422_LUMA_SUM_4(120)
+
+                       :: "r" (block1), "r" (block2)
+               );
+       
+               h--;
+               block1 += ystride;
+               block2 += ystride;
+       }
+       SAD_SSE_FINISH(result)
+       return result;
+}