From: lilo_booter Date: Thu, 15 Apr 2004 20:08:52 +0000 (+0000) Subject: LGPL deinterlace and resampler X-Git-Url: http://research.m1stereo.tv/gitweb?a=commitdiff_plain;h=1f8135566477df29f2b62528f9bf18359c0ab971;p=melted LGPL deinterlace and resampler git-svn-id: https://mlt.svn.sourceforge.net/svnroot/mlt/trunk/mlt@279 d19143bc-622f-0410-bfdd-b5b2a6649095 --- diff --git a/src/modules/avformat/Makefile b/src/modules/avformat/Makefile index 296fc6c..e258004 100644 --- a/src/modules/avformat/Makefile +++ b/src/modules/avformat/Makefile @@ -4,7 +4,9 @@ TARGET = ../libmltavformat.so OBJS = factory.o \ producer_avformat.o \ - consumer_avformat.o + consumer_avformat.o \ + filter_avdeinterlace.o \ + filter_avresample.o CFLAGS += -I../.. diff --git a/src/modules/avformat/configure b/src/modules/avformat/configure index f3dd31a..cbecb5d 100755 --- a/src/modules/avformat/configure +++ b/src/modules/avformat/configure @@ -7,6 +7,11 @@ cat << EOF >> ../producers.dat avformat libmltavformat.so EOF +cat << EOF >> ../filters.dat +avdeinterlace libmltavformat.so +avresample libmltavformat.so +EOF + cat << EOF >> ../consumers.dat avformat libmltavformat.so EOF diff --git a/src/modules/avformat/factory.c b/src/modules/avformat/factory.c index bf01e95..a9de9e0 100644 --- a/src/modules/avformat/factory.c +++ b/src/modules/avformat/factory.c @@ -24,6 +24,8 @@ #include #include "producer_avformat.h" #include "consumer_avformat.h" +#include "filter_avdeinterlace.h" +#include "filter_avresample.h" // ffmpeg Header files #include @@ -97,6 +99,11 @@ void *mlt_create_producer( char *id, void *arg ) void *mlt_create_filter( char *id, void *arg ) { + avformat_init( ); + if ( !strcmp( id, "avdeinterlace" ) ) + return filter_avdeinterlace_init( arg ); + if ( !strcmp( id, "avresample" ) ) + return filter_avresample_init( arg ); return NULL; } diff --git a/src/modules/avformat/filter_avdeinterlace.c b/src/modules/avformat/filter_avdeinterlace.c new file mode 100644 index 0000000..f1d1a3f --- /dev/null +++ b/src/modules/avformat/filter_avdeinterlace.c @@ -0,0 +1,352 @@ +/* + * filter_avdeinterlace.c -- deinterlace filter + * Copyright (C) 2003-2004 Ushodaya Enterprises Limited + * Author: Charles Yates + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include "filter_avdeinterlace.h" + +#include + +#include +#include + +// ffmpeg Header files +#include + +#ifdef USE_MMX +#include "mmx.h" +#endif + +#ifdef USE_MMX +#define DEINT_INPLACE_LINE_LUM \ + movd_m2r(lum_m4[0],mm0);\ + movd_m2r(lum_m3[0],mm1);\ + movd_m2r(lum_m2[0],mm2);\ + movd_m2r(lum_m1[0],mm3);\ + movd_m2r(lum[0],mm4);\ + punpcklbw_r2r(mm7,mm0);\ + movd_r2m(mm2,lum_m4[0]);\ + punpcklbw_r2r(mm7,mm1);\ + punpcklbw_r2r(mm7,mm2);\ + punpcklbw_r2r(mm7,mm3);\ + punpcklbw_r2r(mm7,mm4);\ + paddw_r2r(mm3,mm1);\ + psllw_i2r(1,mm2);\ + paddw_r2r(mm4,mm0);\ + psllw_i2r(2,mm1);\ + paddw_r2r(mm6,mm2);\ + paddw_r2r(mm2,mm1);\ + psubusw_r2r(mm0,mm1);\ + psrlw_i2r(3,mm1);\ + packuswb_r2r(mm7,mm1);\ + movd_r2m(mm1,lum_m2[0]); + +#define DEINT_LINE_LUM \ + movd_m2r(lum_m4[0],mm0);\ + movd_m2r(lum_m3[0],mm1);\ + movd_m2r(lum_m2[0],mm2);\ + movd_m2r(lum_m1[0],mm3);\ + movd_m2r(lum[0],mm4);\ + punpcklbw_r2r(mm7,mm0);\ + punpcklbw_r2r(mm7,mm1);\ + punpcklbw_r2r(mm7,mm2);\ + punpcklbw_r2r(mm7,mm3);\ + punpcklbw_r2r(mm7,mm4);\ + paddw_r2r(mm3,mm1);\ + psllw_i2r(1,mm2);\ + paddw_r2r(mm4,mm0);\ + psllw_i2r(2,mm1);\ + paddw_r2r(mm6,mm2);\ + paddw_r2r(mm2,mm1);\ + psubusw_r2r(mm0,mm1);\ + psrlw_i2r(3,mm1);\ + packuswb_r2r(mm7,mm1);\ + movd_r2m(mm1,dst[0]); +#endif + +/* filter parameters: [-1 4 2 4 -1] // 8 */ +static void deinterlace_line(uint8_t *dst, + const uint8_t *lum_m4, const uint8_t *lum_m3, + const uint8_t *lum_m2, const uint8_t *lum_m1, + const uint8_t *lum, + int size) +{ +#ifndef USE_MMX + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int sum; + + for(;size > 0;size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + sum += lum_m1[0] << 2; + sum += -lum[0]; + dst[0] = cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + dst++; + } +#else + + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } + for (;size > 3; size-=4) { + DEINT_LINE_LUM + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + dst+=4; + } +#endif +} +static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum, + int size) +{ +#ifndef USE_MMX + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int sum; + + for(;size > 0;size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + lum_m4[0]=lum_m2[0]; + sum += lum_m1[0] << 2; + sum += -lum[0]; + lum_m2[0] = cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + } +#else + + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } + for (;size > 3; size-=4) { + DEINT_INPLACE_LINE_LUM + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + } +#endif +} + +/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The + top field is copied as is, but the bottom field is deinterlaced + against the top field. */ +static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, + const uint8_t *src1, int src_wrap, + int width, int height) +{ + const uint8_t *src_m2, *src_m1, *src_0, *src_p1, *src_p2; + int y; + + src_m2 = src1; + src_m1 = src1; + src_0=&src_m1[src_wrap]; + src_p1=&src_0[src_wrap]; + src_p2=&src_p1[src_wrap]; + for(y=0;y<(height-2);y+=2) { + memcpy(dst,src_m1,width); + dst += dst_wrap; + deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width); + src_m2 = src_0; + src_m1 = src_p1; + src_0 = src_p2; + src_p1 += 2*src_wrap; + src_p2 += 2*src_wrap; + dst += dst_wrap; + } + memcpy(dst,src_m1,width); + dst += dst_wrap; + /* do last line */ + deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width); +} + +static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap, + int width, int height) +{ + uint8_t *src_m1, *src_0, *src_p1, *src_p2; + int y; + uint8_t *buf; + buf = (uint8_t*)av_malloc(width); + + src_m1 = src1; + memcpy(buf,src_m1,width); + src_0=&src_m1[src_wrap]; + src_p1=&src_0[src_wrap]; + src_p2=&src_p1[src_wrap]; + for(y=0;y<(height-2);y+=2) { + deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width); + src_m1 = src_p1; + src_0 = src_p2; + src_p1 += 2*src_wrap; + src_p2 += 2*src_wrap; + } + /* do last line */ + deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width); + av_free(buf); +} + + +/* deinterlace - if not supported return -1 */ +static int mlt_avpicture_deinterlace(AVPicture *dst, const AVPicture *src, + int pix_fmt, int width, int height) +{ + int i; + + if (pix_fmt != PIX_FMT_YUV420P && + pix_fmt != PIX_FMT_YUV422P && + pix_fmt != PIX_FMT_YUV422 && + pix_fmt != PIX_FMT_YUV444P && + pix_fmt != PIX_FMT_YUV411P) + return -1; + if ((width & 3) != 0 || (height & 3) != 0) + return -1; + + if ( pix_fmt != PIX_FMT_YUV422 ) + { + for(i=0;i<3;i++) { + if (i == 1) { + switch(pix_fmt) { + case PIX_FMT_YUV420P: + width >>= 1; + height >>= 1; + break; + case PIX_FMT_YUV422P: + width >>= 1; + break; + case PIX_FMT_YUV411P: + width >>= 2; + break; + default: + break; + } + } + if (src == dst) { + deinterlace_bottom_field_inplace(dst->data[i], dst->linesize[i], + width, height); + } else { + deinterlace_bottom_field(dst->data[i],dst->linesize[i], + src->data[i], src->linesize[i], + width, height); + } + } + } + else { + if (src == dst) { + deinterlace_bottom_field_inplace(dst->data[0], dst->linesize[0], + width, height); + } else { + deinterlace_bottom_field(dst->data[0],dst->linesize[0], + src->data[0], src->linesize[0], + width, height); + } + } + +#ifdef USE_MMX + emms(); +#endif + return 0; +} + +/** Do it :-). +*/ + +static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable ) +{ + int error = 0; + + // Check that we want progressive and we aren't already progressive + if ( *format == mlt_image_yuv422 && + !mlt_properties_get_int( mlt_frame_properties( this ), "progressive" ) && + mlt_properties_get_int( mlt_frame_properties( this ), "consumer_deinterlace" ) ) + { + // Create a picture + AVPicture *output = mlt_pool_alloc( sizeof( AVPicture ) ); + + // Get the input image + error = mlt_frame_get_image( this, image, format, width, height, 1 ); + + // Fill the picture + if ( *format == mlt_image_yuv422 ) + { + avpicture_fill( output, *image, PIX_FMT_YUV422, *width, *height ); + mlt_avpicture_deinterlace( output, output, PIX_FMT_YUV422, *width, *height ); + } + + // Free the picture + mlt_pool_release( output ); + + // Make sure that others know the frame is deinterlaced + mlt_properties_set_int( mlt_frame_properties( this ), "progressive", 1 ); + } + else + { + // Get the input image + error = mlt_frame_get_image( this, image, format, width, height, writable ); + } + + return error; +} + +/** Deinterlace filter processing - this should be lazy evaluation here... +*/ + +static mlt_frame deinterlace_process( mlt_filter this, mlt_frame frame ) +{ + // Push the get_image method on to the stack + mlt_frame_push_get_image( frame, filter_get_image ); + + return frame; +} + +/** Constructor for the filter. +*/ + +mlt_filter filter_avdeinterlace_init( void *arg ) +{ + mlt_filter this = mlt_filter_new( ); + if ( this != NULL ) + this->process = deinterlace_process; + return this; +} + diff --git a/src/modules/avformat/filter_avdeinterlace.h b/src/modules/avformat/filter_avdeinterlace.h new file mode 100644 index 0000000..dc5f9ef --- /dev/null +++ b/src/modules/avformat/filter_avdeinterlace.h @@ -0,0 +1,28 @@ +/* + * filter_avdeinterlace.h -- deinterlace filter + * Copyright (C) 2003-2004 Ushodaya Enterprises Limited + * Author: Charles Yates + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _FILTER_AVDEINTERLACE_H_ +#define _FILTER_AVDEINTERLACE_H_ + +#include + +extern mlt_filter filter_avdeinterlace_init( void *arg ); + +#endif diff --git a/src/modules/avformat/filter_avresample.c b/src/modules/avformat/filter_avresample.c new file mode 100644 index 0000000..dce42ee --- /dev/null +++ b/src/modules/avformat/filter_avresample.c @@ -0,0 +1,203 @@ +/* + * filter_avresample.c -- adjust audio sample frequency + * Copyright (C) 2003-2004 Ushodaya Enterprises Limited + * Author: Charles Yates + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include "filter_avresample.h" + +#include + +#include +#include +#include + +// ffmpeg Header files +#include + +/** Get the audio. +*/ + +static int resample_get_audio( mlt_frame frame, int16_t **buffer, mlt_audio_format *format, int *frequency, int *channels, int *samples ) +{ + // Get the properties of the frame + mlt_properties properties = mlt_frame_properties( frame ); + + // Get the filter service + mlt_filter filter = mlt_frame_pop_audio( frame ); + + // Get the filter properties + mlt_properties filter_properties = mlt_filter_properties( filter ); + + // Get the resample information + int output_rate = mlt_properties_get_int( filter_properties, "frequency" ); + int16_t *sample_buffer = mlt_properties_get_data( filter_properties, "buffer", NULL ); + + // Obtain the resample context if it exists + ReSampleContext *resample = mlt_properties_get_data( filter_properties, "audio_resample", NULL ); + + // Used to return number of channels in the source + int channels_avail = *channels; + + // Loop variable + int i; + + // If no resample frequency is specified, default to requested value + if ( output_rate == 0 ) + output_rate = *frequency; + + // Restore the original get_audio + frame->get_audio = mlt_frame_pop_audio( frame ); + + // Get the producer's audio + mlt_frame_get_audio( frame, buffer, format, frequency, &channels_avail, samples ); + + // Duplicate channels as necessary + if ( channels_avail < *channels ) + { + int size = *channels * *samples * sizeof( int16_t ); + int16_t *new_buffer = mlt_pool_alloc( size ); + int j, k = 0; + + // Duplicate the existing channels + for ( i = 0; i < *samples; i++ ) + { + for ( j = 0; j < *channels; j++ ) + { + new_buffer[ ( i * *channels ) + j ] = (*buffer)[ ( i * channels_avail ) + k ]; + k = ( k + 1 ) % channels_avail; + } + } + + // Update the audio buffer now - destroys the old + mlt_properties_set_data( properties, "audio", new_buffer, size, ( mlt_destructor )mlt_pool_release, NULL ); + + *buffer = new_buffer; + } + else if ( channels_avail == 6 && *channels == 2 ) + { + // Nasty hack for ac3 5.1 audio - may be a cause of failure? + int size = *channels * *samples * sizeof( int16_t ); + int16_t *new_buffer = mlt_pool_alloc( size ); + + // Drop all but the first *channels + for ( i = 0; i < *samples; i++ ) + { + new_buffer[ ( i * *channels ) + 0 ] = (*buffer)[ ( i * channels_avail ) + 2 ]; + new_buffer[ ( i * *channels ) + 1 ] = (*buffer)[ ( i * channels_avail ) + 3 ]; + } + + // Update the audio buffer now - destroys the old + mlt_properties_set_data( properties, "audio", new_buffer, size, ( mlt_destructor )mlt_pool_release, NULL ); + + *buffer = new_buffer; + } + + // Return now if no work to do + if ( output_rate != *frequency ) + { + // Will store number of samples created + int used = 0; + + // Create a resampler if nececessary + if ( resample == NULL || *frequency != mlt_properties_get_int( filter_properties, "last_frequency" ) ) + { + // Create the resampler + resample = audio_resample_init( *channels, *channels, output_rate, *frequency ); + + // And store it on properties + mlt_properties_set_data( filter_properties, "audio_resample", resample, 0, ( mlt_destructor )audio_resample_close, NULL ); + + // And remember what it was created for + mlt_properties_set_int( filter_properties, "last_frequency", *frequency ); + } + + // Resample the audio + used = audio_resample( resample, sample_buffer, *buffer, *samples ); + + // Resize if necessary + if ( used > *samples ) + { + *buffer = mlt_pool_realloc( *buffer, *samples * *channels * sizeof( int16_t ) ); + mlt_properties_set_data( properties, "audio", *buffer, *channels * used * sizeof( int16_t ), mlt_pool_release, NULL ); + } + + // Copy samples + memcpy( *buffer, sample_buffer, *channels * used * sizeof( int16_t ) ); + + // Update output variables + *samples = used; + *frequency = output_rate; + } + + return 0; +} + +/** Filter processing. +*/ + +static mlt_frame filter_process( mlt_filter this, mlt_frame frame ) +{ + // Only call this if we have a means to get audio + if ( frame->get_audio != NULL ) + { + // Push the original method on to the stack + mlt_frame_push_audio( frame, frame->get_audio ); + + // Push the filter on to the stack + mlt_frame_push_audio( frame, this ); + + // Assign our get_audio method + frame->get_audio = resample_get_audio; + } + + return frame; +} + +/** Constructor for the filter. +*/ + +mlt_filter filter_avresample_init( char *arg ) +{ + // Create a filter + mlt_filter this = mlt_filter_new( ); + + // Initialise if successful + if ( this != NULL ) + { + // Calculate size of the buffer + int size = AVCODEC_MAX_AUDIO_FRAME_SIZE * sizeof( int16_t ); + + // Allocate the buffer + int16_t *buffer = mlt_pool_alloc( size ); + + // Assign the process method + this->process = filter_process; + + // Deal with argument + if ( arg != NULL ) + mlt_properties_set( mlt_filter_properties( this ), "frequency", arg ); + + // Default to 2 channel output + mlt_properties_set_int( mlt_filter_properties( this ), "channels", 2 ); + + // Store the buffer + mlt_properties_set_data( mlt_filter_properties( this ), "buffer", buffer, size, mlt_pool_release, NULL ); + } + + return this; +} diff --git a/src/modules/avformat/filter_avresample.h b/src/modules/avformat/filter_avresample.h new file mode 100644 index 0000000..c68ced2 --- /dev/null +++ b/src/modules/avformat/filter_avresample.h @@ -0,0 +1,28 @@ +/* + * filter_avresample.h -- adjust audio sample frequency + * Copyright (C) 2003-2004 Ushodaya Enterprises Limited + * Author: Charles Yates + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _FILTER_AVRESAMPLE_H_ +#define _FILTER_AVRESAMPLE_H_ + +#include + +extern mlt_filter filter_avresample_init( char *arg ); + +#endif diff --git a/src/modules/avformat/mmx.h b/src/modules/avformat/mmx.h new file mode 100644 index 0000000..7e94cfd --- /dev/null +++ b/src/modules/avformat/mmx.h @@ -0,0 +1,243 @@ +/* + * mmx.h + * Copyright (C) 1997-2001 H. Dietz and R. Fisher + */ +#ifndef AVCODEC_I386MMX_H +#define AVCODEC_I386MMX_H + +/* + * The type of an value that fits in an MMX register (note that long + * long constant values MUST be suffixed by LL and unsigned long long + * values by ULL, lest they be truncated by the compiler) + */ + +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} mmx_t; /* On an 8-byte (64-bit) boundary */ + + +#define mmx_i2r(op,imm,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "i" (imm) ) + +#define mmx_m2r(op,mem,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op,reg,mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op,regs,regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + + +#define emms() __asm__ __volatile__ ("emms") + +#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) +#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) +#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) + +#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) +#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) +#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) + +#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) +#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) +#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) +#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) + +#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) +#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) + +#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) +#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) +#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) +#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) +#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) +#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) + +#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) +#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) +#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) +#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) + +#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) +#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) +#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) +#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) + +#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) +#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) + +#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) +#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) + +#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) +#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) +#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) + +#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) +#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) +#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) +#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) + +#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) +#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) + +#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) +#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) + +#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) +#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) + +#define por_m2r(var,reg) mmx_m2r (por, var, reg) +#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) + +#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) +#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) +#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) +#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) +#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) +#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) +#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) +#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) +#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) + +#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) +#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) +#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) +#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) +#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) +#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) + +#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) +#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) +#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) +#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) +#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) +#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) +#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) +#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) +#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) + +#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) +#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) +#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) +#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) +#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) +#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) + +#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) +#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) +#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) +#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) + +#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) +#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) +#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) +#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) + +#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) +#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) +#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) +#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) +#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) +#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) + +#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) +#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) +#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) +#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) +#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) +#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) + +#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) +#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) + + +/* 3DNOW extensions */ + +#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) +#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) + + +/* AMD MMX extensions - also available in intel SSE */ + + +#define mmx_m2ri(op,mem,reg,imm) \ + __asm__ __volatile__ (#op " %1, %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem), "X" (imm)) +#define mmx_r2ri(op,regs,regd,imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + : /* nothing */ \ + : "X" (imm) ) + +#define mmx_fetch(mem,hint) \ + __asm__ __volatile__ ("prefetch" #hint " %0" \ + : /* nothing */ \ + : "X" (mem)) + + +#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) + +#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) + +#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) +#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) +#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) +#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) + +#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) + +#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) + +#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) +#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) + +#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) +#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) + +#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) +#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) + +#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) +#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) + +#define pmovmskb(mmreg,reg) \ + __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) + +#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) +#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) + +#define prefetcht0(mem) mmx_fetch (mem, t0) +#define prefetcht1(mem) mmx_fetch (mem, t1) +#define prefetcht2(mem) mmx_fetch (mem, t2) +#define prefetchnta(mem) mmx_fetch (mem, nta) + +#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) +#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) + +#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) +#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) + +#define sfence() __asm__ __volatile__ ("sfence\n\t") + +#endif /* AVCODEC_I386MMX_H */