1 .file "scale_line_22_yuv_mmx.S"
8 MSG: .ascii "scale_line_22_yuv_mmx: %d %d\n"
13 #if !defined(__MINGW32__) && !defined(__CYGWIN__)
15 .globl pixops_scale_line_22_yuv_mmx
16 .type pixops_scale_line_22_yuv_mmx,@function
17 pixops_scale_line_22_yuv_mmx:
21 .globl _pixops_scale_line_22_yuv_mmx
22 _pixops_scale_line_22_yuv_mmx:
29 * p (dest): 12(%ebp) %esi
50 * int x_scaled -24(%ebp)
57 /* Initialize variables */
58 movl 36(%ebp),%eax # destx
60 movl 32(%ebp),%ebx # x
61 movl 12(%ebp),%esi # dest
63 cmpl 28(%ebp),%esi # dest == dest_end ?
66 /* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
67 * points we are interpolating between, as:
72 /* Load initial values into %mm1, %mm3 */
74 /* x_scaled = ( x >> 16 ) * stride */
81 movzbl (%edi,%edx), %ecx
83 /* x_aligned = x_scaled divided by 2 and multiplied by 4 */
88 /* uv_index = ( ( dest_x & 1 ) << 1 ) + 1; */
93 movzbl 1(%edi,%edx), %eax
101 /* x_scaled = ( x >> 16 ) * stride */
108 movzbl (%edi,%edx), %ecx
110 /* x_aligned = x_scaled divided by 2 and multiplied by 4 */
115 /* uv_index = ( ( dest_x & 1 ) << 1 ) + 1; */
120 movzbl (%edi,%edx), %eax
132 /* x_scaled = x >> 16 */
141 /* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
148 /* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and
151 movq (%edi,%eax),%mm4
153 movq 8(%edi,%eax),%mm5
155 movq 16(%edi,%eax),%mm6
156 movq 24(%edi,%eax),%mm7
163 /* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256
166 movl $0x80808080, %eax
172 /* Pack into %eax and store result
177 movb %al, 0(%esi) # *dest = y
179 movb %al, 1(%esi) # *( dest + 1 ) = uv
181 addl $2, %esi # dest += 2
183 cmpl %esi,28(%ebp) # if dest == dest_end ?
186 movl 36(%ebp), %eax # get dest_x
187 addl $1, %eax # dest_x++
188 movl %eax, 36(%ebp) # put dest_x
190 addl 24(%ebp), %ebx # x += x_step
192 movl %ebx, %edx # x_scaled = x ...
193 sarl $16, %edx # >> 16
194 movl %edx, -24(%ebp) # save x_scaled
199 * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
204 sall $1, %edx # x_scaled *= channels
206 movl 16(%ebp), %edi # get src0
207 movzbl (%edi,%edx), %ecx # y = src0[ x_scaled ]
209 sarl $2, %edx # x_aligned = ( x_scaled / channels ) >> 1 ...
212 movl 36(%ebp), %eax # uv_index = dest_x ...
214 andl $1, %eax # ( dest_x & 1 ) ...
216 addl %eax, %edx # x_aligned += uv_index
223 movzbl 1(%edi,%edx), %eax # uv = src0[ x_aligned + 1 ]
224 shll $8, %eax # store uv
227 movd %ecx, %mm1 # move to mmx1
230 movl %ebx, %edx # x_scaled = x ...
231 sarl $16, %edx # >> 16
232 sall $1, %edx # x_scaled *= channels
234 movl 20(%ebp), %edi # get src1
235 movzbl (%edi,%edx), %ecx # y = src1[ x_scaled ]
237 sarl $2, %edx # x_aligned = ( x_scaled / channels ) >> 1 ...
240 movl 36(%ebp), %eax # uv_index = dest_x ...
241 andl $1, %eax # ( dest_x & 1 ) ...
243 addl %eax, %edx # x_aligned += uv_index
244 movzbl 1(%edi,%edx), %eax # uv = src1[ x_aligned + 1 ]
245 shll $8, %eax # store uv
248 movd %ecx, %mm3 # move to mmx3
251 movl 8(%ebp), %edi # get weights pointer