1 .file "scale_line_22_yuv_mmx.S"
8 MSG: .ascii "scale_line_22_yuv_mmx: %d %d\n"
13 #if !defined(__MINGW32__) && !defined(__CYGWIN__)
15 .globl pixops_scale_line_22_yuv_mmx
16 .type pixops_scale_line_22_yuv_mmx,@function
17 pixops_scale_line_22_yuv_mmx:
21 .globl _pixops_scale_line_22_yuv_mmx
22 _pixops_scale_line_22_yuv_mmx:
29 * p (dest): 12(%ebp) %esi
50 * int x_scaled -24(%ebp)
57 /* Initialize variables */
58 movl 36(%ebp),%eax # destx
60 movl 32(%ebp),%ebx # x
61 movl 12(%ebp),%esi # dest
63 cmpl 28(%ebp),%esi # dest == dest_end ?
66 /* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
67 * points we are interpolating between, as:
74 * Load next component values into mm1 (src0) and mm3 (src1)
76 movl %ebx, %eax # x_scaled, computed starting from x held in %ebx
78 andl $0xfffffffe, %eax # clear bit 0 so the byte offset is even
79 movl %eax, %edx # x_aligned, starts as a copy of x_scaled
80 andl $0xfffffffc, %edx # clear low 2 bits so the offset is 4-byte aligned
82 movl 16(%ebp), %edi # get src0
83 movl (%edi,%eax), %ecx # get y: dword at src0 + x_scaled
84 andl $0x00ff00ff, %ecx # keep bytes 0 and 2 (y), zero bytes 1 and 3
85 movl (%edi,%edx), %eax # get uv: dword at src0 + x_aligned (overwrites x_scaled in %eax)
86 andl $0xff00ff00, %eax # keep bytes 1 and 3 (uv), zero bytes 0 and 2
87 orl %eax, %ecx # composite y, uv into one dword
88 movd %ecx, %mm1 # move to mmx1 (movd zero-extends into the 64-bit register)
91 movl 20(%ebp), %edi # get src1
92 movl (%edi,%edx), %ecx # get y -- NOTE(review): indexed by %edx (x_aligned), not x_scaled as in the src0 load; confirm this is intended
93 andl $0x00ff00ff, %ecx # keep bytes 0 and 2 (y), zero bytes 1 and 3
94 movl (%edi,%edx), %eax # get uv: dword at src1 + x_aligned
95 andl $0xff00ff00, %eax # keep bytes 1 and 3 (uv), zero bytes 0 and 2
96 orl %eax, %ecx # composite y, uv into one dword
97 movd %ecx, %mm3 # move to mmx3 (movd zero-extends into the 64-bit register)
105 /* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
108 movl 8(%ebp), %edi # get weights pointer
113 /* At this point, %edi holds weights. Load the 4 weights into
114 * %mm4,%mm5,%mm6,%mm7, multiply and accumulate.
116 movq (%edi,%eax), %mm4
118 movq 8(%edi,%eax), %mm5
120 movq 16(%edi,%eax), %mm6
122 movq 24(%edi,%eax), %mm7
129 /* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256
132 movl $0x80808080, %eax
138 /* Pack into %eax and store result
143 movb %al, (%esi) # *dest = y
145 movl 36(%ebp), %ecx # get dest_x
146 andl $1, %ecx # select u or v
147 sall $1, %ecx # determine offset
148 addl $1, %ecx # relative to x_aligned
149 sall $3, %ecx # offset * 8 bits/byte
153 movb %al, 1(%esi) # *dest = uv
155 addl $2, %esi # dest += 2
156 cmpl %esi,28(%ebp) # if dest == dest_end
159 addl $1, 36(%ebp) # dest_x++
163 addl 24(%ebp), %ebx # x += x_step
165 * Load current component values into mm0 (src0) and mm2 (src1)
171 * Load next component values into mm1 (src0) and mm3 (src1)
173 movl %ebx, %eax # x_scaled, computed starting from x held in %ebx
175 andl $0xfffffffe, %eax # clear bit 0 so the byte offset is even
176 movl %eax, %edx # x_aligned, starts as a copy of x_scaled
177 andl $0xfffffffc, %edx # clear low 2 bits so the offset is 4-byte aligned
179 movl 16(%ebp), %edi # get src0
180 movl (%edi,%eax), %ecx # get y: dword at src0 + x_scaled
181 andl $0x00ff00ff, %ecx # keep bytes 0 and 2 (y), zero bytes 1 and 3
182 movl (%edi,%edx), %eax # get uv: dword at src0 + x_aligned (overwrites x_scaled in %eax)
183 andl $0xff00ff00, %eax # keep bytes 1 and 3 (uv), zero bytes 0 and 2
184 orl %eax, %ecx # composite y, uv into one dword
185 movd %ecx, %mm1 # move to mmx1 (movd zero-extends into the 64-bit register)
188 movl 20(%ebp), %edi # get src1
189 movl (%edi,%edx), %ecx # get y -- NOTE(review): indexed by %edx (x_aligned), not x_scaled as in the src0 load; confirm this is intended
190 andl $0x00ff00ff, %ecx # keep bytes 0 and 2 (y), zero bytes 1 and 3
191 movl (%edi,%edx), %eax # get uv: dword at src1 + x_aligned
192 andl $0xff00ff00, %eax # keep bytes 1 and 3 (uv), zero bytes 0 and 2
193 orl %eax, %ecx # composite y, uv into one dword
194 movd %ecx, %mm3 # move to mmx3 (movd zero-extends into the 64-bit register)