00024 #undef REAL_MOVNTQ
00025 #undef MOVNTQ
00026 #undef PAVGB
00027 #undef PREFETCH
00028 #undef PREFETCHW
00029 #undef EMMS
00030 #undef SFENCE
00031
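// On AMD 3DNow! CPUs femms is the cheaper way to leave MMX state, so it is
// preferred over emms when available.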
00032 #ifdef HAVE_3DNOW
00033
00034 #define EMMS "femms"
00035 #else
00036 #define EMMS "emms"
00037 #endif
00038
00039 #ifdef HAVE_3DNOW
00040 #define PREFETCH "prefetch"
00041 #define PREFETCHW "prefetchw"
00042 #elif defined (HAVE_MMX2)
00043 #define PREFETCH "prefetchnta"
00044 #define PREFETCHW "prefetcht0"
00045 #else
00046 #define PREFETCH " # nop"
00047 #define PREFETCHW " # nop"
00048 #endif
00049
00050 #ifdef HAVE_MMX2
00051 #define SFENCE "sfence"
00052 #else
00053 #define SFENCE " # nop"
00054 #endif
00055
00056 #ifdef HAVE_MMX2
00057 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00058 #elif defined (HAVE_3DNOW)
00059 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00060 #endif
00061
00062 #ifdef HAVE_MMX2
00063 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00064 #else
00065 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00066 #endif
00067 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00068
00069 #ifdef HAVE_ALTIVEC
00070 #include "swscale_altivec_template.c"
00071 #endif
00072
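// Vertical scaler for one planar output row: walk the MMX filter list at
// 'offset' inside the context (%0 = &c->redDither), multiply each source line
// by its 16-bit coefficient with pmulhw, accumulate into mm3/mm4 (seeded with
// the rounder), then shift down by 3, pack to bytes and store into 'dest'
// (%1) until 'width' (%2) pixels are written.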
00073 #define YSCALEYUV2YV12X(x, offset, dest, width) \
00074 asm volatile(\
00075 "xor %%"REG_a", %%"REG_a" \n\t"\
00076 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00077 "movq %%mm3, %%mm4 \n\t"\
00078 "lea " offset "(%0), %%"REG_d" \n\t"\
00079 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00080 ASMALIGN(4) \
00081 "1: \n\t"\
00082 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00083 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00084 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" \
00085 "add $16, %%"REG_d" \n\t"\
00086 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00087 "test %%"REG_S", %%"REG_S" \n\t"\
00088 "pmulhw %%mm0, %%mm2 \n\t"\
00089 "pmulhw %%mm0, %%mm5 \n\t"\
00090 "paddw %%mm2, %%mm3 \n\t"\
00091 "paddw %%mm5, %%mm4 \n\t"\
00092 " jnz 1b \n\t"\
00093 "psraw $3, %%mm3 \n\t"\
00094 "psraw $3, %%mm4 \n\t"\
00095 "packuswb %%mm4, %%mm3 \n\t"\
00096 MOVNTQ(%%mm3, (%1, %%REGa))\
00097 "add $8, %%"REG_a" \n\t"\
00098 "cmp %2, %%"REG_a" \n\t"\
00099 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00100 "movq %%mm3, %%mm4 \n\t"\
00101 "lea " offset "(%0), %%"REG_d" \n\t"\
00102 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00103 "jb 1b \n\t"\
00104 :: "r" (&c->redDither),\
00105 "r" (dest), "g" (width)\
00106 : "%"REG_a, "%"REG_d, "%"REG_S\
00107 );
00108
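// SWS_ACCURATE_RND variant of the above: the coefficients are applied with
// pmaddwd and summed in 32-bit precision (mm4-mm7) before the rounder is
// added, trading a few extra instructions for better rounding.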
00109 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
00110 asm volatile(\
00111 "lea " offset "(%0), %%"REG_d" \n\t"\
00112 "xor %%"REG_a", %%"REG_a" \n\t"\
00113 "pxor %%mm4, %%mm4 \n\t"\
00114 "pxor %%mm5, %%mm5 \n\t"\
00115 "pxor %%mm6, %%mm6 \n\t"\
00116 "pxor %%mm7, %%mm7 \n\t"\
00117 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00118 ASMALIGN(4) \
00119 "1: \n\t"\
00120 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00121 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00122 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
00123 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" \
00124 "movq %%mm0, %%mm3 \n\t"\
00125 "punpcklwd %%mm1, %%mm0 \n\t"\
00126 "punpckhwd %%mm1, %%mm3 \n\t"\
00127 "movq 8(%%"REG_d"), %%mm1 \n\t" \
00128 "pmaddwd %%mm1, %%mm0 \n\t"\
00129 "pmaddwd %%mm1, %%mm3 \n\t"\
00130 "paddd %%mm0, %%mm4 \n\t"\
00131 "paddd %%mm3, %%mm5 \n\t"\
00132 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00133 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
00134 "add $16, %%"REG_d" \n\t"\
00135 "test %%"REG_S", %%"REG_S" \n\t"\
00136 "movq %%mm2, %%mm0 \n\t"\
00137 "punpcklwd %%mm3, %%mm2 \n\t"\
00138 "punpckhwd %%mm3, %%mm0 \n\t"\
00139 "pmaddwd %%mm1, %%mm2 \n\t"\
00140 "pmaddwd %%mm1, %%mm0 \n\t"\
00141 "paddd %%mm2, %%mm6 \n\t"\
00142 "paddd %%mm0, %%mm7 \n\t"\
00143 " jnz 1b \n\t"\
00144 "psrad $16, %%mm4 \n\t"\
00145 "psrad $16, %%mm5 \n\t"\
00146 "psrad $16, %%mm6 \n\t"\
00147 "psrad $16, %%mm7 \n\t"\
00148 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00149 "packssdw %%mm5, %%mm4 \n\t"\
00150 "packssdw %%mm7, %%mm6 \n\t"\
00151 "paddw %%mm0, %%mm4 \n\t"\
00152 "paddw %%mm0, %%mm6 \n\t"\
00153 "psraw $3, %%mm4 \n\t"\
00154 "psraw $3, %%mm6 \n\t"\
00155 "packuswb %%mm6, %%mm4 \n\t"\
00156 MOVNTQ(%%mm4, (%1, %%REGa))\
00157 "add $8, %%"REG_a" \n\t"\
00158 "cmp %2, %%"REG_a" \n\t"\
00159 "lea " offset "(%0), %%"REG_d" \n\t"\
00160 "pxor %%mm4, %%mm4 \n\t"\
00161 "pxor %%mm5, %%mm5 \n\t"\
00162 "pxor %%mm6, %%mm6 \n\t"\
00163 "pxor %%mm7, %%mm7 \n\t"\
00164 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00165 "jb 1b \n\t"\
00166 :: "r" (&c->redDither),\
00167 "r" (dest), "g" (width)\
00168 : "%"REG_a, "%"REG_d, "%"REG_S\
00169 );
00170
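// Unscaled vertical path: one input line per output line, so each 16-bit
// sample is just shifted down by 7 and packed to bytes. The caller passes
// pointers advanced to the end of the row and a negative count in %2, so the
// index runs from -width up to 0.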
00171 #define YSCALEYUV2YV121 \
00172 "mov %2, %%"REG_a" \n\t"\
00173 ASMALIGN(4) \
00174 "1: \n\t"\
00175 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
00176 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
00177 "psraw $7, %%mm0 \n\t"\
00178 "psraw $7, %%mm1 \n\t"\
00179 "packuswb %%mm1, %%mm0 \n\t"\
00180 MOVNTQ(%%mm0, (%1, %%REGa))\
00181 "add $8, %%"REG_a" \n\t"\
00182 "jnc 1b \n\t"
00183
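// Vertical scalers for packed output: the first inner loop accumulates the
// chroma filter taps (U in mm3, V in mm4), the second accumulates the luma
// taps (mm1/mm7). The caller appends one of the YSCALEYUV2RGBX/WRITE* macros
// to convert and store, and closes the asm with YSCALEYUV2PACKEDX_END.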
00191 #define YSCALEYUV2PACKEDX \
00192 asm volatile(\
00193 "xor %%"REG_a", %%"REG_a" \n\t"\
00194 ASMALIGN(4)\
00195 "nop \n\t"\
00196 "1: \n\t"\
00197 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00198 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00199 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00200 "movq %%mm3, %%mm4 \n\t"\
00201 ASMALIGN(4)\
00202 "2: \n\t"\
00203 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00204 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00205 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00206 "add $16, %%"REG_d" \n\t"\
00207 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00208 "pmulhw %%mm0, %%mm2 \n\t"\
00209 "pmulhw %%mm0, %%mm5 \n\t"\
00210 "paddw %%mm2, %%mm3 \n\t"\
00211 "paddw %%mm5, %%mm4 \n\t"\
00212 "test %%"REG_S", %%"REG_S" \n\t"\
00213 " jnz 2b \n\t"\
00214 \
00215 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00216 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00217 "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\
00218 "movq %%mm1, %%mm7 \n\t"\
00219 ASMALIGN(4)\
00220 "2: \n\t"\
00221 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00222 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00223 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" \
00224 "add $16, %%"REG_d" \n\t"\
00225 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00226 "pmulhw %%mm0, %%mm2 \n\t"\
00227 "pmulhw %%mm0, %%mm5 \n\t"\
00228 "paddw %%mm2, %%mm1 \n\t"\
00229 "paddw %%mm5, %%mm7 \n\t"\
00230 "test %%"REG_S", %%"REG_S" \n\t"\
00231 " jnz 2b \n\t"\
00232
00233 #define YSCALEYUV2PACKEDX_END \
00234 :: "r" (&c->redDither), \
00235 "m" (dummy), "m" (dummy), "m" (dummy),\
00236 "r" (dest), "m" (dstW) \
00237 : "%"REG_a, "%"REG_d, "%"REG_S \
00238 );
00239
00240 #define YSCALEYUV2PACKEDX_ACCURATE \
00241 asm volatile(\
00242 "xor %%"REG_a", %%"REG_a" \n\t"\
00243 ASMALIGN(4)\
00244 "nop \n\t"\
00245 "1: \n\t"\
00246 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00247 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00248 "pxor %%mm4, %%mm4 \n\t"\
00249 "pxor %%mm5, %%mm5 \n\t"\
00250 "pxor %%mm6, %%mm6 \n\t"\
00251 "pxor %%mm7, %%mm7 \n\t"\
00252 ASMALIGN(4)\
00253 "2: \n\t"\
00254 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00255 "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00256 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
00257 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00258 "movq %%mm0, %%mm3 \n\t"\
00259 "punpcklwd %%mm1, %%mm0 \n\t"\
00260 "punpckhwd %%mm1, %%mm3 \n\t"\
00261 "movq 8(%%"REG_d"), %%mm1 \n\t" \
00262 "pmaddwd %%mm1, %%mm0 \n\t"\
00263 "pmaddwd %%mm1, %%mm3 \n\t"\
00264 "paddd %%mm0, %%mm4 \n\t"\
00265 "paddd %%mm3, %%mm5 \n\t"\
00266 "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00267 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
00268 "add $16, %%"REG_d" \n\t"\
00269 "test %%"REG_S", %%"REG_S" \n\t"\
00270 "movq %%mm2, %%mm0 \n\t"\
00271 "punpcklwd %%mm3, %%mm2 \n\t"\
00272 "punpckhwd %%mm3, %%mm0 \n\t"\
00273 "pmaddwd %%mm1, %%mm2 \n\t"\
00274 "pmaddwd %%mm1, %%mm0 \n\t"\
00275 "paddd %%mm2, %%mm6 \n\t"\
00276 "paddd %%mm0, %%mm7 \n\t"\
00277 " jnz 2b \n\t"\
00278 "psrad $16, %%mm4 \n\t"\
00279 "psrad $16, %%mm5 \n\t"\
00280 "psrad $16, %%mm6 \n\t"\
00281 "psrad $16, %%mm7 \n\t"\
00282 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00283 "packssdw %%mm5, %%mm4 \n\t"\
00284 "packssdw %%mm7, %%mm6 \n\t"\
00285 "paddw %%mm0, %%mm4 \n\t"\
00286 "paddw %%mm0, %%mm6 \n\t"\
00287 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00288 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00289 \
00290 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00291 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00292 "pxor %%mm1, %%mm1 \n\t"\
00293 "pxor %%mm5, %%mm5 \n\t"\
00294 "pxor %%mm7, %%mm7 \n\t"\
00295 "pxor %%mm6, %%mm6 \n\t"\
00296 ASMALIGN(4)\
00297 "2: \n\t"\
00298 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00299 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00300 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
00301 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00302 "movq %%mm0, %%mm3 \n\t"\
00303 "punpcklwd %%mm4, %%mm0 \n\t"\
00304 "punpckhwd %%mm4, %%mm3 \n\t"\
00305 "movq 8(%%"REG_d"), %%mm4 \n\t" \
00306 "pmaddwd %%mm4, %%mm0 \n\t"\
00307 "pmaddwd %%mm4, %%mm3 \n\t"\
00308 "paddd %%mm0, %%mm1 \n\t"\
00309 "paddd %%mm3, %%mm5 \n\t"\
00310 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00311 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
00312 "add $16, %%"REG_d" \n\t"\
00313 "test %%"REG_S", %%"REG_S" \n\t"\
00314 "movq %%mm2, %%mm0 \n\t"\
00315 "punpcklwd %%mm3, %%mm2 \n\t"\
00316 "punpckhwd %%mm3, %%mm0 \n\t"\
00317 "pmaddwd %%mm4, %%mm2 \n\t"\
00318 "pmaddwd %%mm4, %%mm0 \n\t"\
00319 "paddd %%mm2, %%mm7 \n\t"\
00320 "paddd %%mm0, %%mm6 \n\t"\
00321 " jnz 2b \n\t"\
00322 "psrad $16, %%mm1 \n\t"\
00323 "psrad $16, %%mm5 \n\t"\
00324 "psrad $16, %%mm7 \n\t"\
00325 "psrad $16, %%mm6 \n\t"\
00326 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00327 "packssdw %%mm5, %%mm1 \n\t"\
00328 "packssdw %%mm6, %%mm7 \n\t"\
00329 "paddw %%mm0, %%mm1 \n\t"\
00330 "paddw %%mm0, %%mm7 \n\t"\
00331 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00332 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00333
00334 #define YSCALEYUV2RGBX \
00335 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00336 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00337 "movq %%mm3, %%mm2 \n\t" \
00338 "movq %%mm4, %%mm5 \n\t" \
00339 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00340 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00341 \
00342 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00343 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00344 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00345 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00346 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00347 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00348 \
00349 "paddw %%mm3, %%mm4 \n\t"\
00350 "movq %%mm2, %%mm0 \n\t"\
00351 "movq %%mm5, %%mm6 \n\t"\
00352 "movq %%mm4, %%mm3 \n\t"\
00353 "punpcklwd %%mm2, %%mm2 \n\t"\
00354 "punpcklwd %%mm5, %%mm5 \n\t"\
00355 "punpcklwd %%mm4, %%mm4 \n\t"\
00356 "paddw %%mm1, %%mm2 \n\t"\
00357 "paddw %%mm1, %%mm5 \n\t"\
00358 "paddw %%mm1, %%mm4 \n\t"\
00359 "punpckhwd %%mm0, %%mm0 \n\t"\
00360 "punpckhwd %%mm6, %%mm6 \n\t"\
00361 "punpckhwd %%mm3, %%mm3 \n\t"\
00362 "paddw %%mm7, %%mm0 \n\t"\
00363 "paddw %%mm7, %%mm6 \n\t"\
00364 "paddw %%mm7, %%mm3 \n\t"\
00365 \
00366 "packuswb %%mm0, %%mm2 \n\t"\
00367 "packuswb %%mm6, %%mm5 \n\t"\
00368 "packuswb %%mm3, %%mm4 \n\t"\
00369 "pxor %%mm7, %%mm7 \n\t"
00370 #if 0
00371 #define FULL_YSCALEYUV2RGB \
00372 "pxor %%mm7, %%mm7 \n\t"\
00373 "movd %6, %%mm6 \n\t" \
00374 "punpcklwd %%mm6, %%mm6 \n\t"\
00375 "punpcklwd %%mm6, %%mm6 \n\t"\
00376 "movd %7, %%mm5 \n\t" \
00377 "punpcklwd %%mm5, %%mm5 \n\t"\
00378 "punpcklwd %%mm5, %%mm5 \n\t"\
00379 "xor %%"REG_a", %%"REG_a" \n\t"\
00380 ASMALIGN(4)\
00381 "1: \n\t"\
00382 "movq (%0, %%"REG_a",2), %%mm0 \n\t" \
00383 "movq (%1, %%"REG_a",2), %%mm1 \n\t" \
00384 "movq (%2, %%"REG_a",2), %%mm2 \n\t" \
00385 "movq (%3, %%"REG_a",2), %%mm3 \n\t" \
00386 "psubw %%mm1, %%mm0 \n\t" \
00387 "psubw %%mm3, %%mm2 \n\t" \
00388 "pmulhw %%mm6, %%mm0 \n\t" \
00389 "pmulhw %%mm5, %%mm2 \n\t" \
00390 "psraw $4, %%mm1 \n\t" \
00391 "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" \
00392 "psraw $4, %%mm3 \n\t" \
00393 "paddw %%mm0, %%mm1 \n\t" \
00394 "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" \
00395 "paddw %%mm2, %%mm3 \n\t" \
00396 "psubw %%mm0, %%mm4 \n\t" \
00397 "psubw "MANGLE(w80)", %%mm1 \n\t" \
00398 "psubw "MANGLE(w400)", %%mm3 \n\t" \
00399 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
00400 \
00401 \
00402 "pmulhw %%mm5, %%mm4 \n\t" \
00403 "movq %%mm3, %%mm2 \n\t" \
00404 "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\
00405 "psraw $4, %%mm0 \n\t" \
00406 "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\
00407 "paddw %%mm4, %%mm0 \n\t" \
00408 "psubw "MANGLE(w400)", %%mm0 \n\t" \
00409 \
00410 \
00411 "movq %%mm0, %%mm4 \n\t" \
00412 "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\
00413 "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\
00414 "paddw %%mm1, %%mm3 \n\t" \
00415 "paddw %%mm1, %%mm0 \n\t" \
00416 "packuswb %%mm3, %%mm3 \n\t"\
00417 \
00418 "packuswb %%mm0, %%mm0 \n\t"\
00419 "paddw %%mm4, %%mm2 \n\t"\
00420 "paddw %%mm2, %%mm1 \n\t" \
00421 \
00422 "packuswb %%mm1, %%mm1 \n\t"
00423 #endif
00424
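// Two-line variants used by yuv2packed2(): the vertical filter is a single
// blend between two source lines, with the blend factors kept at
// LUM/CHR_MMX_FILTER_OFFSET+8 inside the context ("#c").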
00425 #define REAL_YSCALEYUV2PACKED(index, c) \
00426 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00427 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00428 "psraw $3, %%mm0 \n\t"\
00429 "psraw $3, %%mm1 \n\t"\
00430 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00431 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00432 "xor "#index", "#index" \n\t"\
00433 ASMALIGN(4)\
00434 "1: \n\t"\
00435 "movq (%2, "#index"), %%mm2 \n\t" \
00436 "movq (%3, "#index"), %%mm3 \n\t" \
00437 "movq 4096(%2, "#index"), %%mm5 \n\t" \
00438 "movq 4096(%3, "#index"), %%mm4 \n\t" \
00439 "psubw %%mm3, %%mm2 \n\t" \
00440 "psubw %%mm4, %%mm5 \n\t" \
00441 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00442 "pmulhw %%mm0, %%mm2 \n\t" \
00443 "pmulhw %%mm0, %%mm5 \n\t" \
00444 "psraw $7, %%mm3 \n\t" \
00445 "psraw $7, %%mm4 \n\t" \
00446 "paddw %%mm2, %%mm3 \n\t" \
00447 "paddw %%mm5, %%mm4 \n\t" \
00448 "movq (%0, "#index", 2), %%mm0 \n\t" \
00449 "movq (%1, "#index", 2), %%mm1 \n\t" \
00450 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
00451 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
00452 "psubw %%mm1, %%mm0 \n\t" \
00453 "psubw %%mm7, %%mm6 \n\t" \
00454 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00455 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00456 "psraw $7, %%mm1 \n\t" \
00457 "psraw $7, %%mm7 \n\t" \
00458 "paddw %%mm0, %%mm1 \n\t" \
00459 "paddw %%mm6, %%mm7 \n\t" \
00460
00461 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
00462
00463 #define REAL_YSCALEYUV2RGB(index, c) \
00464 "xor "#index", "#index" \n\t"\
00465 ASMALIGN(4)\
00466 "1: \n\t"\
00467 "movq (%2, "#index"), %%mm2 \n\t" \
00468 "movq (%3, "#index"), %%mm3 \n\t" \
00469 "movq 4096(%2, "#index"), %%mm5 \n\t" \
00470 "movq 4096(%3, "#index"), %%mm4 \n\t" \
00471 "psubw %%mm3, %%mm2 \n\t" \
00472 "psubw %%mm4, %%mm5 \n\t" \
00473 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00474 "pmulhw %%mm0, %%mm2 \n\t" \
00475 "pmulhw %%mm0, %%mm5 \n\t" \
00476 "psraw $4, %%mm3 \n\t" \
00477 "psraw $4, %%mm4 \n\t" \
00478 "paddw %%mm2, %%mm3 \n\t" \
00479 "paddw %%mm5, %%mm4 \n\t" \
00480 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00481 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00482 "movq %%mm3, %%mm2 \n\t" \
00483 "movq %%mm4, %%mm5 \n\t" \
00484 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00485 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00486 \
00487 "movq (%0, "#index", 2), %%mm0 \n\t" \
00488 "movq (%1, "#index", 2), %%mm1 \n\t" \
00489 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
00490 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
00491 "psubw %%mm1, %%mm0 \n\t" \
00492 "psubw %%mm7, %%mm6 \n\t" \
00493 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00494 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00495 "psraw $4, %%mm1 \n\t" \
00496 "psraw $4, %%mm7 \n\t" \
00497 "paddw %%mm0, %%mm1 \n\t" \
00498 "paddw %%mm6, %%mm7 \n\t" \
00499 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00500 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00501 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00502 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00503 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00504 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00505 \
00506 "paddw %%mm3, %%mm4 \n\t"\
00507 "movq %%mm2, %%mm0 \n\t"\
00508 "movq %%mm5, %%mm6 \n\t"\
00509 "movq %%mm4, %%mm3 \n\t"\
00510 "punpcklwd %%mm2, %%mm2 \n\t"\
00511 "punpcklwd %%mm5, %%mm5 \n\t"\
00512 "punpcklwd %%mm4, %%mm4 \n\t"\
00513 "paddw %%mm1, %%mm2 \n\t"\
00514 "paddw %%mm1, %%mm5 \n\t"\
00515 "paddw %%mm1, %%mm4 \n\t"\
00516 "punpckhwd %%mm0, %%mm0 \n\t"\
00517 "punpckhwd %%mm6, %%mm6 \n\t"\
00518 "punpckhwd %%mm3, %%mm3 \n\t"\
00519 "paddw %%mm7, %%mm0 \n\t"\
00520 "paddw %%mm7, %%mm6 \n\t"\
00521 "paddw %%mm7, %%mm3 \n\t"\
00522 \
00523 "packuswb %%mm0, %%mm2 \n\t"\
00524 "packuswb %%mm6, %%mm5 \n\t"\
00525 "packuswb %%mm3, %%mm4 \n\t"\
00526 "pxor %%mm7, %%mm7 \n\t"
00527 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
00528
00529 #define REAL_YSCALEYUV2PACKED1(index, c) \
00530 "xor "#index", "#index" \n\t"\
00531 ASMALIGN(4)\
00532 "1: \n\t"\
00533 "movq (%2, "#index"), %%mm3 \n\t" \
00534 "movq 4096(%2, "#index"), %%mm4 \n\t" \
00535 "psraw $7, %%mm3 \n\t" \
00536 "psraw $7, %%mm4 \n\t" \
00537 "movq (%0, "#index", 2), %%mm1 \n\t" \
00538 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00539 "psraw $7, %%mm1 \n\t" \
00540 "psraw $7, %%mm7 \n\t" \
00541
00542 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
00543
00544 #define REAL_YSCALEYUV2RGB1(index, c) \
00545 "xor "#index", "#index" \n\t"\
00546 ASMALIGN(4)\
00547 "1: \n\t"\
00548 "movq (%2, "#index"), %%mm3 \n\t" \
00549 "movq 4096(%2, "#index"), %%mm4 \n\t" \
00550 "psraw $4, %%mm3 \n\t" \
00551 "psraw $4, %%mm4 \n\t" \
00552 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00553 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00554 "movq %%mm3, %%mm2 \n\t" \
00555 "movq %%mm4, %%mm5 \n\t" \
00556 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00557 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00558 \
00559 "movq (%0, "#index", 2), %%mm1 \n\t" \
00560 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00561 "psraw $4, %%mm1 \n\t" \
00562 "psraw $4, %%mm7 \n\t" \
00563 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00564 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00565 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00566 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00567 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00568 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00569 \
00570 "paddw %%mm3, %%mm4 \n\t"\
00571 "movq %%mm2, %%mm0 \n\t"\
00572 "movq %%mm5, %%mm6 \n\t"\
00573 "movq %%mm4, %%mm3 \n\t"\
00574 "punpcklwd %%mm2, %%mm2 \n\t"\
00575 "punpcklwd %%mm5, %%mm5 \n\t"\
00576 "punpcklwd %%mm4, %%mm4 \n\t"\
00577 "paddw %%mm1, %%mm2 \n\t"\
00578 "paddw %%mm1, %%mm5 \n\t"\
00579 "paddw %%mm1, %%mm4 \n\t"\
00580 "punpckhwd %%mm0, %%mm0 \n\t"\
00581 "punpckhwd %%mm6, %%mm6 \n\t"\
00582 "punpckhwd %%mm3, %%mm3 \n\t"\
00583 "paddw %%mm7, %%mm0 \n\t"\
00584 "paddw %%mm7, %%mm6 \n\t"\
00585 "paddw %%mm7, %%mm3 \n\t"\
00586 \
00587 "packuswb %%mm0, %%mm2 \n\t"\
00588 "packuswb %%mm6, %%mm5 \n\t"\
00589 "packuswb %%mm3, %%mm4 \n\t"\
00590 "pxor %%mm7, %%mm7 \n\t"
00591 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
00592
00593 #define REAL_YSCALEYUV2PACKED1b(index, c) \
00594 "xor "#index", "#index" \n\t"\
00595 ASMALIGN(4)\
00596 "1: \n\t"\
00597 "movq (%2, "#index"), %%mm2 \n\t" \
00598 "movq (%3, "#index"), %%mm3 \n\t" \
00599 "movq 4096(%2, "#index"), %%mm5 \n\t" \
00600 "movq 4096(%3, "#index"), %%mm4 \n\t" \
00601 "paddw %%mm2, %%mm3 \n\t" \
00602 "paddw %%mm5, %%mm4 \n\t" \
00603 "psrlw $8, %%mm3 \n\t" \
00604 "psrlw $8, %%mm4 \n\t" \
00605 "movq (%0, "#index", 2), %%mm1 \n\t" \
00606 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00607 "psraw $7, %%mm1 \n\t" \
00608 "psraw $7, %%mm7 \n\t"
00609 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
00610
00611
00612 #define REAL_YSCALEYUV2RGB1b(index, c) \
00613 "xor "#index", "#index" \n\t"\
00614 ASMALIGN(4)\
00615 "1: \n\t"\
00616 "movq (%2, "#index"), %%mm2 \n\t" \
00617 "movq (%3, "#index"), %%mm3 \n\t" \
00618 "movq 4096(%2, "#index"), %%mm5 \n\t" \
00619 "movq 4096(%3, "#index"), %%mm4 \n\t" \
00620 "paddw %%mm2, %%mm3 \n\t" \
00621 "paddw %%mm5, %%mm4 \n\t" \
00622 "psrlw $5, %%mm3 \n\t" \
00623 "psrlw $5, %%mm4 \n\t" \
00624 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00625 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00626 "movq %%mm3, %%mm2 \n\t" \
00627 "movq %%mm4, %%mm5 \n\t" \
00628 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00629 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00630 \
00631 "movq (%0, "#index", 2), %%mm1 \n\t" \
00632 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00633 "psraw $4, %%mm1 \n\t" \
00634 "psraw $4, %%mm7 \n\t" \
00635 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00636 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00637 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00638 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00639 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00640 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00641 \
00642 "paddw %%mm3, %%mm4 \n\t"\
00643 "movq %%mm2, %%mm0 \n\t"\
00644 "movq %%mm5, %%mm6 \n\t"\
00645 "movq %%mm4, %%mm3 \n\t"\
00646 "punpcklwd %%mm2, %%mm2 \n\t"\
00647 "punpcklwd %%mm5, %%mm5 \n\t"\
00648 "punpcklwd %%mm4, %%mm4 \n\t"\
00649 "paddw %%mm1, %%mm2 \n\t"\
00650 "paddw %%mm1, %%mm5 \n\t"\
00651 "paddw %%mm1, %%mm4 \n\t"\
00652 "punpckhwd %%mm0, %%mm0 \n\t"\
00653 "punpckhwd %%mm6, %%mm6 \n\t"\
00654 "punpckhwd %%mm3, %%mm3 \n\t"\
00655 "paddw %%mm7, %%mm0 \n\t"\
00656 "paddw %%mm7, %%mm6 \n\t"\
00657 "paddw %%mm7, %%mm3 \n\t"\
00658 \
00659 "packuswb %%mm0, %%mm2 \n\t"\
00660 "packuswb %%mm6, %%mm5 \n\t"\
00661 "packuswb %%mm3, %%mm4 \n\t"\
00662 "pxor %%mm7, %%mm7 \n\t"
00663 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
00664
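// Output writers. On entry mm2 holds the blue bytes, mm4 green, mm5 red and
// mm7 zero (as left by YSCALEYUV2RGBX and friends); each macro interleaves
// them into the destination pixel format and loops back to label 1 until
// dstw pixels have been stored.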
00665 #define REAL_WRITEBGR32(dst, dstw, index) \
00666 \
00667 "movq %%mm2, %%mm1 \n\t" \
00668 "movq %%mm5, %%mm6 \n\t" \
00669 "punpcklbw %%mm4, %%mm2 \n\t" \
00670 "punpcklbw %%mm7, %%mm5 \n\t" \
00671 "punpckhbw %%mm4, %%mm1 \n\t" \
00672 "punpckhbw %%mm7, %%mm6 \n\t" \
00673 "movq %%mm2, %%mm0 \n\t" \
00674 "movq %%mm1, %%mm3 \n\t" \
00675 "punpcklwd %%mm5, %%mm0 \n\t" \
00676 "punpckhwd %%mm5, %%mm2 \n\t" \
00677 "punpcklwd %%mm6, %%mm1 \n\t" \
00678 "punpckhwd %%mm6, %%mm3 \n\t" \
00679 \
00680 MOVNTQ(%%mm0, (dst, index, 4))\
00681 MOVNTQ(%%mm2, 8(dst, index, 4))\
00682 MOVNTQ(%%mm1, 16(dst, index, 4))\
00683 MOVNTQ(%%mm3, 24(dst, index, 4))\
00684 \
00685 "add $8, "#index" \n\t"\
00686 "cmp "#dstw", "#index" \n\t"\
00687 " jb 1b \n\t"
00688 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index)
00689
00690 #define REAL_WRITEBGR16(dst, dstw, index) \
00691 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00692 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00693 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00694 "psrlq $3, %%mm2 \n\t"\
00695 \
00696 "movq %%mm2, %%mm1 \n\t"\
00697 "movq %%mm4, %%mm3 \n\t"\
00698 \
00699 "punpcklbw %%mm7, %%mm3 \n\t"\
00700 "punpcklbw %%mm5, %%mm2 \n\t"\
00701 "punpckhbw %%mm7, %%mm4 \n\t"\
00702 "punpckhbw %%mm5, %%mm1 \n\t"\
00703 \
00704 "psllq $3, %%mm3 \n\t"\
00705 "psllq $3, %%mm4 \n\t"\
00706 \
00707 "por %%mm3, %%mm2 \n\t"\
00708 "por %%mm4, %%mm1 \n\t"\
00709 \
00710 MOVNTQ(%%mm2, (dst, index, 2))\
00711 MOVNTQ(%%mm1, 8(dst, index, 2))\
00712 \
00713 "add $8, "#index" \n\t"\
00714 "cmp "#dstw", "#index" \n\t"\
00715 " jb 1b \n\t"
00716 #define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index)
00717
00718 #define REAL_WRITEBGR15(dst, dstw, index) \
00719 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00720 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00721 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00722 "psrlq $3, %%mm2 \n\t"\
00723 "psrlq $1, %%mm5 \n\t"\
00724 \
00725 "movq %%mm2, %%mm1 \n\t"\
00726 "movq %%mm4, %%mm3 \n\t"\
00727 \
00728 "punpcklbw %%mm7, %%mm3 \n\t"\
00729 "punpcklbw %%mm5, %%mm2 \n\t"\
00730 "punpckhbw %%mm7, %%mm4 \n\t"\
00731 "punpckhbw %%mm5, %%mm1 \n\t"\
00732 \
00733 "psllq $2, %%mm3 \n\t"\
00734 "psllq $2, %%mm4 \n\t"\
00735 \
00736 "por %%mm3, %%mm2 \n\t"\
00737 "por %%mm4, %%mm1 \n\t"\
00738 \
00739 MOVNTQ(%%mm2, (dst, index, 2))\
00740 MOVNTQ(%%mm1, 8(dst, index, 2))\
00741 \
00742 "add $8, "#index" \n\t"\
00743 "cmp "#dstw", "#index" \n\t"\
00744 " jb 1b \n\t"
00745 #define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index)
00746
00747 #define WRITEBGR24OLD(dst, dstw, index) \
00748 \
00749 "movq %%mm2, %%mm1 \n\t" \
00750 "movq %%mm5, %%mm6 \n\t" \
00751 "punpcklbw %%mm4, %%mm2 \n\t" \
00752 "punpcklbw %%mm7, %%mm5 \n\t" \
00753 "punpckhbw %%mm4, %%mm1 \n\t" \
00754 "punpckhbw %%mm7, %%mm6 \n\t" \
00755 "movq %%mm2, %%mm0 \n\t" \
00756 "movq %%mm1, %%mm3 \n\t" \
00757 "punpcklwd %%mm5, %%mm0 \n\t" \
00758 "punpckhwd %%mm5, %%mm2 \n\t" \
00759 "punpcklwd %%mm6, %%mm1 \n\t" \
00760 "punpckhwd %%mm6, %%mm3 \n\t" \
00761 \
00762 "movq %%mm0, %%mm4 \n\t" \
00763 "psrlq $8, %%mm0 \n\t" \
00764 "pand "MANGLE(bm00000111)", %%mm4 \n\t" \
00765 "pand "MANGLE(bm11111000)", %%mm0 \n\t" \
00766 "por %%mm4, %%mm0 \n\t" \
00767 "movq %%mm2, %%mm4 \n\t" \
00768 "psllq $48, %%mm2 \n\t" \
00769 "por %%mm2, %%mm0 \n\t" \
00770 \
00771 "movq %%mm4, %%mm2 \n\t" \
00772 "psrld $16, %%mm4 \n\t" \
00773 "psrlq $24, %%mm2 \n\t" \
00774 "por %%mm4, %%mm2 \n\t" \
00775 "pand "MANGLE(bm00001111)", %%mm2 \n\t" \
00776 "movq %%mm1, %%mm4 \n\t" \
00777 "psrlq $8, %%mm1 \n\t" \
00778 "pand "MANGLE(bm00000111)", %%mm4 \n\t" \
00779 "pand "MANGLE(bm11111000)", %%mm1 \n\t" \
00780 "por %%mm4, %%mm1 \n\t" \
00781 "movq %%mm1, %%mm4 \n\t" \
00782 "psllq $32, %%mm1 \n\t" \
00783 "por %%mm1, %%mm2 \n\t" \
00784 \
00785 "psrlq $32, %%mm4 \n\t" \
00786 "movq %%mm3, %%mm5 \n\t" \
00787 "psrlq $8, %%mm3 \n\t" \
00788 "pand "MANGLE(bm00000111)", %%mm5 \n\t" \
00789 "pand "MANGLE(bm11111000)", %%mm3 \n\t" \
00790 "por %%mm5, %%mm3 \n\t" \
00791 "psllq $16, %%mm3 \n\t" \
00792 "por %%mm4, %%mm3 \n\t" \
00793 \
00794 MOVNTQ(%%mm0, (dst))\
00795 MOVNTQ(%%mm2, 8(dst))\
00796 MOVNTQ(%%mm3, 16(dst))\
00797 "add $24, "#dst" \n\t"\
00798 \
00799 "add $8, "#index" \n\t"\
00800 "cmp "#dstw", "#index" \n\t"\
00801 " jb 1b \n\t"
00802
00803 #define WRITEBGR24MMX(dst, dstw, index) \
00804 \
00805 "movq %%mm2, %%mm1 \n\t" \
00806 "movq %%mm5, %%mm6 \n\t" \
00807 "punpcklbw %%mm4, %%mm2 \n\t" \
00808 "punpcklbw %%mm7, %%mm5 \n\t" \
00809 "punpckhbw %%mm4, %%mm1 \n\t" \
00810 "punpckhbw %%mm7, %%mm6 \n\t" \
00811 "movq %%mm2, %%mm0 \n\t" \
00812 "movq %%mm1, %%mm3 \n\t" \
00813 "punpcklwd %%mm5, %%mm0 \n\t" \
00814 "punpckhwd %%mm5, %%mm2 \n\t" \
00815 "punpcklwd %%mm6, %%mm1 \n\t" \
00816 "punpckhwd %%mm6, %%mm3 \n\t" \
00817 \
00818 "movq %%mm0, %%mm4 \n\t" \
00819 "movq %%mm2, %%mm6 \n\t" \
00820 "movq %%mm1, %%mm5 \n\t" \
00821 "movq %%mm3, %%mm7 \n\t" \
00822 \
00823 "psllq $40, %%mm0 \n\t" \
00824 "psllq $40, %%mm2 \n\t" \
00825 "psllq $40, %%mm1 \n\t" \
00826 "psllq $40, %%mm3 \n\t" \
00827 \
00828 "punpckhdq %%mm4, %%mm0 \n\t" \
00829 "punpckhdq %%mm6, %%mm2 \n\t" \
00830 "punpckhdq %%mm5, %%mm1 \n\t" \
00831 "punpckhdq %%mm7, %%mm3 \n\t" \
00832 \
00833 "psrlq $8, %%mm0 \n\t" \
00834 "movq %%mm2, %%mm6 \n\t" \
00835 "psllq $40, %%mm2 \n\t" \
00836 "por %%mm2, %%mm0 \n\t" \
00837 MOVNTQ(%%mm0, (dst))\
00838 \
00839 "psrlq $24, %%mm6 \n\t" \
00840 "movq %%mm1, %%mm5 \n\t" \
00841 "psllq $24, %%mm1 \n\t" \
00842 "por %%mm1, %%mm6 \n\t" \
00843 MOVNTQ(%%mm6, 8(dst))\
00844 \
00845 "psrlq $40, %%mm5 \n\t" \
00846 "psllq $8, %%mm3 \n\t" \
00847 "por %%mm3, %%mm5 \n\t" \
00848 MOVNTQ(%%mm5, 16(dst))\
00849 \
00850 "add $24, "#dst" \n\t"\
00851 \
00852 "add $8, "#index" \n\t"\
00853 "cmp "#dstw", "#index" \n\t"\
00854 " jb 1b \n\t"
00855
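// MMX2 version of the 24-bit writer: pshufw plus the ff_M24A/B/C masks
// assemble each group of eight BGR triplets with fewer shift/or steps than
// the plain MMX chain above.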
00856 #define WRITEBGR24MMX2(dst, dstw, index) \
00857 \
00858 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00859 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00860 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00861 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00862 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00863 \
00864 "pand %%mm0, %%mm1 \n\t" \
00865 "pand %%mm0, %%mm3 \n\t" \
00866 "pand %%mm7, %%mm6 \n\t" \
00867 \
00868 "psllq $8, %%mm3 \n\t" \
00869 "por %%mm1, %%mm6 \n\t"\
00870 "por %%mm3, %%mm6 \n\t"\
00871 MOVNTQ(%%mm6, (dst))\
00872 \
00873 "psrlq $8, %%mm4 \n\t" \
00874 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00875 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00876 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00877 \
00878 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00879 "pand %%mm7, %%mm3 \n\t" \
00880 "pand %%mm0, %%mm6 \n\t" \
00881 \
00882 "por %%mm1, %%mm3 \n\t" \
00883 "por %%mm3, %%mm6 \n\t"\
00884 MOVNTQ(%%mm6, 8(dst))\
00885 \
00886 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00887 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00888 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00889 \
00890 "pand %%mm7, %%mm1 \n\t" \
00891 "pand %%mm0, %%mm3 \n\t" \
00892 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00893 \
00894 "por %%mm1, %%mm3 \n\t"\
00895 "por %%mm3, %%mm6 \n\t"\
00896 MOVNTQ(%%mm6, 16(dst))\
00897 \
00898 "add $24, "#dst" \n\t"\
00899 \
00900 "add $8, "#index" \n\t"\
00901 "cmp "#dstw", "#index" \n\t"\
00902 " jb 1b \n\t"
00903
00904 #ifdef HAVE_MMX2
00905 #undef WRITEBGR24
00906 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00907 #else
00908 #undef WRITEBGR24
00909 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00910 #endif
00911
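// Pack the scaled planes back into YUYV: mm1/mm7 carry the luma, mm3 the U
// and mm4 the V samples; after packing, the bytes are interleaved as Y U Y V.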
00912 #define REAL_WRITEYUY2(dst, dstw, index) \
00913 "packuswb %%mm3, %%mm3 \n\t"\
00914 "packuswb %%mm4, %%mm4 \n\t"\
00915 "packuswb %%mm7, %%mm1 \n\t"\
00916 "punpcklbw %%mm4, %%mm3 \n\t"\
00917 "movq %%mm1, %%mm7 \n\t"\
00918 "punpcklbw %%mm3, %%mm1 \n\t"\
00919 "punpckhbw %%mm3, %%mm7 \n\t"\
00920 \
00921 MOVNTQ(%%mm1, (dst, index, 2))\
00922 MOVNTQ(%%mm7, 8(dst, index, 2))\
00923 \
00924 "add $8, "#index" \n\t"\
00925 "cmp "#dstw", "#index" \n\t"\
00926 " jb 1b \n\t"
00927 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00928
00929
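// Planar (YV12) vertical scaling. With MMX the macros above do the work,
// honouring SWS_ACCURATE_RND; otherwise the AltiVec or plain C fallback is
// used. The chroma planes are only written when uDest is non-NULL.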
00930 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00931 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00932 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
00933 {
00934 #ifdef HAVE_MMX
00935 if (c->flags & SWS_ACCURATE_RND){
00936 if (uDest){
00937 YSCALEYUV2YV12X_ACCURATE( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
00938 YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
00939 }
00940
00941 YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
00942 }else{
00943 if (uDest){
00944 YSCALEYUV2YV12X( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
00945 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
00946 }
00947
00948 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
00949 }
00950 #else
00951 #ifdef HAVE_ALTIVEC
00952 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
00953 chrFilter, chrSrc, chrFilterSize,
00954 dest, uDest, vDest, dstW, chrDstW);
00955 #else //HAVE_ALTIVEC
00956 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
00957 chrFilter, chrSrc, chrFilterSize,
00958 dest, uDest, vDest, dstW, chrDstW);
00959 #endif
00960 #endif
00961 }
00962
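// NV12/NV21 output has no SIMD path here; it always goes through the C
// implementation.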
00963 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00964 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00965 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
00966 {
00967 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
00968 chrFilter, chrSrc, chrFilterSize,
00969 dest, uDest, dstW, chrDstW, dstFormat);
00970 }
00971
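// 1:1 vertical copy (no filtering). The MMX blocks index from the end of each
// row with a negative counter; the C fallback clamps the >>7 result to 0..255.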
00972 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
00973 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
00974 {
00975 #ifdef HAVE_MMX
00976 if (uDest)
00977 {
00978 asm volatile(
00979 YSCALEYUV2YV121
00980 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
00981 "g" (-chrDstW)
00982 : "%"REG_a
00983 );
00984
00985 asm volatile(
00986 YSCALEYUV2YV121
00987 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
00988 "g" (-chrDstW)
00989 : "%"REG_a
00990 );
00991 }
00992
00993 asm volatile(
00994 YSCALEYUV2YV121
00995 :: "r" (lumSrc + dstW), "r" (dest + dstW),
00996 "g" (-dstW)
00997 : "%"REG_a
00998 );
00999 #else
01000 int i;
01001 for (i=0; i<dstW; i++)
01002 {
01003 int val= lumSrc[i]>>7;
01004
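        // After >>7 the value can fall outside 0..255; bit 8 is set in that
        // case, so the cheap &256 test gates the clamp.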
01005 if (val&256){
01006 if (val<0) val=0;
01007 else val=255;
01008 }
01009
01010 dest[i]= val;
01011 }
01012
01013 if (uDest)
01014 for (i=0; i<chrDstW; i++)
01015 {
01016 int u=chrSrc[i]>>7;
01017 int v=chrSrc[i + 2048]>>7;
01018
01019 if ((u|v)&256){
01020 if (u<0) u=0;
01021 else if (u>255) u=255;
01022 if (v<0) v=0;
01023 else if (v>255) v=255;
01024 }
01025
01026 uDest[i]= u;
01027 vDest[i]= v;
01028 }
01029 #endif
01030 }
01031
01032
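// Vertical scale and convert to a packed format (RGB/BGR/YUY2). The MMX path
// dispatches on dstFormat; BGR24 needs REG_c for the byte-tripled destination
// pointer, so it spells out its own constraint list instead of using
// YSCALEYUV2PACKEDX_END.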
01036 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
01037 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
01038 uint8_t *dest, long dstW, long dstY)
01039 {
01040 #ifdef HAVE_MMX
01041 long dummy=0;
01042 if (c->flags & SWS_ACCURATE_RND){
01043 switch(c->dstFormat){
01044 case PIX_FMT_RGB32:
01045 YSCALEYUV2PACKEDX_ACCURATE
01046 YSCALEYUV2RGBX
01047 WRITEBGR32(%4, %5, %%REGa)
01048
01049 YSCALEYUV2PACKEDX_END
01050 return;
01051 case PIX_FMT_BGR24:
01052 YSCALEYUV2PACKEDX_ACCURATE
01053 YSCALEYUV2RGBX
01054 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
01055 "add %4, %%"REG_c" \n\t"
01056 WRITEBGR24(%%REGc, %5, %%REGa)
01057
01058
01059 :: "r" (&c->redDither),
01060 "m" (dummy), "m" (dummy), "m" (dummy),
01061 "r" (dest), "m" (dstW)
01062 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
01063 );
01064 return;
01065 case PIX_FMT_BGR555:
01066 YSCALEYUV2PACKEDX_ACCURATE
01067 YSCALEYUV2RGBX
01068
01069 #ifdef DITHER1XBPP
01070 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
01071 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
01072 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
01073 #endif
01074
01075 WRITEBGR15(%4, %5, %%REGa)
01076 YSCALEYUV2PACKEDX_END
01077 return;
01078 case PIX_FMT_BGR565:
01079 YSCALEYUV2PACKEDX_ACCURATE
01080 YSCALEYUV2RGBX
01081
01082 #ifdef DITHER1XBPP
01083 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
01084 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
01085 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
01086 #endif
01087
01088 WRITEBGR16(%4, %5, %%REGa)
01089 YSCALEYUV2PACKEDX_END
01090 return;
01091 case PIX_FMT_YUYV422:
01092 YSCALEYUV2PACKEDX_ACCURATE
01093
01094
01095 "psraw $3, %%mm3 \n\t"
01096 "psraw $3, %%mm4 \n\t"
01097 "psraw $3, %%mm1 \n\t"
01098 "psraw $3, %%mm7 \n\t"
01099 WRITEYUY2(%4, %5, %%REGa)
01100 YSCALEYUV2PACKEDX_END
01101 return;
01102 }
01103 }else{
01104 switch(c->dstFormat)
01105 {
01106 case PIX_FMT_RGB32:
01107 YSCALEYUV2PACKEDX
01108 YSCALEYUV2RGBX
01109 WRITEBGR32(%4, %5, %%REGa)
01110 YSCALEYUV2PACKEDX_END
01111 return;
01112 case PIX_FMT_BGR24:
01113 YSCALEYUV2PACKEDX
01114 YSCALEYUV2RGBX
01115 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
01116 "add %4, %%"REG_c" \n\t"
01117 WRITEBGR24(%%REGc, %5, %%REGa)
01118
01119 :: "r" (&c->redDither),
01120 "m" (dummy), "m" (dummy), "m" (dummy),
01121 "r" (dest), "m" (dstW)
01122 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
01123 );
01124 return;
01125 case PIX_FMT_BGR555:
01126 YSCALEYUV2PACKEDX
01127 YSCALEYUV2RGBX
01128
01129 #ifdef DITHER1XBPP
01130 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01131 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
01132 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01133 #endif
01134
01135 WRITEBGR15(%4, %5, %%REGa)
01136 YSCALEYUV2PACKEDX_END
01137 return;
01138 case PIX_FMT_BGR565:
01139 YSCALEYUV2PACKEDX
01140 YSCALEYUV2RGBX
01141
01142 #ifdef DITHER1XBPP
01143 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01144 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
01145 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01146 #endif
01147
01148 WRITEBGR16(%4, %5, %%REGa)
01149 YSCALEYUV2PACKEDX_END
01150 return;
01151 case PIX_FMT_YUYV422:
01152 YSCALEYUV2PACKEDX
01153
01154
01155 "psraw $3, %%mm3 \n\t"
01156 "psraw $3, %%mm4 \n\t"
01157 "psraw $3, %%mm1 \n\t"
01158 "psraw $3, %%mm7 \n\t"
01159 WRITEYUY2(%4, %5, %%REGa)
01160 YSCALEYUV2PACKEDX_END
01161 return;
01162 }
01163 }
01164 #endif
01165 #ifdef HAVE_ALTIVEC
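// The AltiVec routine only handles the packed RGB/BGR formats listed below;
// anything else falls through to the generic C converter.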
01166
01167
01168 if (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
01169 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
01170 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)
01171 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
01172 chrFilter, chrSrc, chrFilterSize,
01173 dest, dstW, dstY);
01174 else
01175 #endif
01176 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
01177 chrFilter, chrSrc, chrFilterSize,
01178 dest, dstW, dstY);
01179 }
01180
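// Bilinear vertical scaling to a packed format: exactly two source lines per
// output line, blended with yalpha/uvalpha. The #if 0 block below is an old
// full-chroma interpolation path that is not used. The MMX cases save REG_b
// and REG_BP so they can be reused as extra pointers inside the asm.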
01184 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
01185 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
01186 {
01187 int yalpha1=yalpha^4095;
01188 int uvalpha1=uvalpha^4095;
01189 int i;
01190
01191 #if 0 //isn't used
01192 if (flags&SWS_FULL_CHR_H_INT)
01193 {
01194 switch(dstFormat)
01195 {
01196 #ifdef HAVE_MMX
01197 case PIX_FMT_RGB32:
01198 asm volatile(
01199
01200
01201 FULL_YSCALEYUV2RGB
01202 "punpcklbw %%mm1, %%mm3 \n\t"
01203 "punpcklbw %%mm7, %%mm0 \n\t"
01204
01205 "movq %%mm3, %%mm1 \n\t"
01206 "punpcklwd %%mm0, %%mm3 \n\t"
01207 "punpckhwd %%mm0, %%mm1 \n\t"
01208
01209 MOVNTQ(%%mm3, (%4, %%REGa, 4))
01210 MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
01211
01212 "add $4, %%"REG_a" \n\t"
01213 "cmp %5, %%"REG_a" \n\t"
01214 " jb 1b \n\t"
01215
01216 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
01217 "m" (yalpha1), "m" (uvalpha1)
01218 : "%"REG_a
01219 );
01220 break;
01221 case PIX_FMT_BGR24:
01222 asm volatile(
01223
01224 FULL_YSCALEYUV2RGB
01225
01226
01227 "punpcklbw %%mm1, %%mm3 \n\t"
01228 "punpcklbw %%mm7, %%mm0 \n\t"
01229
01230 "movq %%mm3, %%mm1 \n\t"
01231 "punpcklwd %%mm0, %%mm3 \n\t"
01232 "punpckhwd %%mm0, %%mm1 \n\t"
01233
01234 "movq %%mm3, %%mm2 \n\t"
01235 "psrlq $8, %%mm3 \n\t"
01236 "pand "MANGLE(bm00000111)", %%mm2 \n\t"
01237 "pand "MANGLE(bm11111000)", %%mm3 \n\t"
01238 "por %%mm2, %%mm3 \n\t"
01239 "movq %%mm1, %%mm2 \n\t"
01240 "psllq $48, %%mm1 \n\t"
01241 "por %%mm1, %%mm3 \n\t"
01242
01243 "movq %%mm2, %%mm1 \n\t"
01244 "psrld $16, %%mm2 \n\t"
01245 "psrlq $24, %%mm1 \n\t"
01246 "por %%mm2, %%mm1 \n\t"
01247
01248 "mov %4, %%"REG_b" \n\t"
01249 "add %%"REG_a", %%"REG_b" \n\t"
01250
01251 #ifdef HAVE_MMX2
01252
01253 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
01254 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
01255 #else
01256 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
01257 "psrlq $32, %%mm3 \n\t"
01258 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t"
01259 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
01260 #endif
01261 "add $4, %%"REG_a" \n\t"
01262 "cmp %5, %%"REG_a" \n\t"
01263 " jb 1b \n\t"
01264
01265 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
01266 "m" (yalpha1), "m" (uvalpha1)
01267 : "%"REG_a, "%"REG_b
01268 );
01269 break;
01270 case PIX_FMT_BGR555:
01271 asm volatile(
01272
01273 FULL_YSCALEYUV2RGB
01274 #ifdef DITHER1XBPP
01275 "paddusb "MANGLE(g5Dither)", %%mm1 \n\t"
01276 "paddusb "MANGLE(r5Dither)", %%mm0 \n\t"
01277 "paddusb "MANGLE(b5Dither)", %%mm3 \n\t"
01278 #endif
01279 "punpcklbw %%mm7, %%mm1 \n\t"
01280 "punpcklbw %%mm7, %%mm3 \n\t"
01281 "punpcklbw %%mm7, %%mm0 \n\t"
01282
01283 "psrlw $3, %%mm3 \n\t"
01284 "psllw $2, %%mm1 \n\t"
01285 "psllw $7, %%mm0 \n\t"
01286 "pand "MANGLE(g15Mask)", %%mm1 \n\t"
01287 "pand "MANGLE(r15Mask)", %%mm0 \n\t"
01288
01289 "por %%mm3, %%mm1 \n\t"
01290 "por %%mm1, %%mm0 \n\t"
01291
01292 MOVNTQ(%%mm0, (%4, %%REGa, 2))
01293
01294 "add $4, %%"REG_a" \n\t"
01295 "cmp %5, %%"REG_a" \n\t"
01296 " jb 1b \n\t"
01297
01298 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
01299 "m" (yalpha1), "m" (uvalpha1)
01300 : "%"REG_a
01301 );
01302 break;
01303 case PIX_FMT_BGR565:
01304 asm volatile(
01305
01306 FULL_YSCALEYUV2RGB
01307 #ifdef DITHER1XBPP
01308 "paddusb "MANGLE(g6Dither)", %%mm1 \n\t"
01309 "paddusb "MANGLE(r5Dither)", %%mm0 \n\t"
01310 "paddusb "MANGLE(b5Dither)", %%mm3 \n\t"
01311 #endif
01312 "punpcklbw %%mm7, %%mm1 \n\t"
01313 "punpcklbw %%mm7, %%mm3 \n\t"
01314 "punpcklbw %%mm7, %%mm0 \n\t"
01315
01316 "psrlw $3, %%mm3 \n\t"
01317 "psllw $3, %%mm1 \n\t"
01318 "psllw $8, %%mm0 \n\t"
01319 "pand "MANGLE(g16Mask)", %%mm1 \n\t"
01320 "pand "MANGLE(r16Mask)", %%mm0 \n\t"
01321
01322 "por %%mm3, %%mm1 \n\t"
01323 "por %%mm1, %%mm0 \n\t"
01324
01325 MOVNTQ(%%mm0, (%4, %%REGa, 2))
01326
01327 "add $4, %%"REG_a" \n\t"
01328 "cmp %5, %%"REG_a" \n\t"
01329 " jb 1b \n\t"
01330
01331 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
01332 "m" (yalpha1), "m" (uvalpha1)
01333 : "%"REG_a
01334 );
01335 break;
01336 #endif
01337 case PIX_FMT_BGR32:
01338 #ifndef HAVE_MMX
01339 case PIX_FMT_RGB32:
01340 #endif
01341 if (dstFormat==PIX_FMT_RGB32)
01342 {
01343 int i;
01344 #ifdef WORDS_BIGENDIAN
01345 dest++;
01346 #endif
01347 for (i=0;i<dstW;i++){
01348
01349 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01350 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01351 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01352 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
01353 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
01354 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
01355 dest+= 4;
01356 }
01357 }
01358 else if (dstFormat==PIX_FMT_BGR24)
01359 {
01360 int i;
01361 for (i=0;i<dstW;i++){
01362
01363 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01364 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01365 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01366 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
01367 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
01368 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
01369 dest+= 3;
01370 }
01371 }
01372 else if (dstFormat==PIX_FMT_BGR565)
01373 {
01374 int i;
01375 for (i=0;i<dstW;i++){
01376
01377 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01378 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01379 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01380
01381 ((uint16_t*)dest)[i] =
01382 clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
01383 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
01384 clip_table16r[(Y + yuvtab_3343[V]) >>13];
01385 }
01386 }
01387 else if (dstFormat==PIX_FMT_BGR555)
01388 {
01389 int i;
01390 for (i=0;i<dstW;i++){
01391
01392 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01393 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01394 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01395
01396 ((uint16_t*)dest)[i] =
01397 clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
01398 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
01399 clip_table15r[(Y + yuvtab_3343[V]) >>13];
01400 }
01401 }
01402 }
01403 else
01404 {
01405 #endif // if 0
01406 #ifdef HAVE_MMX
01407 switch(c->dstFormat)
01408 {
01409
01410 case PIX_FMT_RGB32:
01411 asm volatile(
01412 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01413 "mov %4, %%"REG_b" \n\t"
01414 "push %%"REG_BP" \n\t"
01415 YSCALEYUV2RGB(%%REGBP, %5)
01416 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
01417 "pop %%"REG_BP" \n\t"
01418 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01419
01420 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01421 "a" (&c->redDither)
01422 );
01423 return;
01424 case PIX_FMT_BGR24:
01425 asm volatile(
01426 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01427 "mov %4, %%"REG_b" \n\t"
01428 "push %%"REG_BP" \n\t"
01429 YSCALEYUV2RGB(%%REGBP, %5)
01430 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01431 "pop %%"REG_BP" \n\t"
01432 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01433 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01434 "a" (&c->redDither)
01435 );
01436 return;
01437 case PIX_FMT_BGR555:
01438 asm volatile(
01439 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01440 "mov %4, %%"REG_b" \n\t"
01441 "push %%"REG_BP" \n\t"
01442 YSCALEYUV2RGB(%%REGBP, %5)
01443
01444 #ifdef DITHER1XBPP
01445 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01446 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
01447 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01448 #endif
01449
01450 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
01451 "pop %%"REG_BP" \n\t"
01452 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01453
01454 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01455 "a" (&c->redDither)
01456 );
01457 return;
01458 case PIX_FMT_BGR565:
01459 asm volatile(
01460 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01461 "mov %4, %%"REG_b" \n\t"
01462 "push %%"REG_BP" \n\t"
01463 YSCALEYUV2RGB(%%REGBP, %5)
01464
01465 #ifdef DITHER1XBPP
01466 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01467 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
01468 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01469 #endif
01470
01471 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
01472 "pop %%"REG_BP" \n\t"
01473 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01474 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01475 "a" (&c->redDither)
01476 );
01477 return;
01478 case PIX_FMT_YUYV422:
01479 asm volatile(
01480 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01481 "mov %4, %%"REG_b" \n\t"
01482 "push %%"REG_BP" \n\t"
01483 YSCALEYUV2PACKED(%%REGBP, %5)
01484 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01485 "pop %%"REG_BP" \n\t"
01486 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01487 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01488 "a" (&c->redDither)
01489 );
01490 return;
01491 default: break;
01492 }
01493 #endif //HAVE_MMX
01494 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
01495 }
01496
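// Packed output from a single luma line (no vertical interpolation). When
// uvalpha >= 2048 the two chroma buffers are averaged (the *1b macro
// variants); otherwise only uvbuf0 is read.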
01500 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
01501 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
01502 {
01503 const int yalpha1=0;
01504 int i;
01505
01506 uint16_t *buf1= buf0;
01507 const int yalpha= 4096;
01508
01509 if (flags&SWS_FULL_CHR_H_INT)
01510 {
01511 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
01512 return;
01513 }
01514
01515 #ifdef HAVE_MMX
01516 if (uvalpha < 2048)
01517 {
01518 switch(dstFormat)
01519 {
01520 case PIX_FMT_RGB32:
01521 asm volatile(
01522 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01523 "mov %4, %%"REG_b" \n\t"
01524 "push %%"REG_BP" \n\t"
01525 YSCALEYUV2RGB1(%%REGBP, %5)
01526 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
01527 "pop %%"REG_BP" \n\t"
01528 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01529
01530 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01531 "a" (&c->redDither)
01532 );
01533 return;
01534 case PIX_FMT_BGR24:
01535 asm volatile(
01536 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01537 "mov %4, %%"REG_b" \n\t"
01538 "push %%"REG_BP" \n\t"
01539 YSCALEYUV2RGB1(%%REGBP, %5)
01540 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01541 "pop %%"REG_BP" \n\t"
01542 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01543
01544 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01545 "a" (&c->redDither)
01546 );
01547 return;
01548 case PIX_FMT_BGR555:
01549 asm volatile(
01550 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01551 "mov %4, %%"REG_b" \n\t"
01552 "push %%"REG_BP" \n\t"
01553 YSCALEYUV2RGB1(%%REGBP, %5)
01554
01555 #ifdef DITHER1XBPP
01556 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01557 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
01558 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01559 #endif
01560 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
01561 "pop %%"REG_BP" \n\t"
01562 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01563
01564 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01565 "a" (&c->redDither)
01566 );
01567 return;
01568 case PIX_FMT_BGR565:
01569 asm volatile(
01570 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01571 "mov %4, %%"REG_b" \n\t"
01572 "push %%"REG_BP" \n\t"
01573 YSCALEYUV2RGB1(%%REGBP, %5)
01574
01575 #ifdef DITHER1XBPP
01576 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01577 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
01578 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01579 #endif
01580
01581 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
01582 "pop %%"REG_BP" \n\t"
01583 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01584
01585 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01586 "a" (&c->redDither)
01587 );
01588 return;
01589 case PIX_FMT_YUYV422:
01590 asm volatile(
01591 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01592 "mov %4, %%"REG_b" \n\t"
01593 "push %%"REG_BP" \n\t"
01594 YSCALEYUV2PACKED1(%%REGBP, %5)
01595 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01596 "pop %%"REG_BP" \n\t"
01597 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01598
01599 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01600 "a" (&c->redDither)
01601 );
01602 return;
01603 }
01604 }
01605 else
01606 {
01607 switch(dstFormat)
01608 {
01609 case PIX_FMT_RGB32:
01610 asm volatile(
01611 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01612 "mov %4, %%"REG_b" \n\t"
01613 "push %%"REG_BP" \n\t"
01614 YSCALEYUV2RGB1b(%%REGBP, %5)
01615 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
01616 "pop %%"REG_BP" \n\t"
01617 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01618
01619 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01620 "a" (&c->redDither)
01621 );
01622 return;
01623 case PIX_FMT_BGR24:
01624 asm volatile(
01625 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01626 "mov %4, %%"REG_b" \n\t"
01627 "push %%"REG_BP" \n\t"
01628 YSCALEYUV2RGB1b(%%REGBP, %5)
01629 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01630 "pop %%"REG_BP" \n\t"
01631 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01632
01633 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01634 "a" (&c->redDither)
01635 );
01636 return;
01637 case PIX_FMT_BGR555:
01638 asm volatile(
01639 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01640 "mov %4, %%"REG_b" \n\t"
01641 "push %%"REG_BP" \n\t"
01642 YSCALEYUV2RGB1b(%%REGBP, %5)
01643
01644 #ifdef DITHER1XBPP
01645 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01646 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
01647 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01648 #endif
01649 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
01650 "pop %%"REG_BP" \n\t"
01651 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01652
01653 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01654 "a" (&c->redDither)
01655 );
01656 return;
01657 case PIX_FMT_BGR565:
01658 asm volatile(
01659 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01660 "mov %4, %%"REG_b" \n\t"
01661 "push %%"REG_BP" \n\t"
01662 YSCALEYUV2RGB1b(%%REGBP, %5)
01663
01664 #ifdef DITHER1XBPP
01665 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
01666 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
01667 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
01668 #endif
01669
01670 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
01671 "pop %%"REG_BP" \n\t"
01672 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01673
01674 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01675 "a" (&c->redDither)
01676 );
01677 return;
01678 case PIX_FMT_YUYV422:
01679 asm volatile(
01680 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01681 "mov %4, %%"REG_b" \n\t"
01682 "push %%"REG_BP" \n\t"
01683 YSCALEYUV2PACKED1b(%%REGBP, %5)
01684 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01685 "pop %%"REG_BP" \n\t"
01686 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01687
01688 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01689 "a" (&c->redDither)
01690 );
01691 return;
01692 }
01693 }
01694 #endif
01695 if (uvalpha < 2048)
01696 {
01697 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
01698 }else{
01699 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
01700 }
01701 }
01702
01703
01704
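// Packed-input readers used by the horizontal scaler: they split YUY2/UYVY
// (and, below, BGR) input into separate Y, U and V planes. The MMX versions
// run the loop with a negative index from the end of the row.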
01705 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
01706 {
01707 #ifdef HAVE_MMX
01708 asm volatile(
01709 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
01710 "mov %0, %%"REG_a" \n\t"
01711 "1: \n\t"
01712 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01713 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01714 "pand %%mm2, %%mm0 \n\t"
01715 "pand %%mm2, %%mm1 \n\t"
01716 "packuswb %%mm1, %%mm0 \n\t"
01717 "movq %%mm0, (%2, %%"REG_a") \n\t"
01718 "add $8, %%"REG_a" \n\t"
01719 " js 1b \n\t"
01720 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
01721 : "%"REG_a
01722 );
01723 #else
01724 int i;
01725 for (i=0; i<width; i++)
01726 dst[i]= src[2*i];
01727 #endif
01728 }
01729
01730 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
01731 {
01732 #ifdef HAVE_MMX
01733 asm volatile(
01734 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01735 "mov %0, %%"REG_a" \n\t"
01736 "1: \n\t"
01737 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01738 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
01739 "psrlw $8, %%mm0 \n\t"
01740 "psrlw $8, %%mm1 \n\t"
01741 "packuswb %%mm1, %%mm0 \n\t"
01742 "movq %%mm0, %%mm1 \n\t"
01743 "psrlw $8, %%mm0 \n\t"
01744 "pand %%mm4, %%mm1 \n\t"
01745 "packuswb %%mm0, %%mm0 \n\t"
01746 "packuswb %%mm1, %%mm1 \n\t"
01747 "movd %%mm0, (%3, %%"REG_a") \n\t"
01748 "movd %%mm1, (%2, %%"REG_a") \n\t"
01749 "add $4, %%"REG_a" \n\t"
01750 " js 1b \n\t"
01751 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01752 : "%"REG_a
01753 );
01754 #else
01755 int i;
01756 for (i=0; i<width; i++)
01757 {
01758 dstU[i]= src1[4*i + 1];
01759 dstV[i]= src1[4*i + 3];
01760 }
01761 #endif
01762 assert(src1 == src2);
01763 }
01764
01765
01766 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
01767 {
01768 #ifdef HAVE_MMX
01769 asm volatile(
01770 "mov %0, %%"REG_a" \n\t"
01771 "1: \n\t"
01772 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01773 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01774 "psrlw $8, %%mm0 \n\t"
01775 "psrlw $8, %%mm1 \n\t"
01776 "packuswb %%mm1, %%mm0 \n\t"
01777 "movq %%mm0, (%2, %%"REG_a") \n\t"
01778 "add $8, %%"REG_a" \n\t"
01779 " js 1b \n\t"
01780 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
01781 : "%"REG_a
01782 );
01783 #else
01784 int i;
01785 for (i=0; i<width; i++)
01786 dst[i]= src[2*i+1];
01787 #endif
01788 }
01789
01790 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
01791 {
01792 #ifdef HAVE_MMX
01793 asm volatile(
01794 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01795 "mov %0, %%"REG_a" \n\t"
01796 "1: \n\t"
01797 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01798 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
01799 "pand %%mm4, %%mm0 \n\t"
01800 "pand %%mm4, %%mm1 \n\t"
01801 "packuswb %%mm1, %%mm0 \n\t"
01802 "movq %%mm0, %%mm1 \n\t"
01803 "psrlw $8, %%mm0 \n\t"
01804 "pand %%mm4, %%mm1 \n\t"
01805 "packuswb %%mm0, %%mm0 \n\t"
01806 "packuswb %%mm1, %%mm1 \n\t"
01807 "movd %%mm0, (%3, %%"REG_a") \n\t"
01808 "movd %%mm1, (%2, %%"REG_a") \n\t"
01809 "add $4, %%"REG_a" \n\t"
01810 " js 1b \n\t"
01811 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01812 : "%"REG_a
01813 );
01814 #else
01815 int i;
01816 for (i=0; i<width; i++)
01817 {
01818 dstU[i]= src1[4*i + 0];
01819 dstV[i]= src1[4*i + 2];
01820 }
01821 #endif
01822 assert(src1 == src2);
01823 }
01824
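// The BGR32 converters stay in plain C; bgr32ToUV averages two neighbouring
// pixels before applying the RGB->UV coefficients (hence the extra >>1).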
01825 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
01826 {
01827 int i;
01828 for (i=0; i<width; i++)
01829 {
01830 int b= ((uint32_t*)src)[i]&0xFF;
01831 int g= (((uint32_t*)src)[i]>>8)&0xFF;
01832 int r= (((uint32_t*)src)[i]>>16)&0xFF;
01833
01834 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01835 }
01836 }
01837
01838 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
01839 {
01840 int i;
01841 assert(src1 == src2);
01842 for (i=0; i<width; i++)
01843 {
01844 const int a= ((uint32_t*)src1)[2*i+0];
01845 const int e= ((uint32_t*)src1)[2*i+1];
01846 const int l= (a&0xFF00FF) + (e&0xFF00FF);
01847 const int h= (a&0x00FF00) + (e&0x00FF00);
01848 const int b= l&0x3FF;
01849 const int g= h>>8;
01850 const int r= l>>16;
01851
01852 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
01853 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
01854 }
01855 }
01856
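// BGR24 input with MMX: pmaddwd against ff_bgr2YCoeff/ff_bgr2UCoeff performs
// the colour-space dot products; defining FAST_BGR2YV12 skips the
// intermediate >>8 for a small speed/accuracy trade-off.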
01857 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
01858 {
01859 #ifdef HAVE_MMX
01860 asm volatile(
01861 "mov %2, %%"REG_a" \n\t"
01862 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
01863 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01864 "pxor %%mm7, %%mm7 \n\t"
01865 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
01866 ASMALIGN(4)
01867 "1: \n\t"
01868 PREFETCH" 64(%0, %%"REG_d") \n\t"
01869 "movd (%0, %%"REG_d"), %%mm0 \n\t"
01870 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
01871 "punpcklbw %%mm7, %%mm0 \n\t"
01872 "punpcklbw %%mm7, %%mm1 \n\t"
01873 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
01874 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
01875 "punpcklbw %%mm7, %%mm2 \n\t"
01876 "punpcklbw %%mm7, %%mm3 \n\t"
01877 "pmaddwd %%mm6, %%mm0 \n\t"
01878 "pmaddwd %%mm6, %%mm1 \n\t"
01879 "pmaddwd %%mm6, %%mm2 \n\t"
01880 "pmaddwd %%mm6, %%mm3 \n\t"
01881 #ifndef FAST_BGR2YV12
01882 "psrad $8, %%mm0 \n\t"
01883 "psrad $8, %%mm1 \n\t"
01884 "psrad $8, %%mm2 \n\t"
01885 "psrad $8, %%mm3 \n\t"
01886 #endif
01887 "packssdw %%mm1, %%mm0 \n\t"
01888 "packssdw %%mm3, %%mm2 \n\t"
01889 "pmaddwd %%mm5, %%mm0 \n\t"
01890 "pmaddwd %%mm5, %%mm2 \n\t"
01891 "packssdw %%mm2, %%mm0 \n\t"
01892 "psraw $7, %%mm0 \n\t"
01893
01894 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
01895 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
01896 "punpcklbw %%mm7, %%mm4 \n\t"
01897 "punpcklbw %%mm7, %%mm1 \n\t"
01898 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
01899 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
01900 "punpcklbw %%mm7, %%mm2 \n\t"
01901 "punpcklbw %%mm7, %%mm3 \n\t"
01902 "pmaddwd %%mm6, %%mm4 \n\t"
01903 "pmaddwd %%mm6, %%mm1 \n\t"
01904 "pmaddwd %%mm6, %%mm2 \n\t"
01905 "pmaddwd %%mm6, %%mm3 \n\t"
01906 #ifndef FAST_BGR2YV12
01907 "psrad $8, %%mm4 \n\t"
01908 "psrad $8, %%mm1 \n\t"
01909 "psrad $8, %%mm2 \n\t"
01910 "psrad $8, %%mm3 \n\t"
01911 #endif
01912 "packssdw %%mm1, %%mm4 \n\t"
01913 "packssdw %%mm3, %%mm2 \n\t"
01914 "pmaddwd %%mm5, %%mm4 \n\t"
01915 "pmaddwd %%mm5, %%mm2 \n\t"
01916 "add $24, %%"REG_d" \n\t"
01917 "packssdw %%mm2, %%mm4 \n\t"
01918 "psraw $7, %%mm4 \n\t"
01919
01920 "packuswb %%mm4, %%mm0 \n\t"
01921 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
01922
01923 "movq %%mm0, (%1, %%"REG_a") \n\t"
01924 "add $8, %%"REG_a" \n\t"
01925 " js 1b \n\t"
01926 : : "r" (src+width*3), "r" (dst+width), "g" (-width)
01927 : "%"REG_a, "%"REG_d
01928 );
01929 #else
01930 int i;
01931 for (i=0; i<width; i++)
01932 {
01933 int b= src[i*3+0];
01934 int g= src[i*3+1];
01935 int r= src[i*3+2];
01936
01937 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01938 }
01939 #endif
01940 }
01941
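/* Packed 24-bit BGR to horizontally subsampled U/V. The MMX path averages
 * two adjacent pixels (PAVGB where MMX2/3DNow! is available, add+shift
 * otherwise), applies the ff_bgr2UCoeff/ff_bgr2VCoeff multipliers and adds
 * ff_bgr2UVOffset; the C fallback sums the two pixels and shifts by
 * RGB2YUV_SHIFT+1. */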
01942 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
01943 {
01944 #ifdef HAVE_MMX
01945 asm volatile(
01946 "mov %3, %%"REG_a" \n\t"
01947 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01948 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
01949 "pxor %%mm7, %%mm7 \n\t"
01950 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
01951 "add %%"REG_d", %%"REG_d" \n\t"
01952 ASMALIGN(4)
01953 "1: \n\t"
01954 PREFETCH" 64(%0, %%"REG_d") \n\t"
01955 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01956 "movq (%0, %%"REG_d"), %%mm0 \n\t"
01957 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
01958 "movq %%mm0, %%mm1 \n\t"
01959 "movq %%mm2, %%mm3 \n\t"
01960 "psrlq $24, %%mm0 \n\t"
01961 "psrlq $24, %%mm2 \n\t"
01962 PAVGB(%%mm1, %%mm0)
01963 PAVGB(%%mm3, %%mm2)
01964 "punpcklbw %%mm7, %%mm0 \n\t"
01965 "punpcklbw %%mm7, %%mm2 \n\t"
01966 #else
01967 "movd (%0, %%"REG_d"), %%mm0 \n\t"
01968 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
01969 "punpcklbw %%mm7, %%mm0 \n\t"
01970 "punpcklbw %%mm7, %%mm2 \n\t"
01971 "paddw %%mm2, %%mm0 \n\t"
01972 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
01973 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
01974 "punpcklbw %%mm7, %%mm4 \n\t"
01975 "punpcklbw %%mm7, %%mm2 \n\t"
01976 "paddw %%mm4, %%mm2 \n\t"
01977 "psrlw $1, %%mm0 \n\t"
01978 "psrlw $1, %%mm2 \n\t"
01979 #endif
01980 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
01981 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
01982
01983 "pmaddwd %%mm0, %%mm1 \n\t"
01984 "pmaddwd %%mm2, %%mm3 \n\t"
01985 "pmaddwd %%mm6, %%mm0 \n\t"
01986 "pmaddwd %%mm6, %%mm2 \n\t"
01987 #ifndef FAST_BGR2YV12
01988 "psrad $8, %%mm0 \n\t"
01989 "psrad $8, %%mm1 \n\t"
01990 "psrad $8, %%mm2 \n\t"
01991 "psrad $8, %%mm3 \n\t"
01992 #endif
01993 "packssdw %%mm2, %%mm0 \n\t"
01994 "packssdw %%mm3, %%mm1 \n\t"
01995 "pmaddwd %%mm5, %%mm0 \n\t"
01996 "pmaddwd %%mm5, %%mm1 \n\t"
01997 "packssdw %%mm1, %%mm0 \n\t"
01998 "psraw $7, %%mm0 \n\t"
01999
02000 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02001 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
02002 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
02003 "movq %%mm4, %%mm1 \n\t"
02004 "movq %%mm2, %%mm3 \n\t"
02005 "psrlq $24, %%mm4 \n\t"
02006 "psrlq $24, %%mm2 \n\t"
02007 PAVGB(%%mm1, %%mm4)
02008 PAVGB(%%mm3, %%mm2)
02009 "punpcklbw %%mm7, %%mm4 \n\t"
02010 "punpcklbw %%mm7, %%mm2 \n\t"
02011 #else
02012 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02013 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
02014 "punpcklbw %%mm7, %%mm4 \n\t"
02015 "punpcklbw %%mm7, %%mm2 \n\t"
02016 "paddw %%mm2, %%mm4 \n\t"
02017 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
02018 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
02019 "punpcklbw %%mm7, %%mm5 \n\t"
02020 "punpcklbw %%mm7, %%mm2 \n\t"
02021 "paddw %%mm5, %%mm2 \n\t"
02022 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02023 "psrlw $2, %%mm4 \n\t"
02024 "psrlw $2, %%mm2 \n\t"
02025 #endif
02026 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
02027 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
02028
02029 "pmaddwd %%mm4, %%mm1 \n\t"
02030 "pmaddwd %%mm2, %%mm3 \n\t"
02031 "pmaddwd %%mm6, %%mm4 \n\t"
02032 "pmaddwd %%mm6, %%mm2 \n\t"
02033 #ifndef FAST_BGR2YV12
02034 "psrad $8, %%mm4 \n\t"
02035 "psrad $8, %%mm1 \n\t"
02036 "psrad $8, %%mm2 \n\t"
02037 "psrad $8, %%mm3 \n\t"
02038 #endif
02039 "packssdw %%mm2, %%mm4 \n\t"
02040 "packssdw %%mm3, %%mm1 \n\t"
02041 "pmaddwd %%mm5, %%mm4 \n\t"
02042 "pmaddwd %%mm5, %%mm1 \n\t"
02043 "add $24, %%"REG_d" \n\t"
02044 "packssdw %%mm1, %%mm4 \n\t"
02045 "psraw $7, %%mm4 \n\t"
02046
02047 "movq %%mm0, %%mm1 \n\t"
02048 "punpckldq %%mm4, %%mm0 \n\t"
02049 "punpckhdq %%mm4, %%mm1 \n\t"
02050 "packsswb %%mm1, %%mm0 \n\t"
02051 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
02052
02053 "movd %%mm0, (%1, %%"REG_a") \n\t"
02054 "punpckhdq %%mm0, %%mm0 \n\t"
02055 "movd %%mm0, (%2, %%"REG_a") \n\t"
02056 "add $4, %%"REG_a" \n\t"
02057 " js 1b \n\t"
02058 : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
02059 : "%"REG_a, "%"REG_d
02060 );
02061 #else
02062 int i;
02063 for (i=0; i<width; i++)
02064 {
02065 int b= src1[6*i + 0] + src1[6*i + 3];
02066 int g= src1[6*i + 1] + src1[6*i + 4];
02067 int r= src1[6*i + 2] + src1[6*i + 5];
02068
02069 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
02070 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
02071 }
02072 #endif
02073 assert(src1 == src2);
02074 }
02075
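/* RGB565 to luma: unpack the 5-6-5 fields; the *2 on the red and blue terms
 * together with the reduced shift scales the 5-bit components by 8 and the
 * 6-bit green by 4, approximating full 8-bit depth. */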
02076 static inline void RENAME(rgb16ToY)(uint8_t *dst, uint8_t *src, int width)
02077 {
02078 int i;
02079 for (i=0; i<width; i++)
02080 {
02081 int d= ((uint16_t*)src)[i];
02082 int b= d&0x1F;
02083 int g= (d>>5)&0x3F;
02084 int r= (d>>11)&0x1F;
02085
02086 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
02087 }
02088 }
02089
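/* RGB565 to subsampled U/V. Two 16-bit pixels are loaded as one 32-bit word
 * and split with the 0x07E0F81F/0x07C0F83F masks so that the R, G and B
 * fields of both pixels can be summed in parallel before the coefficients
 * and the +128 offset are applied. */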
02090 static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02091 {
02092 int i;
02093 assert(src1==src2);
02094 for (i=0; i<width; i++)
02095 {
02096 int d0= ((uint32_t*)src1)[i];
02097
02098 int dl= (d0&0x07E0F81F);
02099 int dh= ((d0>>5)&0x07C0F83F);
02100
02101 int dh2= (dh>>11) + (dh<<21);
02102 int d= dh2 + dl;
02103
02104 int b= d&0x7F;
02105 int r= (d>>11)&0x7F;
02106 int g= d>>21;
02107 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
02108 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
02109 }
02110 }
02111
02112 static inline void RENAME(rgb15ToY)(uint8_t *dst, uint8_t *src, int width)
02113 {
02114 int i;
02115 for (i=0; i<width; i++)
02116 {
02117 int d= ((uint16_t*)src)[i];
02118 int b= d&0x1F;
02119 int g= (d>>5)&0x1F;
02120 int r= (d>>10)&0x1F;
02121
02122 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
02123 }
02124 }
02125
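/* Same two-pixels-at-once summing trick as rgb16ToUV, adapted to the 5-5-5
 * field layout of RGB555. */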
02126 static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02127 {
02128 int i;
02129 assert(src1==src2);
02130 for (i=0; i<width; i++)
02131 {
02132 int d0= ((uint32_t*)src1)[i];
02133
02134 int dl= (d0&0x03E07C1F);
02135 int dh= ((d0>>5)&0x03E0F81F);
02136
02137 int dh2= (dh>>11) + (dh<<21);
02138 int d= dh2 + dl;
02139
02140 int b= d&0x7F;
02141 int r= (d>>10)&0x7F;
02142 int g= d>>21;
02143 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
02144 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
02145 }
02146 }
02147
02148
02149 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
02150 {
02151 int i;
02152 for (i=0; i<width; i++)
02153 {
02154 int r= ((uint32_t*)src)[i]&0xFF;
02155 int g= (((uint32_t*)src)[i]>>8)&0xFF;
02156 int b= (((uint32_t*)src)[i]>>16)&0xFF;
02157
02158 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
02159 }
02160 }
02161
02162 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02163 {
02164 int i;
02165 assert(src1==src2);
02166 for (i=0; i<width; i++)
02167 {
02168 const int a= ((uint32_t*)src1)[2*i+0];
02169 const int e= ((uint32_t*)src1)[2*i+1];
02170 const int l= (a&0xFF00FF) + (e&0xFF00FF);
02171 const int h= (a&0x00FF00) + (e&0x00FF00);
02172 const int r= l&0x3FF;
02173 const int g= h>>8;
02174 const int b= l>>16;
02175
02176 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
02177 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
02178 }
02179 }
02180
02181 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
02182 {
02183 int i;
02184 for (i=0; i<width; i++)
02185 {
02186 int r= src[i*3+0];
02187 int g= src[i*3+1];
02188 int b= src[i*3+2];
02189
02190 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
02191 }
02192 }
02193
02194 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02195 {
02196 int i;
02197 assert(src1==src2);
02198 for (i=0; i<width; i++)
02199 {
02200 int r= src1[6*i + 0] + src1[6*i + 3];
02201 int g= src1[6*i + 1] + src1[6*i + 4];
02202 int b= src1[6*i + 2] + src1[6*i + 5];
02203
02204 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
02205 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
02206 }
02207 }
02208
02209 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
02210 {
02211 int i;
02212 for (i=0; i<width; i++)
02213 {
02214 int d= ((uint16_t*)src)[i];
02215 int r= d&0x1F;
02216 int g= (d>>5)&0x3F;
02217 int b= (d>>11)&0x1F;
02218
02219 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
02220 }
02221 }
02222
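/* BGR565 to subsampled U/V: the two pixels sharing one chroma sample are
 * combined by swapping the 16-bit halves of the word and adding, so that
 * matching colour fields are summed in place; red and blue then play the
 * opposite roles compared to rgb16ToUV. */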
02223 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02224 {
02225 int i;
02226 assert(src1 == src2);
02227 for (i=0; i<width; i++)
02228 {
02229 int d0= ((uint32_t*)src1)[i];
02230
02231 int dl= (d0&0x07E0F81F);
02232 int d= dl + (((d0>>16) + (d0<<16))&0x07E0F81F);
02233
02234 int r= d&0x3F;
02235 int b= (d>>11)&0x3F;
02236 int g= d>>21;
02237 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
02238 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
02239 }
02240 }
02241
02242 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
02243 {
02244 int i;
02245 for (i=0; i<width; i++)
02246 {
02247 int d= ((uint16_t*)src)[i];
02248 int r= d&0x1F;
02249 int g= (d>>5)&0x1F;
02250 int b= (d>>10)&0x1F;
02251
02252 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
02253 }
02254 }
02255
02256 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02257 {
02258 int i;
02259 assert(src1 == src2);
02260 for (i=0; i<width; i++)
02261 {
02262 int d0= ((uint32_t*)src1)[i];
02263
02264 int dl= (d0&0x03E07C1F);
02265 int d= dl + (((d0>>16) + (d0<<16))&0x03E07C1F);
02266
02267 int r= d&0x3F;
02268 int b= (d>>10)&0x3F;
02269 int g= d>>21;
02270 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
02271 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
02272 }
02273 }
02274
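/* Paletted (and 4/8-bit RGB/BGR) input: the luma is taken from the low byte
 * of the palette entry selected by each source index. */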
02275 static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, int width, uint32_t *pal)
02276 {
02277 int i;
02278 for (i=0; i<width; i++)
02279 {
02280 int d= src[i];
02281
02282 dst[i]= pal[d] & 0xFF;
02283 }
02284 }
02285
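/* Chroma counterpart of palToY: U and V come from bytes 1 and 2 of the
 * palette entry. */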
02286 static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width, uint32_t *pal)
02287 {
02288 int i;
02289 assert(src1 == src2);
02290 for (i=0; i<width; i++)
02291 {
02292 int p= pal[src1[i]];
02293
02294 dstU[i]= p>>8;
02295 dstV[i]= p>>16;
02296 }
02297 }
02298
02299
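/* Generic horizontal scaler: for each output sample it computes the dot
 * product of filterSize source pixels with the corresponding filter
 * coefficients, scales the result down by 7 bits and clips it to 15 bits
 * (see the plain C fallback at the bottom). The MMX paths are unrolled for
 * filterSize 4 and 8; the generic MMX loop handles any multiple of 4. */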
02300 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
02301 int16_t *filter, int16_t *filterPos, long filterSize)
02302 {
02303 #ifdef HAVE_MMX
02304 assert(filterSize % 4 == 0 && filterSize>0);
02305 if (filterSize==4)
02306 {
02307 long counter= -2*dstW;
02308 filter-= counter*2;
02309 filterPos-= counter/2;
02310 dst-= counter/2;
02311 asm volatile(
02312 #if defined(PIC)
02313 "push %%"REG_b" \n\t"
02314 #endif
02315 "pxor %%mm7, %%mm7 \n\t"
02316 "movq "MANGLE(w02)", %%mm6 \n\t"
02317 "push %%"REG_BP" \n\t"
02318 "mov %%"REG_a", %%"REG_BP" \n\t"
02319 ASMALIGN(4)
02320 "1: \n\t"
02321 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
02322 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
02323 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
02324 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
02325 "movd (%3, %%"REG_a"), %%mm0 \n\t"
02326 "movd (%3, %%"REG_b"), %%mm2 \n\t"
02327 "punpcklbw %%mm7, %%mm0 \n\t"
02328 "punpcklbw %%mm7, %%mm2 \n\t"
02329 "pmaddwd %%mm1, %%mm0 \n\t"
02330 "pmaddwd %%mm2, %%mm3 \n\t"
02331 "psrad $8, %%mm0 \n\t"
02332 "psrad $8, %%mm3 \n\t"
02333 "packssdw %%mm3, %%mm0 \n\t"
02334 "pmaddwd %%mm6, %%mm0 \n\t"
02335 "packssdw %%mm0, %%mm0 \n\t"
02336 "movd %%mm0, (%4, %%"REG_BP") \n\t"
02337 "add $4, %%"REG_BP" \n\t"
02338 " jnc 1b \n\t"
02339
02340 "pop %%"REG_BP" \n\t"
02341 #if defined(PIC)
02342 "pop %%"REG_b" \n\t"
02343 #endif
02344 : "+a" (counter)
02345 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
02346 #if !defined(PIC)
02347 : "%"REG_b
02348 #endif
02349 );
02350 }
02351 else if (filterSize==8)
02352 {
02353 long counter= -2*dstW;
02354 filter-= counter*4;
02355 filterPos-= counter/2;
02356 dst-= counter/2;
02357 asm volatile(
02358 #if defined(PIC)
02359 "push %%"REG_b" \n\t"
02360 #endif
02361 "pxor %%mm7, %%mm7 \n\t"
02362 "movq "MANGLE(w02)", %%mm6 \n\t"
02363 "push %%"REG_BP" \n\t"
02364 "mov %%"REG_a", %%"REG_BP" \n\t"
02365 ASMALIGN(4)
02366 "1: \n\t"
02367 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
02368 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
02369 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
02370 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
02371 "movd (%3, %%"REG_a"), %%mm0 \n\t"
02372 "movd (%3, %%"REG_b"), %%mm2 \n\t"
02373 "punpcklbw %%mm7, %%mm0 \n\t"
02374 "punpcklbw %%mm7, %%mm2 \n\t"
02375 "pmaddwd %%mm1, %%mm0 \n\t"
02376 "pmaddwd %%mm2, %%mm3 \n\t"
02377
02378 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
02379 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
02380 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
02381 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
02382 "punpcklbw %%mm7, %%mm4 \n\t"
02383 "punpcklbw %%mm7, %%mm2 \n\t"
02384 "pmaddwd %%mm1, %%mm4 \n\t"
02385 "pmaddwd %%mm2, %%mm5 \n\t"
02386 "paddd %%mm4, %%mm0 \n\t"
02387 "paddd %%mm5, %%mm3 \n\t"
02388
02389 "psrad $8, %%mm0 \n\t"
02390 "psrad $8, %%mm3 \n\t"
02391 "packssdw %%mm3, %%mm0 \n\t"
02392 "pmaddwd %%mm6, %%mm0 \n\t"
02393 "packssdw %%mm0, %%mm0 \n\t"
02394 "movd %%mm0, (%4, %%"REG_BP") \n\t"
02395 "add $4, %%"REG_BP" \n\t"
02396 " jnc 1b \n\t"
02397
02398 "pop %%"REG_BP" \n\t"
02399 #if defined(PIC)
02400 "pop %%"REG_b" \n\t"
02401 #endif
02402 : "+a" (counter)
02403 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
02404 #if !defined(PIC)
02405 : "%"REG_b
02406 #endif
02407 );
02408 }
02409 else
02410 {
02411 uint8_t *offset = src+filterSize;
02412 long counter= -2*dstW;
02413
02414 filterPos-= counter/2;
02415 dst-= counter/2;
02416 asm volatile(
02417 "pxor %%mm7, %%mm7 \n\t"
02418 "movq "MANGLE(w02)", %%mm6 \n\t"
02419 ASMALIGN(4)
02420 "1: \n\t"
02421 "mov %2, %%"REG_c" \n\t"
02422 "movzwl (%%"REG_c", %0), %%eax \n\t"
02423 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
02424 "mov %5, %%"REG_c" \n\t"
02425 "pxor %%mm4, %%mm4 \n\t"
02426 "pxor %%mm5, %%mm5 \n\t"
02427 "2: \n\t"
02428 "movq (%1), %%mm1 \n\t"
02429 "movq (%1, %6), %%mm3 \n\t"
02430 "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
02431 "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
02432 "punpcklbw %%mm7, %%mm0 \n\t"
02433 "punpcklbw %%mm7, %%mm2 \n\t"
02434 "pmaddwd %%mm1, %%mm0 \n\t"
02435 "pmaddwd %%mm2, %%mm3 \n\t"
02436 "paddd %%mm3, %%mm5 \n\t"
02437 "paddd %%mm0, %%mm4 \n\t"
02438 "add $8, %1 \n\t"
02439 "add $4, %%"REG_c" \n\t"
02440 "cmp %4, %%"REG_c" \n\t"
02441 " jb 2b \n\t"
02442 "add %6, %1 \n\t"
02443 "psrad $8, %%mm4 \n\t"
02444 "psrad $8, %%mm5 \n\t"
02445 "packssdw %%mm5, %%mm4 \n\t"
02446 "pmaddwd %%mm6, %%mm4 \n\t"
02447 "packssdw %%mm4, %%mm4 \n\t"
02448 "mov %3, %%"REG_a" \n\t"
02449 "movd %%mm4, (%%"REG_a", %0) \n\t"
02450 "add $4, %0 \n\t"
02451 " jnc 1b \n\t"
02452
02453 : "+r" (counter), "+r" (filter)
02454 : "m" (filterPos), "m" (dst), "m"(offset),
02455 "m" (src), "r" (filterSize*2)
02456 : "%"REG_a, "%"REG_c, "%"REG_d
02457 );
02458 }
02459 #else
02460 #ifdef HAVE_ALTIVEC
02461 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
02462 #else
02463 int i;
02464 for (i=0; i<dstW; i++)
02465 {
02466 int j;
02467 int srcPos= filterPos[i];
02468 int val=0;
02469
02470 for (j=0; j<filterSize; j++)
02471 {
02472
02473 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02474 }
02475
02476 dst[i] = av_clip(val>>7, 0, (1<<15)-1);
02477
02478 }
02479 #endif
02480 #endif
02481 }
02482
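/* Horizontal scaling (plus input conversion) of one luma line. Packed YUV,
 * RGB and paletted sources are first converted to 8-bit grey into
 * formatConvBuffer; then either the generic hScale() filter is applied or,
 * for SWS_FAST_BILINEAR, an unrolled bilinear loop (the MMX2 "funny" code
 * when it can be used). The small loop after the MMX2 block pads output
 * samples that would read past the last source pixel. */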
02483 static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
02484 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
02485 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
02486 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
02487 int32_t *mmx2FilterPos, uint8_t *pal)
02488 {
02489 if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
02490 {
02491 RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
02492 src= formatConvBuffer;
02493 }
02494 else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
02495 {
02496 RENAME(uyvyToY)(formatConvBuffer, src, srcW);
02497 src= formatConvBuffer;
02498 }
02499 else if (srcFormat==PIX_FMT_RGB32)
02500 {
02501 RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
02502 src= formatConvBuffer;
02503 }
02504 else if (srcFormat==PIX_FMT_BGR24)
02505 {
02506 RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
02507 src= formatConvBuffer;
02508 }
02509 else if (srcFormat==PIX_FMT_BGR565)
02510 {
02511 RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
02512 src= formatConvBuffer;
02513 }
02514 else if (srcFormat==PIX_FMT_BGR555)
02515 {
02516 RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
02517 src= formatConvBuffer;
02518 }
02519 else if (srcFormat==PIX_FMT_BGR32)
02520 {
02521 RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
02522 src= formatConvBuffer;
02523 }
02524 else if (srcFormat==PIX_FMT_RGB24)
02525 {
02526 RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
02527 src= formatConvBuffer;
02528 }
02529 else if (srcFormat==PIX_FMT_RGB565)
02530 {
02531 RENAME(rgb16ToY)(formatConvBuffer, src, srcW);
02532 src= formatConvBuffer;
02533 }
02534 else if (srcFormat==PIX_FMT_RGB555)
02535 {
02536 RENAME(rgb15ToY)(formatConvBuffer, src, srcW);
02537 src= formatConvBuffer;
02538 }
02539 else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
02540 {
02541 RENAME(palToY)(formatConvBuffer, src, srcW, pal);
02542 src= formatConvBuffer;
02543 }
02544
02545 #ifdef HAVE_MMX
02546
02547 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
02548 #else
02549 if (!(flags&SWS_FAST_BILINEAR))
02550 #endif
02551 {
02552 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
02553 }
02554 else
02555 {
02556 #if defined(ARCH_X86)
02557 #ifdef HAVE_MMX2
02558 int i;
02559 #if defined(PIC)
02560 uint64_t ebxsave __attribute__((aligned(8)));
02561 #endif
02562 if (canMMX2BeUsed)
02563 {
02564 asm volatile(
02565 #if defined(PIC)
02566 "mov %%"REG_b", %5 \n\t"
02567 #endif
02568 "pxor %%mm7, %%mm7 \n\t"
02569 "mov %0, %%"REG_c" \n\t"
02570 "mov %1, %%"REG_D" \n\t"
02571 "mov %2, %%"REG_d" \n\t"
02572 "mov %3, %%"REG_b" \n\t"
02573 "xor %%"REG_a", %%"REG_a" \n\t"
02574 PREFETCH" (%%"REG_c") \n\t"
02575 PREFETCH" 32(%%"REG_c") \n\t"
02576 PREFETCH" 64(%%"REG_c") \n\t"
02577
02578 #ifdef ARCH_X86_64
02579
02580 #define FUNNY_Y_CODE \
02581 "movl (%%"REG_b"), %%esi \n\t"\
02582 "call *%4 \n\t"\
02583 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
02584 "add %%"REG_S", %%"REG_c" \n\t"\
02585 "add %%"REG_a", %%"REG_D" \n\t"\
02586 "xor %%"REG_a", %%"REG_a" \n\t"\
02587
02588 #else
02589
02590 #define FUNNY_Y_CODE \
02591 "movl (%%"REG_b"), %%esi \n\t"\
02592 "call *%4 \n\t"\
02593 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
02594 "add %%"REG_a", %%"REG_D" \n\t"\
02595 "xor %%"REG_a", %%"REG_a" \n\t"\
02596
02597 #endif
02598
02599 FUNNY_Y_CODE
02600 FUNNY_Y_CODE
02601 FUNNY_Y_CODE
02602 FUNNY_Y_CODE
02603 FUNNY_Y_CODE
02604 FUNNY_Y_CODE
02605 FUNNY_Y_CODE
02606 FUNNY_Y_CODE
02607
02608 #if defined(PIC)
02609 "mov %5, %%"REG_b" \n\t"
02610 #endif
02611 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
02612 "m" (funnyYCode)
02613 #if defined(PIC)
02614 ,"m" (ebxsave)
02615 #endif
02616 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
02617 #if !defined(PIC)
02618 ,"%"REG_b
02619 #endif
02620 );
02621 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
02622 }
02623 else
02624 {
02625 #endif
02626 long xInc_shr16 = xInc >> 16;
02627 uint16_t xInc_mask = xInc & 0xffff;
02628
02629 asm volatile(
02630 "xor %%"REG_a", %%"REG_a" \n\t"
02631 "xor %%"REG_d", %%"REG_d" \n\t"
02632 "xorl %%ecx, %%ecx \n\t"
02633 ASMALIGN(4)
02634 "1: \n\t"
02635 "movzbl (%0, %%"REG_d"), %%edi \n\t"
02636 "movzbl 1(%0, %%"REG_d"), %%esi \n\t"
02637 "subl %%edi, %%esi \n\t"
02638 "imull %%ecx, %%esi \n\t"
02639 "shll $16, %%edi \n\t"
02640 "addl %%edi, %%esi \n\t"
02641 "mov %1, %%"REG_D" \n\t"
02642 "shrl $9, %%esi \n\t"
02643 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
02644 "addw %4, %%cx \n\t"
02645 "adc %3, %%"REG_d" \n\t"
02646
02647 "movzbl (%0, %%"REG_d"), %%edi \n\t"
02648 "movzbl 1(%0, %%"REG_d"), %%esi \n\t"
02649 "subl %%edi, %%esi \n\t"
02650 "imull %%ecx, %%esi \n\t"
02651 "shll $16, %%edi \n\t"
02652 "addl %%edi, %%esi \n\t"
02653 "mov %1, %%"REG_D" \n\t"
02654 "shrl $9, %%esi \n\t"
02655 "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
02656 "addw %4, %%cx \n\t"
02657 "adc %3, %%"REG_d" \n\t"
02658
02659
02660 "add $2, %%"REG_a" \n\t"
02661 "cmp %2, %%"REG_a" \n\t"
02662 " jb 1b \n\t"
02663
02664
02665 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
02666 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
02667 );
02668 #ifdef HAVE_MMX2
02669 }
02670 #endif
02671 #else
02672 int i;
02673 unsigned int xpos=0;
02674 for (i=0;i<dstWidth;i++)
02675 {
02676 register unsigned int xx=xpos>>16;
02677 register unsigned int xalpha=(xpos&0xFFFF)>>9;
02678 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
02679 xpos+=xInc;
02680 }
02681 #endif
02682 }
02683 }
02684
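/* Horizontal scaling (plus input conversion) of one chroma line pair. Works
 * like hyscale() but converts and scales both chroma planes, writing the
 * second one 2048 samples after the first (dst and dst+2048). */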
02685 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
02686 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
02687 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
02688 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
02689 int32_t *mmx2FilterPos, uint8_t *pal)
02690 {
02691 if (srcFormat==PIX_FMT_YUYV422)
02692 {
02693 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02694 src1= formatConvBuffer;
02695 src2= formatConvBuffer+2048;
02696 }
02697 else if (srcFormat==PIX_FMT_UYVY422)
02698 {
02699 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02700 src1= formatConvBuffer;
02701 src2= formatConvBuffer+2048;
02702 }
02703 else if (srcFormat==PIX_FMT_RGB32)
02704 {
02705 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02706 src1= formatConvBuffer;
02707 src2= formatConvBuffer+2048;
02708 }
02709 else if (srcFormat==PIX_FMT_BGR24)
02710 {
02711 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02712 src1= formatConvBuffer;
02713 src2= formatConvBuffer+2048;
02714 }
02715 else if (srcFormat==PIX_FMT_BGR565)
02716 {
02717 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02718 src1= formatConvBuffer;
02719 src2= formatConvBuffer+2048;
02720 }
02721 else if (srcFormat==PIX_FMT_BGR555)
02722 {
02723 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02724 src1= formatConvBuffer;
02725 src2= formatConvBuffer+2048;
02726 }
02727 else if (srcFormat==PIX_FMT_BGR32)
02728 {
02729 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02730 src1= formatConvBuffer;
02731 src2= formatConvBuffer+2048;
02732 }
02733 else if (srcFormat==PIX_FMT_RGB24)
02734 {
02735 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02736 src1= formatConvBuffer;
02737 src2= formatConvBuffer+2048;
02738 }
02739 else if (srcFormat==PIX_FMT_RGB565)
02740 {
02741 RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02742 src1= formatConvBuffer;
02743 src2= formatConvBuffer+2048;
02744 }
02745 else if (srcFormat==PIX_FMT_RGB555)
02746 {
02747 RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
02748 src1= formatConvBuffer;
02749 src2= formatConvBuffer+2048;
02750 }
02751 else if (isGray(srcFormat))
02752 {
02753 return;
02754 }
02755 else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
02756 {
02757 RENAME(palToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW, pal);
02758 src1= formatConvBuffer;
02759 src2= formatConvBuffer+2048;
02760 }
02761
02762 #ifdef HAVE_MMX
02763
02764 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
02765 #else
02766 if (!(flags&SWS_FAST_BILINEAR))
02767 #endif
02768 {
02769 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02770 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02771 }
02772 else
02773 {
02774 #if defined(ARCH_X86)
02775 #ifdef HAVE_MMX2
02776 int i;
02777 #if defined(PIC)
02778 uint64_t ebxsave __attribute__((aligned(8)));
02779 #endif
02780 if (canMMX2BeUsed)
02781 {
02782 asm volatile(
02783 #if defined(PIC)
02784 "mov %%"REG_b", %6 \n\t"
02785 #endif
02786 "pxor %%mm7, %%mm7 \n\t"
02787 "mov %0, %%"REG_c" \n\t"
02788 "mov %1, %%"REG_D" \n\t"
02789 "mov %2, %%"REG_d" \n\t"
02790 "mov %3, %%"REG_b" \n\t"
02791 "xor %%"REG_a", %%"REG_a" \n\t"
02792 PREFETCH" (%%"REG_c") \n\t"
02793 PREFETCH" 32(%%"REG_c") \n\t"
02794 PREFETCH" 64(%%"REG_c") \n\t"
02795
02796 #ifdef ARCH_X86_64
02797
02798 #define FUNNY_UV_CODE \
02799 "movl (%%"REG_b"), %%esi \n\t"\
02800 "call *%4 \n\t"\
02801 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
02802 "add %%"REG_S", %%"REG_c" \n\t"\
02803 "add %%"REG_a", %%"REG_D" \n\t"\
02804 "xor %%"REG_a", %%"REG_a" \n\t"\
02805
02806 #else
02807
02808 #define FUNNY_UV_CODE \
02809 "movl (%%"REG_b"), %%esi \n\t"\
02810 "call *%4 \n\t"\
02811 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
02812 "add %%"REG_a", %%"REG_D" \n\t"\
02813 "xor %%"REG_a", %%"REG_a" \n\t"\
02814
02815 #endif
02816
02817 FUNNY_UV_CODE
02818 FUNNY_UV_CODE
02819 FUNNY_UV_CODE
02820 FUNNY_UV_CODE
02821 "xor %%"REG_a", %%"REG_a" \n\t"
02822 "mov %5, %%"REG_c" \n\t"
02823 "mov %1, %%"REG_D" \n\t"
02824 "add $4096, %%"REG_D" \n\t"
02825 PREFETCH" (%%"REG_c") \n\t"
02826 PREFETCH" 32(%%"REG_c") \n\t"
02827 PREFETCH" 64(%%"REG_c") \n\t"
02828
02829 FUNNY_UV_CODE
02830 FUNNY_UV_CODE
02831 FUNNY_UV_CODE
02832 FUNNY_UV_CODE
02833
02834 #if defined(PIC)
02835 "mov %6, %%"REG_b" \n\t"
02836 #endif
02837 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
02838 "m" (funnyUVCode), "m" (src2)
02839 #if defined(PIC)
02840 ,"m" (ebxsave)
02841 #endif
02842 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
02843 #if !defined(PIC)
02844 ,"%"REG_b
02845 #endif
02846 );
02847 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
02848 {
02849
02850 dst[i] = src1[srcW-1]*128;
02851 dst[i+2048] = src2[srcW-1]*128;
02852 }
02853 }
02854 else
02855 {
02856 #endif
02857 long xInc_shr16 = (long) (xInc >> 16);
02858 uint16_t xInc_mask = xInc & 0xffff;
02859 asm volatile(
02860 "xor %%"REG_a", %%"REG_a" \n\t"
02861 "xor %%"REG_d", %%"REG_d" \n\t"
02862 "xorl %%ecx, %%ecx \n\t"
02863 ASMALIGN(4)
02864 "1: \n\t"
02865 "mov %0, %%"REG_S" \n\t"
02866 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t"
02867 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t"
02868 "subl %%edi, %%esi \n\t"
02869 "imull %%ecx, %%esi \n\t"
02870 "shll $16, %%edi \n\t"
02871 "addl %%edi, %%esi \n\t"
02872 "mov %1, %%"REG_D" \n\t"
02873 "shrl $9, %%esi \n\t"
02874 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
02875
02876 "movzbl (%5, %%"REG_d"), %%edi \n\t"
02877 "movzbl 1(%5, %%"REG_d"), %%esi \n\t"
02878 "subl %%edi, %%esi \n\t"
02879 "imull %%ecx, %%esi \n\t"
02880 "shll $16, %%edi \n\t"
02881 "addl %%edi, %%esi \n\t"
02882 "mov %1, %%"REG_D" \n\t"
02883 "shrl $9, %%esi \n\t"
02884 "movw %%si, 4096(%%"REG_D", %%"REG_a", 2) \n\t"
02885
02886 "addw %4, %%cx \n\t"
02887 "adc %3, %%"REG_d" \n\t"
02888 "add $1, %%"REG_a" \n\t"
02889 "cmp %2, %%"REG_a" \n\t"
02890 " jb 1b \n\t"
02891
02892
02893
02894 #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
02895 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
02896 #else
02897 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
02898 #endif
02899 "r" (src2)
02900 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
02901 );
02902 #ifdef HAVE_MMX2
02903 }
02904 #endif
02905 #else
02906 int i;
02907 unsigned int xpos=0;
02908 for (i=0;i<dstWidth;i++)
02909 {
02910 register unsigned int xx=xpos>>16;
02911 register unsigned int xalpha=(xpos&0xFFFF)>>9;
02912 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
02913 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
02914
02915
02916
02917
02918 xpos+=xInc;
02919 }
02920 #endif
02921 }
02922 }
02923
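/* Main per-slice scaling function: horizontally scales the incoming source
 * lines into the lumPixBuf/chrPixBuf ring buffers and vertically filters
 * them into every destination line that can be completed with the data
 * received so far. Returns the number of output lines produced. */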
02924 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
02925 int srcSliceH, uint8_t* dst[], int dstStride[]){
02926
02927
02928 const int srcW= c->srcW;
02929 const int dstW= c->dstW;
02930 const int dstH= c->dstH;
02931 const int chrDstW= c->chrDstW;
02932 const int chrSrcW= c->chrSrcW;
02933 const int lumXInc= c->lumXInc;
02934 const int chrXInc= c->chrXInc;
02935 const int dstFormat= c->dstFormat;
02936 const int srcFormat= c->srcFormat;
02937 const int flags= c->flags;
02938 const int canMMX2BeUsed= c->canMMX2BeUsed;
02939 int16_t *vLumFilterPos= c->vLumFilterPos;
02940 int16_t *vChrFilterPos= c->vChrFilterPos;
02941 int16_t *hLumFilterPos= c->hLumFilterPos;
02942 int16_t *hChrFilterPos= c->hChrFilterPos;
02943 int16_t *vLumFilter= c->vLumFilter;
02944 int16_t *vChrFilter= c->vChrFilter;
02945 int16_t *hLumFilter= c->hLumFilter;
02946 int16_t *hChrFilter= c->hChrFilter;
02947 int32_t *lumMmxFilter= c->lumMmxFilter;
02948 int32_t *chrMmxFilter= c->chrMmxFilter;
02949 const int vLumFilterSize= c->vLumFilterSize;
02950 const int vChrFilterSize= c->vChrFilterSize;
02951 const int hLumFilterSize= c->hLumFilterSize;
02952 const int hChrFilterSize= c->hChrFilterSize;
02953 int16_t **lumPixBuf= c->lumPixBuf;
02954 int16_t **chrPixBuf= c->chrPixBuf;
02955 const int vLumBufSize= c->vLumBufSize;
02956 const int vChrBufSize= c->vChrBufSize;
02957 uint8_t *funnyYCode= c->funnyYCode;
02958 uint8_t *funnyUVCode= c->funnyUVCode;
02959 uint8_t *formatConvBuffer= c->formatConvBuffer;
02960 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02961 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02962 int lastDstY;
02963 uint8_t *pal=NULL;
02964
02965
02966 int dstY= c->dstY;
02967 int lumBufIndex= c->lumBufIndex;
02968 int chrBufIndex= c->chrBufIndex;
02969 int lastInLumBuf= c->lastInLumBuf;
02970 int lastInChrBuf= c->lastInChrBuf;
02971
02972 if (isPacked(c->srcFormat)){
02973 pal= src[1];
02974 src[0]=
02975 src[1]=
02976 src[2]= src[0];
02977 srcStride[0]=
02978 srcStride[1]=
02979 srcStride[2]= srcStride[0];
02980 }
02981 srcStride[1]<<= c->vChrDrop;
02982 srcStride[2]<<= c->vChrDrop;
02983
02984
02985
02986
02987 #if 0 //self test FIXME move to a vfilter or something
02988 {
02989 static volatile int i=0;
02990 i++;
02991 if (srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
02992 selfTest(src, srcStride, c->srcW, c->srcH);
02993 i--;
02994 }
02995 #endif
02996
02997
02998
02999
03000 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
03001 {
03002 static int firstTime=1;
03003 if (flags & SWS_PRINT_INFO && firstTime)
03004 {
03005 av_log(c, AV_LOG_WARNING, "SwScaler: Warning: dstStride is not aligned!\n"
03006 "SwScaler: ->cannot do aligned memory acesses anymore\n");
03007 firstTime=0;
03008 }
03009 }
03010
03011
03012
03013 if (srcSliceY ==0){
03014 lumBufIndex=0;
03015 chrBufIndex=0;
03016 dstY=0;
03017 lastInLumBuf= -1;
03018 lastInChrBuf= -1;
03019 }
03020
03021 lastDstY= dstY;
03022
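/* For every output line: make sure all source lines required by the
 * vertical filter are present in the ring buffers (converting and
 * horizontally scaling them on demand), then run the vertical scaler that
 * matches the destination format. If the current slice does not yet provide
 * enough input, the loop is left early and resumed with the next slice. */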
03023 for (;dstY < dstH; dstY++){
03024 unsigned char *dest =dst[0]+dstStride[0]*dstY;
03025 const int chrDstY= dstY>>c->chrDstVSubSample;
03026 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
03027 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
03028
03029 const int firstLumSrcY= vLumFilterPos[dstY];
03030 const int firstChrSrcY= vChrFilterPos[chrDstY];
03031 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1;
03032 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1;
03033
03034
03035
03036
03037 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
03038 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
03039
03040 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
03041 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)
03042
03043
03044 if (lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
03045 {
03046
03047 while(lastInLumBuf < lastLumSrcY)
03048 {
03049 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
03050 lumBufIndex++;
03051
03052 ASSERT(lumBufIndex < 2*vLumBufSize)
03053 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
03054 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
03055
03056 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
03057 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
03058 funnyYCode, c->srcFormat, formatConvBuffer,
03059 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
03060 lastInLumBuf++;
03061 }
03062 while(lastInChrBuf < lastChrSrcY)
03063 {
03064 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
03065 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
03066 chrBufIndex++;
03067 ASSERT(chrBufIndex < 2*vChrBufSize)
03068 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
03069 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
03070
03071
03072 if (!(isGray(srcFormat) || isGray(dstFormat)))
03073 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
03074 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
03075 funnyUVCode, c->srcFormat, formatConvBuffer,
03076 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
03077 lastInChrBuf++;
03078 }
03079
03080 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
03081 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
03082 }
03083 else
03084 {
03085
03086
03087
03088
03089
03090
03091 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
03092 {
03093 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
03094 lumBufIndex++;
03095 ASSERT(lumBufIndex < 2*vLumBufSize)
03096 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
03097 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
03098 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
03099 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
03100 funnyYCode, c->srcFormat, formatConvBuffer,
03101 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
03102 lastInLumBuf++;
03103 }
03104 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
03105 {
03106 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
03107 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
03108 chrBufIndex++;
03109 ASSERT(chrBufIndex < 2*vChrBufSize)
03110 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
03111 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
03112
03113 if (!(isGray(srcFormat) || isGray(dstFormat)))
03114 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
03115 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
03116 funnyUVCode, c->srcFormat, formatConvBuffer,
03117 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
03118 lastInChrBuf++;
03119 }
03120
03121 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
03122 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
03123 break;
03124 }
03125
03126 #ifdef HAVE_MMX
03127 b5Dither= dither8[dstY&1];
03128 g6Dither= dither4[dstY&1];
03129 g5Dither= dither8[dstY&1];
03130 r5Dither= dither8[(dstY+1)&1];
03131 #endif
03132 if (dstY < dstH-2)
03133 {
03134 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
03135 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
03136 #ifdef HAVE_MMX
03137 int i;
03138 if (flags & SWS_ACCURATE_RND){
03139 for (i=0; i<vLumFilterSize; i+=2){
03140 lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ];
03141 lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)];
03142 lumMmxFilter[2*i+2]=
03143 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
03144 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
03145 }
03146 for (i=0; i<vChrFilterSize; i+=2){
03147 chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ];
03148 chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)];
03149 chrMmxFilter[2*i+2]=
03150 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
03151 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
03152 }
03153 }else{
03154 for (i=0; i<vLumFilterSize; i++)
03155 {
03156 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
03157 lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
03158 lumMmxFilter[4*i+2]=
03159 lumMmxFilter[4*i+3]=
03160 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
03161 }
03162 for (i=0; i<vChrFilterSize; i++)
03163 {
03164 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
03165 chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
03166 chrMmxFilter[4*i+2]=
03167 chrMmxFilter[4*i+3]=
03168 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
03169 }
03170 }
03171 #endif
03172 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
03173 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
03174 if (dstY&chrSkipMask) uDest= NULL;
03175 RENAME(yuv2nv12X)(c,
03176 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
03177 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03178 dest, uDest, dstW, chrDstW, dstFormat);
03179 }
03180 else if (isPlanarYUV(dstFormat) || isGray(dstFormat))
03181 {
03182 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
03183 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
03184 if (vLumFilterSize == 1 && vChrFilterSize == 1)
03185 {
03186 int16_t *lumBuf = lumPixBuf[0];
03187 int16_t *chrBuf= chrPixBuf[0];
03188 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
03189 }
03190 else
03191 {
03192 RENAME(yuv2yuvX)(c,
03193 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
03194 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03195 dest, uDest, vDest, dstW, chrDstW);
03196 }
03197 }
03198 else
03199 {
03200 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
03201 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
03202 if (vLumFilterSize == 1 && vChrFilterSize == 2)
03203 {
03204 int chrAlpha= vChrFilter[2*dstY+1];
03205 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
03206 dest, dstW, chrAlpha, dstFormat, flags, dstY);
03207 }
03208 else if (vLumFilterSize == 2 && vChrFilterSize == 2)
03209 {
03210 int lumAlpha= vLumFilter[2*dstY+1];
03211 int chrAlpha= vChrFilter[2*dstY+1];
03212 lumMmxFilter[2]=
03213 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
03214 chrMmxFilter[2]=
03215 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
03216 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
03217 dest, dstW, lumAlpha, chrAlpha, dstY);
03218 }
03219 else
03220 {
03221 RENAME(yuv2packedX)(c,
03222 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
03223 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03224 dest, dstW, dstY);
03225 }
03226 }
03227 }
03228 else
03229 {
03230 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
03231 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
03232 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
03233 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
03234 if (dstY&chrSkipMask) uDest= NULL;
03235 yuv2nv12XinC(
03236 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
03237 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03238 dest, uDest, dstW, chrDstW, dstFormat);
03239 }
03240 else if (isPlanarYUV(dstFormat) || isGray(dstFormat))
03241 {
03242 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
03243 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
03244 yuv2yuvXinC(
03245 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
03246 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03247 dest, uDest, vDest, dstW, chrDstW);
03248 }
03249 else
03250 {
03251 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
03252 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
03253 yuv2packedXinC(c,
03254 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
03255 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03256 dest, dstW, dstY);
03257 }
03258 }
03259 }
03260
03261 #ifdef HAVE_MMX
03262 __asm __volatile(SFENCE:::"memory");
03263 __asm __volatile(EMMS:::"memory");
03264 #endif
03265
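/* Store the ring-buffer state so the next slice call continues where this
 * one stopped. */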
03266 c->dstY= dstY;
03267 c->lumBufIndex= lumBufIndex;
03268 c->chrBufIndex= chrBufIndex;
03269 c->lastInLumBuf= lastInLumBuf;
03270 c->lastInChrBuf= lastInChrBuf;
03271
03272 return dstY - lastDstY;
03273 }