00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "dsputil.h"
00026 #include "dsputil_mmx.h"
00027 #include "common.h"
00028
00029
00030
00031
00032
00033
00034
/**
 * One 1-D pass of the 8-point CAVS inverse transform, done with MMX on four
 * 16-bit lanes at a time.
 *
 * Below, rK denotes the four int16_t values at byte offset 16*K from block
 * (K = 0..7). The asm declares no outputs and no clobbers: the eight result
 * vectors are left in mm0-mm7, and the callers in this file read those
 * registers in the asm statement that immediately follows the call.
 *
 * @param block base address of the rows to transform (read only by this asm)
 * @param bias  packed 16-bit rounding constant added to the even part after
 *              it has been scaled by 8
 */
00035 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
00036 {
00037 asm volatile(
/* load the odd rows: mm4 = r7, mm5 = r1, mm2 = r5, mm7 = r3, and keep copies */
00038 "movq 112(%0), %%mm4 \n\t"
00039 "movq 16(%0), %%mm5 \n\t"
00040 "movq 80(%0), %%mm2 \n\t"
00041 "movq 48(%0), %%mm7 \n\t"
00042 "movq %%mm4, %%mm0 \n\t"
00043 "movq %%mm5, %%mm3 \n\t"
00044 "movq %%mm2, %%mm6 \n\t"
00045 "movq %%mm7, %%mm1 \n\t"
00046
/* mm4 = 2*r7, mm3 = 2*r1, mm6 = 2*r5, mm1 = 2*r3 */
00047 "paddw %%mm4, %%mm4 \n\t"
00048 "paddw %%mm3, %%mm3 \n\t"
00049 "paddw %%mm6, %%mm6 \n\t"
00050 "paddw %%mm1, %%mm1 \n\t"
/* mm0 = 3*r7, mm5 = 3*r1, mm2 = 3*r5, mm7 = 3*r3 */
00051 "paddw %%mm4, %%mm0 \n\t"
00052 "paddw %%mm3, %%mm5 \n\t"
00053 "paddw %%mm6, %%mm2 \n\t"
00054 "paddw %%mm1, %%mm7 \n\t"
/* mm5 = 3*r1 - 2*r7, mm7 = 3*r3 + 2*r5, mm1 = 2*r3 - 3*r5, mm3 = 2*r1 + 3*r7 */
00055 "psubw %%mm4, %%mm5 \n\t"
00056 "paddw %%mm6, %%mm7 \n\t"
00057 "psubw %%mm2, %%mm1 \n\t"
00058 "paddw %%mm0, %%mm3 \n\t"
00059
/* keep copies of the four odd terms, then combine them; the combined odd
 * terms end up in mm7, mm5, mm3 and mm1 (SUMSUB_BA is defined outside this
 * file) */
00060 "movq %%mm5, %%mm4 \n\t"
00061 "movq %%mm7, %%mm6 \n\t"
00062 "movq %%mm3, %%mm0 \n\t"
00063 "movq %%mm1, %%mm2 \n\t"
00064 SUMSUB_BA( %%mm7, %%mm5 )
00065 "paddw %%mm3, %%mm7 \n\t"
00066 "paddw %%mm1, %%mm5 \n\t"
00067 "paddw %%mm7, %%mm7 \n\t"
00068 "paddw %%mm5, %%mm5 \n\t"
00069 "paddw %%mm6, %%mm7 \n\t"
00070 "paddw %%mm4, %%mm5 \n\t"
00071
00072 SUMSUB_BA( %%mm1, %%mm3 )
00073 "psubw %%mm1, %%mm4 \n\t"
00074 "movq %%mm4, %%mm1 \n\t"
00075 "psubw %%mm6, %%mm3 \n\t"
00076 "paddw %%mm1, %%mm1 \n\t"
00077 "paddw %%mm3, %%mm3 \n\t"
00078 "psubw %%mm2, %%mm1 \n\t"
00079 "paddw %%mm0, %%mm3 \n\t"
00080
/* even part, rows 2 and 6: mm4 = 4*r2 - 10*r6, mm6 = 4*r6 + 10*r2 */
00081 "movq 32(%0), %%mm2 \n\t"
00082 "movq 96(%0), %%mm6 \n\t"
00083 "movq %%mm2, %%mm4 \n\t"
00084 "movq %%mm6, %%mm0 \n\t"
00085 "psllw $2, %%mm4 \n\t"
00086 "psllw $2, %%mm6 \n\t"
00087 "paddw %%mm4, %%mm2 \n\t"
00088 "paddw %%mm6, %%mm0 \n\t"
00089 "paddw %%mm2, %%mm2 \n\t"
00090 "paddw %%mm0, %%mm0 \n\t"
00091 "psubw %%mm0, %%mm4 \n\t"
00092 "paddw %%mm2, %%mm6 \n\t"
00093
/* even part, rows 0 and 4: butterfly r4 with r0, scale both results by 8
 * and add the rounding bias (%1) */
00094 "movq (%0), %%mm2 \n\t"
00095 "movq 64(%0), %%mm0 \n\t"
00096 SUMSUB_BA( %%mm0, %%mm2 )
00097 "psllw $3, %%mm0 \n\t"
00098 "psllw $3, %%mm2 \n\t"
00099 "paddw %1, %%mm0 \n\t"
00100 "paddw %1, %%mm2 \n\t"
00101
/* finish the even half, then pair every even term (mm6, mm4, mm2, mm0) with
 * an odd term (mm7, mm5, mm3, mm1) to form the eight outputs */
00102 SUMSUB_BA( %%mm6, %%mm0 )
00103 SUMSUB_BA( %%mm4, %%mm2 )
00104 SUMSUB_BA( %%mm7, %%mm6 )
00105 SUMSUB_BA( %%mm5, %%mm4 )
00106 SUMSUB_BA( %%mm3, %%mm2 )
00107 SUMSUB_BA( %%mm1, %%mm0 )
00108 :: "r"(block), "m"(bias)
00109 );
00110 }
00111
/**
 * 8x8 CAVS inverse transform of block, added to dst, followed by clearing
 * block.
 *
 * Steps, as performed below:
 *  1. for each 4-column half of block: cavs_idct8_1d() with bias ff_pw_4,
 *     arithmetic shift right by 3, transpose with TRANSPOSE4 (defined outside
 *     this file) and store into the scratch buffer b2;
 *  2. for each 4-column half of b2: cavs_idct8_1d() with bias ff_pw_64,
 *     arithmetic shift right by 7 and store back into b2;
 *  3. add_pixels_clamped_mmx(b2, dst, stride) (defined outside this file);
 *  4. overwrite the 128 bytes at block with zeros.
 *
 * Every asm statement here that stores through a pointer operand carries a
 * "memory" clobber so the compiler knows memory it cannot see named in the
 * operand list has been modified.
 *
 * @param dst    destination passed unchanged to add_pixels_clamped_mmx()
 * @param block  64 coefficients; read as input and zeroed before returning
 * @param stride passed unchanged to add_pixels_clamped_mmx()
 */
00112 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
00113 {
00114 int i;
00115 DECLARE_ALIGNED_8(int16_t, b2[64]);
00116
/* first pass: transform block, then transpose the result into b2 */
00117 for(i=0; i<2; i++){
00118 DECLARE_ALIGNED_8(uint64_t, tmp);
00119
00120 cavs_idct8_1d(block+4*i, ff_pw_4);
00121
/* consumes mm0-mm7 exactly as cavs_idct8_1d() left them */
00122 asm volatile(
00123 "psraw $3, %%mm7 \n\t"
00124 "psraw $3, %%mm6 \n\t"
00125 "psraw $3, %%mm5 \n\t"
00126 "psraw $3, %%mm4 \n\t"
00127 "psraw $3, %%mm3 \n\t"
00128 "psraw $3, %%mm2 \n\t"
00129 "psraw $3, %%mm1 \n\t"
00130 "psraw $3, %%mm0 \n\t"
/* all eight registers are live, so mm7 is spilled to tmp to free a scratch
 * register for the first transpose and reloaded for the second */
00131 "movq %%mm7, %0 \n\t"
00132 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
00133 "movq %%mm0, 8(%1) \n\t"
00134 "movq %%mm6, 24(%1) \n\t"
00135 "movq %%mm7, 40(%1) \n\t"
00136 "movq %%mm4, 56(%1) \n\t"
00137 "movq %0, %%mm7 \n\t"
00138 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
00139 "movq %%mm7, (%1) \n\t"
00140 "movq %%mm1, 16(%1) \n\t"
00141 "movq %%mm0, 32(%1) \n\t"
00142 "movq %%mm3, 48(%1) \n\t"
00143 : "=m"(tmp)
00144 : "r"(b2+32*i)
00145 : "memory"
00146 );
00147 }
00148
/* second pass: transform b2 in place */
00149 for(i=0; i<2; i++){
00150 cavs_idct8_1d(b2+4*i, ff_pw_64);
00151
00152 asm volatile(
00153 "psraw $7, %%mm7 \n\t"
00154 "psraw $7, %%mm6 \n\t"
00155 "psraw $7, %%mm5 \n\t"
00156 "psraw $7, %%mm4 \n\t"
00157 "psraw $7, %%mm3 \n\t"
00158 "psraw $7, %%mm2 \n\t"
00159 "psraw $7, %%mm1 \n\t"
00160 "psraw $7, %%mm0 \n\t"
/* output rows 0..7 come from mm7, mm5, mm3, mm1, mm0, mm2, mm4, mm6 */
00161 "movq %%mm7, (%0) \n\t"
00162 "movq %%mm5, 16(%0) \n\t"
00163 "movq %%mm3, 32(%0) \n\t"
00164 "movq %%mm1, 48(%0) \n\t"
00165 "movq %%mm0, 64(%0) \n\t"
00166 "movq %%mm2, 80(%0) \n\t"
00167 "movq %%mm4, 96(%0) \n\t"
00168 "movq %%mm6, 112(%0) \n\t"
00169 :: "r"(b2+4*i)
00170 : "memory"
00171 );
00172 }
00173
00174 add_pixels_clamped_mmx(b2, dst, stride);
00175
00176
/* clear the coefficient block: sixteen 8-byte stores of zero.
 * The "memory" clobber is required because the stores go through the "r"
 * pointer operand and are otherwise invisible to the compiler. */
00177 asm volatile(
00178 "pxor %%mm7, %%mm7 \n\t"
00179 "movq %%mm7, (%0) \n\t"
00180 "movq %%mm7, 8(%0) \n\t"
00181 "movq %%mm7, 16(%0) \n\t"
00182 "movq %%mm7, 24(%0) \n\t"
00183 "movq %%mm7, 32(%0) \n\t"
00184 "movq %%mm7, 40(%0) \n\t"
00185 "movq %%mm7, 48(%0) \n\t"
00186 "movq %%mm7, 56(%0) \n\t"
00187 "movq %%mm7, 64(%0) \n\t"
00188 "movq %%mm7, 72(%0) \n\t"
00189 "movq %%mm7, 80(%0) \n\t"
00190 "movq %%mm7, 88(%0) \n\t"
00191 "movq %%mm7, 96(%0) \n\t"
00192 "movq %%mm7, 104(%0) \n\t"
00193 "movq %%mm7, 112(%0) \n\t"
00194 "movq %%mm7, 120(%0) \n\t"
00195 :: "r" (block) : "memory"
00196 );
00197 }
00198
00199
00200
00201
00202
00203
00204
00205
/*
 * Asm fragment producing one output row (4 pixels) of a vertical filter.
 *
 * A..E are MMX registers holding five consecutive source rows as 16-bit
 * words; F receives the next source row (4 bytes loaded from (%0), unpacked
 * to words). Operand numbers follow QPEL_CAVSVNUM: %0 = src, %1 = dst,
 * %2 = source stride, %3 = destination stride, %4 = rounding constant,
 * %5 and %6 = multipliers.
 *
 * Result: (-A - 2*B + %5*C + %6*D - 7*E + %4) >> 7, packed with unsigned
 * saturation and written to (%1) through OP, which receives A as its
 * temporary register. E is shifted left by 3 and back, B is doubled and
 * halved back, so both are reused by later rows. mm6 and mm7 are scratch;
 * mm7 is zero on exit. src and dst are each advanced by one row.
 */
00206 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
00207 "movd (%0), "#F" \n\t"\
00208 "movq "#C", %%mm6 \n\t"\
00209 "pmullw %5, %%mm6 \n\t"\
00210 "movq "#D", %%mm7 \n\t"\
00211 "pmullw %6, %%mm7 \n\t"\
00212 "psllw $3, "#E" \n\t"\
00213 "psubw "#E", %%mm6 \n\t"\
00214 "psraw $3, "#E" \n\t"\
00215 "paddw %%mm7, %%mm6 \n\t"\
00216 "paddw "#E", %%mm6 \n\t"\
00217 "paddw "#B", "#B" \n\t"\
00218 "pxor %%mm7, %%mm7 \n\t"\
00219 "add %2, %0 \n\t"\
00220 "punpcklbw %%mm7, "#F" \n\t"\
00221 "psubw "#B", %%mm6 \n\t"\
00222 "psraw $1, "#B" \n\t"\
00223 "psubw "#A", %%mm6 \n\t"\
00224 "paddw %4, %%mm6 \n\t"\
00225 "psraw $7, %%mm6 \n\t"\
00226 "packuswb %%mm6, %%mm6 \n\t"\
00227 OP(%%mm6, (%1), A, d) \
00228 "add %3, %1 \n\t"
00229
00230
/*
 * Asm fragment producing one output row (4 pixels) of a vertical filter.
 *
 * Same register and operand conventions as the other QPEL_CAVSV* fragments:
 * %0 = src, %1 = dst, %2 = source stride, %3 = destination stride,
 * %4 = rounding constant, %5 = multiplier (%6 is not used here).
 *
 * Result: (-B + %5*(C + D) - E + %4) >> 3, packed with unsigned saturation
 * and written to (%1) through OP with A as its temporary register.
 * Unlike the other fragments this one never clears mm7: it relies on mm7
 * already being zero when F is unpacked (QPEL_CAVSVNUM zeroes it before the
 * first row and nothing in this fragment modifies it). mm6 is scratch.
 */
00231 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
00232 "movd (%0), "#F" \n\t"\
00233 "movq "#C", %%mm6 \n\t"\
00234 "paddw "#D", %%mm6 \n\t"\
00235 "pmullw %5, %%mm6 \n\t"\
00236 "add %2, %0 \n\t"\
00237 "punpcklbw %%mm7, "#F" \n\t"\
00238 "psubw "#B", %%mm6 \n\t"\
00239 "psubw "#E", %%mm6 \n\t"\
00240 "paddw %4, %%mm6 \n\t"\
00241 "psraw $3, %%mm6 \n\t"\
00242 "packuswb %%mm6, %%mm6 \n\t"\
00243 OP(%%mm6, (%1), A, d) \
00244 "add %3, %1 \n\t"
00245
00246
/*
 * Asm fragment producing one output row (4 pixels) of a vertical filter;
 * the mirror image of QPEL_CAVSV1.
 *
 * Operands: %0 = src, %1 = dst, %2 = source stride, %3 = destination stride,
 * %4 = rounding constant, %5 and %6 = multipliers.
 *
 * Result: (-7*B + %6*C + %5*D - 2*E - F + %4) >> 7, where F is the row just
 * loaded from (%0). The value is packed with unsigned saturation and written
 * to (%1) through OP with A as its temporary register. B is shifted left by
 * 3 and back, E is doubled and halved back, so both are reused by later
 * rows. mm6 and mm7 are scratch; mm7 is zero on exit.
 */
00247 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
00248 "movd (%0), "#F" \n\t"\
00249 "movq "#C", %%mm6 \n\t"\
00250 "pmullw %6, %%mm6 \n\t"\
00251 "movq "#D", %%mm7 \n\t"\
00252 "pmullw %5, %%mm7 \n\t"\
00253 "psllw $3, "#B" \n\t"\
00254 "psubw "#B", %%mm6 \n\t"\
00255 "psraw $3, "#B" \n\t"\
00256 "paddw %%mm7, %%mm6 \n\t"\
00257 "paddw "#B", %%mm6 \n\t"\
00258 "paddw "#E", "#E" \n\t"\
00259 "pxor %%mm7, %%mm7 \n\t"\
00260 "add %2, %0 \n\t"\
00261 "punpcklbw %%mm7, "#F" \n\t"\
00262 "psubw "#E", %%mm6 \n\t"\
00263 "psraw $1, "#E" \n\t"\
00264 "psubw "#F", %%mm6 \n\t"\
00265 "paddw %4, %%mm6 \n\t"\
00266 "psraw $7, %%mm6 \n\t"\
00267 "packuswb %%mm6, %%mm6 \n\t"\
00268 OP(%%mm6, (%1), A, d) \
00269 "add %3, %1 \n\t"
00270
00271
/*
 * Function body shared by the vertical filters. Expects src, dst, srcStride,
 * dstStride and h to be in scope (the parameters of the generated
 * cavs_qpel8or16_v* functions).
 *
 * VOP  - one of the QPEL_CAVSV* row fragments
 * OP   - store operation handed through to VOP
 * ADD  - rounding constant (asm operand %4)
 * MUL1 - multiplier (asm operand %5)
 * MUL2 - multiplier (asm operand %6)
 *
 * The block is processed as two 4-pixel-wide columns (w = 2). For each
 * column src is first moved up two rows, five rows are preloaded into
 * mm0-mm4, and eight VOP invocations emit eight output rows while rotating
 * the register roles. When h == 16 a second asm statement emits eight more
 * rows; it continues from the MMX register contents left by the first
 * statement (including a zero mm7 for fragments that do not clear it), so
 * nothing may disturb the MMX state between the two statements. Afterwards
 * src and dst are rewound to the top of the next 4-pixel column.
 */
00272 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
00273 int w= 2;\
00274 src -= 2*srcStride;\
00275 \
00276 while(w--){\
00277 asm volatile(\
00278 "pxor %%mm7, %%mm7 \n\t"\
00279 "movd (%0), %%mm0 \n\t"\
00280 "add %2, %0 \n\t"\
00281 "movd (%0), %%mm1 \n\t"\
00282 "add %2, %0 \n\t"\
00283 "movd (%0), %%mm2 \n\t"\
00284 "add %2, %0 \n\t"\
00285 "movd (%0), %%mm3 \n\t"\
00286 "add %2, %0 \n\t"\
00287 "movd (%0), %%mm4 \n\t"\
00288 "add %2, %0 \n\t"\
00289 "punpcklbw %%mm7, %%mm0 \n\t"\
00290 "punpcklbw %%mm7, %%mm1 \n\t"\
00291 "punpcklbw %%mm7, %%mm2 \n\t"\
00292 "punpcklbw %%mm7, %%mm3 \n\t"\
00293 "punpcklbw %%mm7, %%mm4 \n\t"\
00294 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
00295 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
00296 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
00297 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
00298 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
00299 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
00300 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
00301 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
00302 \
00303 : "+a"(src), "+c"(dst)\
00304 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
00305 : "memory"\
00306 );\
00307 if(h==16){\
00308 asm volatile(\
00309 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
00310 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
00311 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
00312 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
00313 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
00314 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
00315 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
00316 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
00317 \
00318 : "+a"(src), "+c"(dst)\
00319 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
00320 : "memory"\
00321 );\
00322 }\
00323 src += 4-(h+5)*srcStride;\
00324 dst += 4-h*dstStride;\
00325 }
00326
/*
 * Generates the family of CAVS interpolation functions for one
 * (OPNAME, OP, MMX) combination:
 *
 *   OPNAME cavs_qpel8_h_MMX        - horizontal filter over an 8x8 block.
 *       Per pixel: (5*(s[0] + s[1]) - (s[-1] + s[2]) + ff_pw_4) >> 3, packed
 *       with unsigned saturation and written through OP, eight pixels per
 *       row for h = 8 rows. The row counter lives in memory ("+m"(h)) and is
 *       decremented by the asm loop itself.
 *   OPNAME cavs_qpel8or16_v{1,2,3}_MMX - vertical filters built from
 *       QPEL_CAVSVNUM with the matching QPEL_CAVSV{1,2,3} fragment; each
 *       call covers an 8-pixel-wide strip of height h.
 *   OPNAME cavs_qpel8_v{1,2,3}_MMX  - the above with h = 8.
 *   OPNAME cavs_qpel16_v{1,2,3}_MMX - two 8-wide strips of height 16.
 *   OPNAME cavs_qpel16_h_MMX        - four 8x8 horizontal calls covering a
 *       16x16 block (left and right halves, then eight rows further down).
 *
 * OPNAME - function name prefix (put_ or avg_ in this file)
 * OP     - store macro invoked as OP(src_reg, dst_mem, temp_reg, size)
 * MMX    - function name suffix selecting the instruction-set flavour
 */
00327 #define QPEL_CAVS(OPNAME, OP, MMX)\
00328 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00329 int h=8;\
00330 asm volatile(\
00331 "pxor %%mm7, %%mm7 \n\t"\
00332 "movq %5, %%mm6 \n\t"\
00333 "1: \n\t"\
00334 "movq (%0), %%mm0 \n\t"\
00335 "movq 1(%0), %%mm2 \n\t"\
00336 "movq %%mm0, %%mm1 \n\t"\
00337 "movq %%mm2, %%mm3 \n\t"\
00338 "punpcklbw %%mm7, %%mm0 \n\t"\
00339 "punpckhbw %%mm7, %%mm1 \n\t"\
00340 "punpcklbw %%mm7, %%mm2 \n\t"\
00341 "punpckhbw %%mm7, %%mm3 \n\t"\
00342 "paddw %%mm2, %%mm0 \n\t"\
00343 "paddw %%mm3, %%mm1 \n\t"\
00344 "pmullw %%mm6, %%mm0 \n\t"\
00345 "pmullw %%mm6, %%mm1 \n\t"\
00346 "movq -1(%0), %%mm2 \n\t"\
00347 "movq 2(%0), %%mm4 \n\t"\
00348 "movq %%mm2, %%mm3 \n\t"\
00349 "movq %%mm4, %%mm5 \n\t"\
00350 "punpcklbw %%mm7, %%mm2 \n\t"\
00351 "punpckhbw %%mm7, %%mm3 \n\t"\
00352 "punpcklbw %%mm7, %%mm4 \n\t"\
00353 "punpckhbw %%mm7, %%mm5 \n\t"\
00354 "paddw %%mm4, %%mm2 \n\t"\
00355 "paddw %%mm3, %%mm5 \n\t"\
00356 "psubw %%mm2, %%mm0 \n\t"\
00357 "psubw %%mm5, %%mm1 \n\t"\
00358 "movq %6, %%mm5 \n\t"\
00359 "paddw %%mm5, %%mm0 \n\t"\
00360 "paddw %%mm5, %%mm1 \n\t"\
00361 "psraw $3, %%mm0 \n\t"\
00362 "psraw $3, %%mm1 \n\t"\
00363 "packuswb %%mm1, %%mm0 \n\t"\
00364 OP(%%mm0, (%1),%%mm5, q) \
00365 "add %3, %0 \n\t"\
00366 "add %4, %1 \n\t"\
00367 "decl %2 \n\t"\
00368 " jnz 1b \n\t"\
00369 : "+a"(src), "+c"(dst), "+m"(h)\
00370 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
00371 : "memory"\
00372 );\
00373 }\
00374 \
00375 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00376 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
00377 }\
00378 \
00379 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00380 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
00381 }\
00382 \
00383 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00384 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
00385 }\
00386 \
00387 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00388 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
00389 }\
00390 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00391 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
00392 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
00393 }\
00394 \
00395 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00396 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
00397 }\
00398 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00399 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
00400 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
00401 }\
00402 \
00403 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00404 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
00405 }\
00406 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00407 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
00408 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
00409 }\
00410 \
00411 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00412 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
00413 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
00414 src += 8*srcStride;\
00415 dst += 8*dstStride;\
00416 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
00417 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
00418 }\
00419
/*
 * Generates the single-stride entry points installed in the DSP tables for
 * one (OPNAME, SIZE, MMX) combination. Each wrapper forwards to the filter
 * generated by QPEL_CAVS, passing stride as both destination and source
 * stride:
 *   _mc20_ -> _h_    (horizontal filter)
 *   _mc01_ -> _v1_   (vertical filter 1)
 *   _mc02_ -> _v2_   (vertical filter 2)
 *   _mc03_ -> _v3_   (vertical filter 3)
 */
00420 #define CAVS_MC(OPNAME, SIZE, MMX) \
00421 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00422 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
00423 }\
00424 \
00425 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00426 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
00427 }\
00428 \
00429 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00430 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
00431 }\
00432 \
00433 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00434 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
00435 }\
00436
/*
 * Store operations plugged into the filters as OP(a, b, temp, size).
 * a is the register holding the result, b the destination memory operand,
 * temp a scratch register and size the mov suffix (d or q).
 *
 * PUT_OP       - plain store of a to b; temp is unused.
 * AVG_3DNOW_OP - loads b into temp, averages it into a with pavgusb, then
 *                stores a to b.
 * AVG_MMX2_OP  - same sequence using pavgb.
 */
00437 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
00438 #define AVG_3DNOW_OP(a,b,temp, size) \
00439 "mov" #size " " #b ", " #temp " \n\t"\
00440 "pavgusb " #temp ", " #a " \n\t"\
00441 "mov" #size " " #a ", " #b " \n\t"
00442 #define AVG_MMX2_OP(a,b,temp, size) \
00443 "mov" #size " " #b ", " #temp " \n\t"\
00444 "pavgb " #temp ", " #a " \n\t"\
00445 "mov" #size " " #a ", " #b " \n\t"
00446
/* Instantiate the put_/avg_ filter families for the 3dnow and mmx2 flavours;
 * the avg_ variants differ only in the averaging store macro they use. */
00447 QPEL_CAVS(put_, PUT_OP, 3dnow)
00448 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
00449 QPEL_CAVS(put_, PUT_OP, mmx2)
00450 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
00451
/* Instantiate the mc20/mc01/mc02/mc03 entry points for block sizes 8 and 16
 * in both flavours. */
00452 CAVS_MC(put_, 8, 3dnow)
00453 CAVS_MC(put_, 16,3dnow)
00454 CAVS_MC(avg_, 8, 3dnow)
00455 CAVS_MC(avg_, 16,3dnow)
00456 CAVS_MC(put_, 8, mmx2)
00457 CAVS_MC(put_, 16,mmx2)
00458 CAVS_MC(avg_, 8, mmx2)
00459 CAVS_MC(avg_, 16,mmx2)
00460
/* mc00 entry points; only declared here, their definitions are not in this
 * file. Both init functions below install them in table slot 0. */
00461 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00462 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00463 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00464 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00465
/**
 * Install the mmx2 CAVS functions into a DSPContext.
 *
 * Fills slots 0, 2, 4, 8 and 12 of put_cavs_qpel_pixels_tab and
 * avg_cavs_qpel_pixels_tab with the mc00, mc20, mc01, mc02 and mc03
 * functions (table index 0 for 16x16, index 1 for 8x8) and sets
 * cavs_idct8_add to cavs_idct8_add_mmx.
 *
 * @param c     context whose function pointers are overwritten
 * @param avctx not used by this function
 */
00466 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
/* dspfunc(PFX, IDX, NUM): fill row IDX of c->PFX_pixels_tab with the
 * ff_<PFX><NUM>_mcXX_mmx2 functions */
00467 #define dspfunc(PFX, IDX, NUM) \
00468 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
00469 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
00470 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
00471 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
00472 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
00473
00474 dspfunc(put_cavs_qpel, 0, 16);
00475 dspfunc(put_cavs_qpel, 1, 8);
00476 dspfunc(avg_cavs_qpel, 0, 16);
00477 dspfunc(avg_cavs_qpel, 1, 8);
00478 #undef dspfunc
00479 c->cavs_idct8_add = cavs_idct8_add_mmx;
00480 }
00481
/**
 * Install the 3dnow CAVS functions into a DSPContext.
 *
 * Fills slots 2, 4, 8 and 12 of put_cavs_qpel_pixels_tab and
 * avg_cavs_qpel_pixels_tab with the _3dnow mc20, mc01, mc02 and mc03
 * functions (table index 0 for 16x16, index 1 for 8x8), slot 0 with the
 * mc00 functions, and sets cavs_idct8_add to cavs_idct8_add_mmx.
 *
 * @param c     context whose function pointers are overwritten
 * @param avctx not used by this function
 */
00482 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
/* NOTE(review): slot 0 uses the *_mc00_mmx2 functions even in this 3dnow
 * init; their definitions are not visible here - confirm that the avg_
 * variants do not rely on instructions unavailable on 3DNow!-only CPUs. */
00483 #define dspfunc(PFX, IDX, NUM) \
00484 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
00485 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
00486 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
00487 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
00488 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
00489
00490 dspfunc(put_cavs_qpel, 0, 16);
00491 dspfunc(put_cavs_qpel, 1, 8);
00492 dspfunc(avg_cavs_qpel, 0, 16);
00493 dspfunc(avg_cavs_qpel, 1, 8);
00494 #undef dspfunc
00495 c->cavs_idct8_add = cavs_idct8_add_mmx;
00496 }