00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
/*
 * Instruction-name strings spliced into the inline assembly of this template.
 * Each is dropped first so the template can be re-included with a different
 * HAVE_* configuration.
 */

/* Instruction used to leave MMX state: "femms" on 3DNow! builds, "emms" otherwise. */
#undef EMMS
#if defined(HAVE_3DNOW)
#  define EMMS "femms"
#else
#  define EMMS "emms"
#endif

/*
 * 64-bit store used for the output pixels and the matching store fence.
 * MMX2 builds use the non-temporal "movntq" plus "sfence"; plain MMX builds
 * use an ordinary "movq".
 * NOTE(review): the fallback SFENCE text is "/nop" -- presumably meant to be
 * an assembler comment / no-op; SFENCE is not referenced in this part of the
 * file, confirm before using it.
 */
#undef MOVNTQ
#undef SFENCE
#if !defined(HAVE_MMX2)
#  define MOVNTQ "movq"
#  define SFENCE "/nop"
#else
#  define MOVNTQ "movntq"
#  define SFENCE "sfence"
#endif
00047
/*
 * YUV2RGB -- inline-assembly fragment shared by the converters in this file.
 *
 * The definition must be one unbroken run of backslash-continued lines; a
 * blank line without a trailing backslash ends the macro early.
 *
 * Expected on entry (set up by the users of this macro):
 *   mm0 = 4 U bytes (loaded with movd), mm1 = 4 V bytes,
 *   mm6 = 8 Y bytes, mm4 = 0,
 *   %4  = base address for the *_OFFSET / *_COEFF displacements.
 *
 * On exit:
 *   mm0 = 8 bytes produced with UB_COEFF (blue),
 *   mm1 = 8 bytes produced with VR_COEFF (red),
 *   mm2 = 8 bytes produced with UG_COEFF + VG_COEFF (green),
 *   each in pixel order 0..7.
 *   mm3, mm4, mm5, mm6 and mm7 are overwritten (mm4 is no longer zero).
 */
#define YUV2RGB \
    /* --- chroma: widen the 4 U / 4 V bytes to words (mm4 is zero) --- */ \
    "punpcklbw %%mm4, %%mm0;" \
    "punpcklbw %%mm4, %%mm1;" \
    \
    /* scale by 8 and remove the chroma offsets (signed saturation) */ \
    "psllw $3, %%mm0;" \
    "psllw $3, %%mm1;" \
    \
    "psubsw "U_OFFSET"(%4), %%mm0;" \
    "psubsw "V_OFFSET"(%4), %%mm1;" \
    \
    /* keep copies of U and V for the green contribution */ \
    "movq %%mm0, %%mm2;" \
    "movq %%mm1, %%mm3;" \
    \
    /* pmulhw keeps the high 16 bits of each signed product */ \
    "pmulhw "UG_COEFF"(%4), %%mm2;" \
    "pmulhw "VG_COEFF"(%4), %%mm3;" \
    \
    "pmulhw "UB_COEFF"(%4), %%mm0;" \
    "pmulhw "VR_COEFF"(%4), %%mm1;" \
    \
    /* green chroma term = U part + V part */ \
    "paddsw %%mm3, %%mm2;" \
    \
    /* --- luma: split the 8 Y bytes into even (mm6) and odd (mm7) words --- */ \
    "movq %%mm6, %%mm7;" \
    "pand "MANGLE(mmx_00ffw)", %%mm6;" \
    \
    "psrlw $8, %%mm7;" \
    \
    "psllw $3, %%mm6;" \
    "psllw $3, %%mm7;" \
    \
    "psubw "Y_OFFSET"(%4), %%mm6;" \
    "psubw "Y_OFFSET"(%4), %%mm7;" \
    \
    "pmulhw "Y_COEFF"(%4), %%mm6;" \
    "pmulhw "Y_COEFF"(%4), %%mm7;" \
    \
    /* --- combine: each chroma term is shared by an even/odd pixel pair --- */ \
    /* mm0/mm1/mm2 become the even pixels, mm3/mm4/mm5 the odd pixels */ \
    "movq %%mm0, %%mm3;" \
    "movq %%mm1, %%mm4;" \
    "movq %%mm2, %%mm5;" \
    \
    "paddsw %%mm6, %%mm0;" \
    "paddsw %%mm7, %%mm3;" \
    \
    "paddsw %%mm6, %%mm1;" \
    "paddsw %%mm7, %%mm4;" \
    \
    "paddsw %%mm6, %%mm2;" \
    "paddsw %%mm7, %%mm5;" \
    \
    /* saturate the even-pixel words to unsigned bytes */ \
    "packuswb %%mm0, %%mm0;" \
    "packuswb %%mm1, %%mm1;" \
    "packuswb %%mm2, %%mm2;" \
    \
    /* saturate the odd-pixel words to unsigned bytes */ \
    "packuswb %%mm3, %%mm3;" \
    "packuswb %%mm4, %%mm4;" \
    "packuswb %%mm5, %%mm5;" \
    \
    /* interleave even and odd bytes back into pixel order */ \
    "punpcklbw %%mm3, %%mm0;" \
    "punpcklbw %%mm4, %%mm1;" \
    "punpcklbw %%mm5, %%mm2;"
00123
00124 static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00125 int srcSliceH, uint8_t* dst[], int dstStride[]){
00126 int y, h_size;
00127
00128 if(c->srcFormat == PIX_FMT_YUV422P){
00129 srcStride[1] *= 2;
00130 srcStride[2] *= 2;
00131 }
00132
00133 h_size= (c->dstW+7)&~7;
00134 if(h_size*2 > FFABS(dstStride[0])) h_size-=8;
00135
00136 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00137
00138
00139 for (y= 0; y<srcSliceH; y++ ) {
00140 uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
00141 uint8_t *py = src[0] + y*srcStride[0];
00142 uint8_t *pu = src[1] + (y>>1)*srcStride[1];
00143 uint8_t *pv = src[2] + (y>>1)*srcStride[2];
00144 long index= -h_size/2;
00145
00146 b5Dither= dither8[y&1];
00147 g6Dither= dither4[y&1];
00148 g5Dither= dither8[y&1];
00149 r5Dither= dither8[(y+1)&1];
00150
00151
00152 __asm__ __volatile__ (
00153
00154 "movd (%2, %0), %%mm0;"
00155 "movd (%3, %0), %%mm1;"
00156 "movq (%5, %0, 2), %%mm6;"
00157
00158 "1: \n\t"
00159
00160
00161
00162
00163
00164
00165 YUV2RGB
00166
00167 #ifdef DITHER1XBPP
00168 "paddusb "MANGLE(b5Dither)", %%mm0;"
00169 "paddusb "MANGLE(g6Dither)", %%mm2;"
00170 "paddusb "MANGLE(r5Dither)", %%mm1;"
00171 #endif
00172
00173 "pand "MANGLE(mmx_redmask)", %%mm0;"
00174 "pand "MANGLE(mmx_grnmask)", %%mm2;"
00175 "pand "MANGLE(mmx_redmask)", %%mm1;"
00176
00177 "psrlw $3, %%mm0;"
00178 "pxor %%mm4, %%mm4;"
00179
00180 "movq %%mm0, %%mm5;"
00181 "movq %%mm2, %%mm7;"
00182
00183
00184 "punpcklbw %%mm4, %%mm2;"
00185 "punpcklbw %%mm1, %%mm0;"
00186
00187 "psllw $3, %%mm2;"
00188 "por %%mm2, %%mm0;"
00189
00190 "movq 8 (%5, %0, 2), %%mm6;"
00191 MOVNTQ " %%mm0, (%1);"
00192
00193
00194 "punpckhbw %%mm4, %%mm7;"
00195 "punpckhbw %%mm1, %%mm5;"
00196
00197 "psllw $3, %%mm7;"
00198 "movd 4 (%2, %0), %%mm0;"
00199
00200 "por %%mm7, %%mm5;"
00201 "movd 4 (%3, %0), %%mm1;"
00202
00203 MOVNTQ " %%mm5, 8 (%1);"
00204
00205 "add $16, %1 \n\t"
00206 "add $4, %0 \n\t"
00207 " js 1b \n\t"
00208
00209 : "+r" (index), "+r" (image)
00210 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
00211 );
00212 }
00213
00214 __asm__ __volatile__ (EMMS);
00215
00216 return srcSliceH;
00217 }
00218
00219 static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00220 int srcSliceH, uint8_t* dst[], int dstStride[]){
00221 int y, h_size;
00222
00223 if(c->srcFormat == PIX_FMT_YUV422P){
00224 srcStride[1] *= 2;
00225 srcStride[2] *= 2;
00226 }
00227
00228 h_size= (c->dstW+7)&~7;
00229 if(h_size*2 > FFABS(dstStride[0])) h_size-=8;
00230
00231 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00232
00233
00234 for (y= 0; y<srcSliceH; y++ ) {
00235 uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
00236 uint8_t *py = src[0] + y*srcStride[0];
00237 uint8_t *pu = src[1] + (y>>1)*srcStride[1];
00238 uint8_t *pv = src[2] + (y>>1)*srcStride[2];
00239 long index= -h_size/2;
00240
00241 b5Dither= dither8[y&1];
00242 g6Dither= dither4[y&1];
00243 g5Dither= dither8[y&1];
00244 r5Dither= dither8[(y+1)&1];
00245
00246
00247 __asm__ __volatile__ (
00248
00249 "movd (%2, %0), %%mm0;"
00250 "movd (%3, %0), %%mm1;"
00251 "movq (%5, %0, 2), %%mm6;"
00252
00253 "1: \n\t"
00254 YUV2RGB
00255
00256 #ifdef DITHER1XBPP
00257 "paddusb "MANGLE(b5Dither)", %%mm0 \n\t"
00258 "paddusb "MANGLE(g5Dither)", %%mm2 \n\t"
00259 "paddusb "MANGLE(r5Dither)", %%mm1 \n\t"
00260 #endif
00261
00262
00263 "pand "MANGLE(mmx_redmask)", %%mm0;"
00264 "pand "MANGLE(mmx_redmask)", %%mm2;"
00265 "pand "MANGLE(mmx_redmask)", %%mm1;"
00266
00267 "psrlw $3, %%mm0;"
00268 "psrlw $1, %%mm1;"
00269 "pxor %%mm4, %%mm4;"
00270
00271 "movq %%mm0, %%mm5;"
00272 "movq %%mm2, %%mm7;"
00273
00274
00275 "punpcklbw %%mm4, %%mm2;"
00276 "punpcklbw %%mm1, %%mm0;"
00277
00278 "psllw $2, %%mm2;"
00279 "por %%mm2, %%mm0;"
00280
00281 "movq 8 (%5, %0, 2), %%mm6;"
00282 MOVNTQ " %%mm0, (%1);"
00283
00284
00285 "punpckhbw %%mm4, %%mm7;"
00286 "punpckhbw %%mm1, %%mm5;"
00287
00288 "psllw $2, %%mm7;"
00289 "movd 4 (%2, %0), %%mm0;"
00290
00291 "por %%mm7, %%mm5;"
00292 "movd 4 (%3, %0), %%mm1;"
00293
00294 MOVNTQ " %%mm5, 8 (%1);"
00295
00296 "add $16, %1 \n\t"
00297 "add $4, %0 \n\t"
00298 " js 1b \n\t"
00299 : "+r" (index), "+r" (image)
00300 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
00301 );
00302 }
00303
00304 __asm__ __volatile__ (EMMS);
00305 return srcSliceH;
00306 }
00307
00308 static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00309 int srcSliceH, uint8_t* dst[], int dstStride[]){
00310 int y, h_size;
00311
00312 if(c->srcFormat == PIX_FMT_YUV422P){
00313 srcStride[1] *= 2;
00314 srcStride[2] *= 2;
00315 }
00316
00317 h_size= (c->dstW+7)&~7;
00318 if(h_size*3 > FFABS(dstStride[0])) h_size-=8;
00319
00320 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00321
00322 for (y= 0; y<srcSliceH; y++ ) {
00323 uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
00324 uint8_t *py = src[0] + y*srcStride[0];
00325 uint8_t *pu = src[1] + (y>>1)*srcStride[1];
00326 uint8_t *pv = src[2] + (y>>1)*srcStride[2];
00327 long index= -h_size/2;
00328
00329
00330
00331 __asm__ __volatile__ (
00332
00333 "movd (%2, %0), %%mm0;"
00334 "movd (%3, %0), %%mm1;"
00335 "movq (%5, %0, 2), %%mm6;"
00336
00337 "1: \n\t"
00338 YUV2RGB
00339
00340 #ifdef HAVE_MMX2
00341 "movq "MANGLE(ff_M24A)", %%mm4 \n\t"
00342 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"
00343 "pshufw $0x50, %%mm0, %%mm5 \n\t"
00344 "pshufw $0x50, %%mm2, %%mm3 \n\t"
00345 "pshufw $0x00, %%mm1, %%mm6 \n\t"
00346
00347 "pand %%mm4, %%mm5 \n\t"
00348 "pand %%mm4, %%mm3 \n\t"
00349 "pand %%mm7, %%mm6 \n\t"
00350
00351 "psllq $8, %%mm3 \n\t"
00352 "por %%mm5, %%mm6 \n\t"
00353 "por %%mm3, %%mm6 \n\t"
00354 MOVNTQ" %%mm6, (%1) \n\t"
00355
00356 "psrlq $8, %%mm2 \n\t"
00357 "pshufw $0xA5, %%mm0, %%mm5 \n\t"
00358 "pshufw $0x55, %%mm2, %%mm3 \n\t"
00359 "pshufw $0xA5, %%mm1, %%mm6 \n\t"
00360
00361 "pand "MANGLE(ff_M24B)", %%mm5 \n\t"
00362 "pand %%mm7, %%mm3 \n\t"
00363 "pand %%mm4, %%mm6 \n\t"
00364
00365 "por %%mm5, %%mm3 \n\t"
00366 "por %%mm3, %%mm6 \n\t"
00367 MOVNTQ" %%mm6, 8(%1) \n\t"
00368
00369 "pshufw $0xFF, %%mm0, %%mm5 \n\t"
00370 "pshufw $0xFA, %%mm2, %%mm3 \n\t"
00371 "pshufw $0xFA, %%mm1, %%mm6 \n\t"
00372 "movd 4 (%2, %0), %%mm0;"
00373
00374 "pand %%mm7, %%mm5 \n\t"
00375 "pand %%mm4, %%mm3 \n\t"
00376 "pand "MANGLE(ff_M24B)", %%mm6 \n\t"
00377 "movd 4 (%3, %0), %%mm1;"
00378 \
00379 "por %%mm5, %%mm3 \n\t"
00380 "por %%mm3, %%mm6 \n\t"
00381 MOVNTQ" %%mm6, 16(%1) \n\t"
00382 "movq 8 (%5, %0, 2), %%mm6;"
00383 "pxor %%mm4, %%mm4 \n\t"
00384
00385 #else
00386
00387 "pxor %%mm4, %%mm4 \n\t"
00388 "movq %%mm0, %%mm5 \n\t"
00389 "movq %%mm1, %%mm6 \n\t"
00390 "punpcklbw %%mm2, %%mm0 \n\t"
00391 "punpcklbw %%mm4, %%mm1 \n\t"
00392 "punpckhbw %%mm2, %%mm5 \n\t"
00393 "punpckhbw %%mm4, %%mm6 \n\t"
00394 "movq %%mm0, %%mm7 \n\t"
00395 "movq %%mm5, %%mm3 \n\t"
00396 "punpcklwd %%mm1, %%mm7 \n\t"
00397 "punpckhwd %%mm1, %%mm0 \n\t"
00398 "punpcklwd %%mm6, %%mm5 \n\t"
00399 "punpckhwd %%mm6, %%mm3 \n\t"
00400
00401 "movq %%mm7, %%mm2 \n\t"
00402 "movq %%mm0, %%mm6 \n\t"
00403 "movq %%mm5, %%mm1 \n\t"
00404 "movq %%mm3, %%mm4 \n\t"
00405
00406 "psllq $40, %%mm7 \n\t"
00407 "psllq $40, %%mm0 \n\t"
00408 "psllq $40, %%mm5 \n\t"
00409 "psllq $40, %%mm3 \n\t"
00410
00411 "punpckhdq %%mm2, %%mm7 \n\t"
00412 "punpckhdq %%mm6, %%mm0 \n\t"
00413 "punpckhdq %%mm1, %%mm5 \n\t"
00414 "punpckhdq %%mm4, %%mm3 \n\t"
00415
00416 "psrlq $8, %%mm7 \n\t"
00417 "movq %%mm0, %%mm6 \n\t"
00418 "psllq $40, %%mm0 \n\t"
00419 "por %%mm0, %%mm7 \n\t"
00420 MOVNTQ" %%mm7, (%1) \n\t"
00421
00422 "movd 4 (%2, %0), %%mm0;"
00423
00424 "psrlq $24, %%mm6 \n\t"
00425 "movq %%mm5, %%mm1 \n\t"
00426 "psllq $24, %%mm5 \n\t"
00427 "por %%mm5, %%mm6 \n\t"
00428 MOVNTQ" %%mm6, 8(%1) \n\t"
00429
00430 "movq 8 (%5, %0, 2), %%mm6;"
00431
00432 "psrlq $40, %%mm1 \n\t"
00433 "psllq $8, %%mm3 \n\t"
00434 "por %%mm3, %%mm1 \n\t"
00435 MOVNTQ" %%mm1, 16(%1) \n\t"
00436
00437 "movd 4 (%3, %0), %%mm1;"
00438 "pxor %%mm4, %%mm4 \n\t"
00439 #endif
00440
00441 "add $24, %1 \n\t"
00442 "add $4, %0 \n\t"
00443 " js 1b \n\t"
00444
00445 : "+r" (index), "+r" (image)
00446 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
00447 );
00448 }
00449
00450 __asm__ __volatile__ (EMMS);
00451 return srcSliceH;
00452 }
00453
00454 static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00455 int srcSliceH, uint8_t* dst[], int dstStride[]){
00456 int y, h_size;
00457
00458 if(c->srcFormat == PIX_FMT_YUV422P){
00459 srcStride[1] *= 2;
00460 srcStride[2] *= 2;
00461 }
00462
00463 h_size= (c->dstW+7)&~7;
00464 if(h_size*4 > FFABS(dstStride[0])) h_size-=8;
00465
00466 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00467
00468 for (y= 0; y<srcSliceH; y++ ) {
00469 uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
00470 uint8_t *py = src[0] + y*srcStride[0];
00471 uint8_t *pu = src[1] + (y>>1)*srcStride[1];
00472 uint8_t *pv = src[2] + (y>>1)*srcStride[2];
00473 long index= -h_size/2;
00474
00475
00476
00477 __asm__ __volatile__ (
00478
00479 "movd (%2, %0), %%mm0;"
00480 "movd (%3, %0), %%mm1;"
00481 "movq (%5, %0, 2), %%mm6;"
00482
00483 "1: \n\t"
00484 YUV2RGB
00485
00486
00487
00488
00489 "pxor %%mm3, %%mm3;"
00490
00491 "movq %%mm0, %%mm6;"
00492 "movq %%mm1, %%mm7;"
00493
00494 "movq %%mm0, %%mm4;"
00495 "movq %%mm1, %%mm5;"
00496
00497 "punpcklbw %%mm2, %%mm6;"
00498 "punpcklbw %%mm3, %%mm7;"
00499
00500 "punpcklwd %%mm7, %%mm6;"
00501 MOVNTQ " %%mm6, (%1);"
00502
00503 "movq %%mm0, %%mm6;"
00504 "punpcklbw %%mm2, %%mm6;"
00505
00506 "punpckhwd %%mm7, %%mm6;"
00507 MOVNTQ " %%mm6, 8 (%1);"
00508
00509 "punpckhbw %%mm2, %%mm4;"
00510 "punpckhbw %%mm3, %%mm5;"
00511
00512 "punpcklwd %%mm5, %%mm4;"
00513 MOVNTQ " %%mm4, 16 (%1);"
00514
00515 "movq %%mm0, %%mm4;"
00516 "punpckhbw %%mm2, %%mm4;"
00517
00518 "punpckhwd %%mm5, %%mm4;"
00519 MOVNTQ " %%mm4, 24 (%1);"
00520
00521 "movd 4 (%2, %0), %%mm0;"
00522 "movd 4 (%3, %0), %%mm1;"
00523
00524 "pxor %%mm4, %%mm4;"
00525 "movq 8 (%5, %0, 2), %%mm6;"
00526
00527 "add $32, %1 \n\t"
00528 "add $4, %0 \n\t"
00529 " js 1b \n\t"
00530
00531 : "+r" (index), "+r" (image)
00532 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
00533 );
00534 }
00535
00536 __asm__ __volatile__ (EMMS);
00537 return srcSliceH;
00538 }