00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "dsputil.h"
00022
00023 #include "gcc_fixes.h"
00024
00025 #include "dsputil_altivec.h"
00026
00027 static void vector_fmul_altivec(float *dst, const float *src, int len)
00028 {
00029 int i;
00030 vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
00031 for(i=0; i<len-7; i+=8) {
00032 d0 = vec_ld(0, dst+i);
00033 s = vec_ld(0, src+i);
00034 d1 = vec_ld(16, dst+i);
00035 d0 = vec_madd(d0, s, zero);
00036 d1 = vec_madd(d1, vec_ld(16,src+i), zero);
00037 vec_st(d0, 0, dst+i);
00038 vec_st(d1, 16, dst+i);
00039 }
00040 }
00041
/**
 * dst[i] = src0[i] * src1[len-1-i], i.e. multiply src0 by src1 read
 * backwards. Processes 8 floats per iteration with aligned loads;
 * assumes all pointers are 16-byte aligned and len is a multiple
 * of 8 -- TODO confirm against callers.
 *
 * The element reversal inside each vector is done with a pair of
 * merge steps: for s = {a,b,c,d}, mergel(s,s) = {c,c,d,d} and
 * mergeh(s,s) = {a,a,b,b}; merging those again yields {d,c,b,a}.
 * Loads/stores are deliberately interleaved with the merges to
 * hide latency -- do not reorder.
 */
static void vector_fmul_reverse_altivec(float *dst, const float *src0,
                                        const float *src1, int len)
{
    int i;
    vector float d, s0, s1, h0, l0,
                 s2, s3, zero = (vector float)vec_splat_u32(0);
    /* Point src1 at the last aligned vector; iteration walks it backwards. */
    src1 += len-4;
    for(i=0; i<len-7; i+=8) {
        s1 = vec_ld(0, src1-i);          /* last unprocessed vector of src1 */
        s0 = vec_ld(0, src0+i);
        l0 = vec_mergel(s1, s1);
        s3 = vec_ld(-16, src1-i);        /* the vector before it */
        h0 = vec_mergeh(s1, s1);
        s2 = vec_ld(16, src0+i);
        /* s1 reversed: {d,c,b,a} */
        s1 = vec_mergeh(vec_mergel(l0,h0),
                        vec_mergeh(l0,h0));

        l0 = vec_mergel(s3, s3);
        d = vec_madd(s0, s1, zero);      /* multiply (add 0) */
        h0 = vec_mergeh(s3, s3);
        vec_st(d, 0, dst+i);
        /* s3 reversed likewise */
        s3 = vec_mergeh(vec_mergel(l0,h0),
                        vec_mergeh(l0,h0));
        d = vec_madd(s2, s3, zero);
        vec_st(d, 16, dst+i);
    }
}
00069
/**
 * dst[i] = src0[i]*src1[i] + src2[i] (the src3/step generalization is
 * delegated to the C fallback). The step==1, src3==0 fast path handles
 * a potentially misaligned dst with the classic lvsl/lvsr edge-merge
 * store sequence; src0/src1/src2 are presumably 16-byte aligned --
 * TODO confirm against callers.
 *
 * NOTE(review): the step==2 interleaved-store variant is disabled
 * (#if 0) with an upstream FIXME -- it was never debugged; the C
 * fallback covers that case.
 */
static void vector_fmul_add_add_altivec(float *dst, const float *src0,
                                        const float *src1, const float *src2,
                                        int src3, int len, int step)
{
    int i;
    vector float d, s0, s1, s2, t0, t1, edges;
    /* Permute controls for unaligned stores to dst:
       lvsr realigns data into dst's misalignment, lvsl recovers it. */
    vector unsigned char align = vec_lvsr(0,dst),
                         mask = vec_lvsl(0, dst);

#if 0 //FIXME: there is still something wrong
    if (step == 2) {
        int y;
        vector float d0, d1, s3, t2;
        /* Selector picking alternating elements (for interleaving
           results into every other output slot). */
        vector unsigned int sel =
            vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0));
        t1 = vec_ld(16, dst);
        for (i=0,y=0; i<len-3; i+=4,y+=8) {

            s0 = vec_ld(0,src0+i);
            s1 = vec_ld(0,src1+i);
            s2 = vec_ld(0,src2+i);


            /* vector covering the last byte of this 32-byte dst span */
            t2 = vec_ld(31, dst+y);

            d = vec_madd(s0,s1,s2);


            /* reconstruct current (unaligned) dst contents */
            d0 = vec_perm(t0, t1, mask);
            /* splice new even elements in, keep old odd elements */
            d0 = vec_sel(vec_mergeh(d, d), d0, sel);

            edges = vec_perm(t1, t0, mask);
            /* realign merged data back to dst's alignment and store */
            t0 = vec_perm(edges, d0, align);

            t1 = vec_perm(d0, edges, align);

            vec_stl(t0, 0, dst+y);

            d1 = vec_perm(t1, t2, mask);

            d1 = vec_sel(vec_mergel(d, d), d1, sel);

            edges = vec_perm(t2, t1, mask);

            t1 = vec_perm(edges, d1, align);

            t2 = vec_perm(d1, edges, align);

            vec_stl(t1, 16, dst+y);

            t0 = t1;                    /* carry edge vectors to next iter */

            vec_stl(t2, 31, dst+y);

            t1 = t2;
        }
    } else
#endif
    if (step == 1 && src3 == 0)
        for (i=0; i<len-3; i+=4) {
            /* two aligned vectors covering the (possibly unaligned)
               16 bytes at dst+i: offset 0 and offset 15 */
            t0 = vec_ld(0, dst+i);
            t1 = vec_ld(15, dst+i);
            s0 = vec_ld(0, src0+i);
            s1 = vec_ld(0, src1+i);
            s2 = vec_ld(0, src2+i);
            /* bytes surrounding the destination span, to be preserved */
            edges = vec_perm(t1 ,t0, mask);
            d = vec_madd(s0,s1,s2);
            /* distribute result + preserved edges across both vectors */
            t1 = vec_perm(d, edges, align);
            t0 = vec_perm(edges, d, align);
            vec_st(t1, 15, dst+i);
            vec_st(t0, 0, dst+i);
        }
    else
        /* general step/src3: use the scalar C implementation */
        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
}
00151
/**
 * Convert floats to signed 16-bit ints with saturation.
 * vec_cts(s, 0) converts float->int32 (saturating), then vec_packs
 * saturates/packs two int32 vectors into one int16 vector, so 8
 * samples are produced per iteration. Assumes src is 16-byte aligned
 * and len is a multiple of 8 -- TODO confirm against callers.
 * dst may be unaligned: the first branch handles that with the
 * lvsl/lvsr edge-merge store pattern.
 */
void float_to_int16_altivec(int16_t *dst, const float *src, int len)
{
    int i;
    vector float s0, s1;
    vector signed int t0, t1;
    vector signed short d0, d1, d;
    vector unsigned char align;
    if(((long)dst)&15)                     /* dst not 16-byte aligned */
        for(i=0; i<len-7; i+=8) {
            s0 = vec_ld(0, src+i);
            s1 = vec_ld(16, src+i);
            t0 = vec_cts(s0, 0);
            /* aligned vectors covering the 16 dst bytes: offsets 0 and 15 */
            d0 = vec_ld(0, dst+i);
            t1 = vec_cts(s1, 0);
            d1 = vec_ld(15, dst+i);
            d = vec_packs(t0,t1);
            /* gather the existing edge bytes around the span */
            d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));
            align = vec_lvsr(0, dst+i);
            /* merge new samples with preserved edges, realigned to dst */
            d0 = vec_perm(d1, d, align);
            d1 = vec_perm(d, d1, align);
            vec_st(d0, 0, dst+i);
            vec_st(d1,15, dst+i);
        }
    else                                   /* fast aligned path */
        for(i=0; i<len-7; i+=8) {
            s0 = vec_ld(0, src+i);
            s1 = vec_ld(16, src+i);
            t0 = vec_cts(s0, 0);
            t1 = vec_cts(s1, 0);
            d = vec_packs(t0,t1);
            vec_st(d, 0, dst+i);
        }
}
00185
/**
 * Install the AltiVec float DSP routines into the DSPContext.
 * float_to_int16 is only overridden when bit-exact output is not
 * requested -- presumably the vector conversion is not bit-identical
 * to the C reference (matches the CODEC_FLAG_BITEXACT guard).
 */
void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
    c->vector_fmul = vector_fmul_altivec;
    c->vector_fmul_reverse = vector_fmul_reverse_altivec;
    c->vector_fmul_add_add = vector_fmul_add_add_altivec;
    if(!(avctx->flags & CODEC_FLAG_BITEXACT))
        c->float_to_int16 = float_to_int16_altivec;
}