00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #ifndef FFMPEG_VIS_H
00044 #define FFMPEG_VIS_H
00045
/*
 * Bit-field builders for hand-assembled VIS instruction words.
 *
 * vis_opc_base sets bit 31 and places 0x36 at bit 19; the emitter macros
 * OR the remaining fields into it.  The constant is built from unsigned
 * operands because shifting a signed 1 into bit 31 of a 32-bit int is
 * undefined behaviour in ISO C.
 */
#define vis_opc_base    ((0x1U << 31) | (0x36U << 19))

/* Place the opf value X at bit 5 of the instruction word. */
#define vis_opf(X)      ((X) << 5)

/* Single-precision register number: used unchanged as the 5-bit field. */
#define vis_sreg(X)     (X)

/*
 * Double-precision register number: the low five bits of X are kept and
 * bit 5 of X is folded into bit 0, so 32 encodes as 1 and 62 as 31.
 * NOTE(review): only meaningful for even X in 0..62 -- confirm callers.
 */
#define vis_dreg(X)     (((X) & 0x1f) | ((X) >> 5))

/* Register fields: rs1 lives at bit 14, rs2 at bit 0 and rd at bit 25. */
#define vis_rs1_s(X)    (vis_sreg(X) << 14)
#define vis_rs1_d(X)    (vis_dreg(X) << 14)
#define vis_rs2_s(X)    (vis_sreg(X) << 0)
#define vis_rs2_d(X)    (vis_dreg(X) << 0)
#define vis_rd_s(X)     (vis_sreg(X) << 25)
#define vis_rd_d(X)     (vis_dreg(X) << 25)
00056
/*
 * Three-operand instruction emitters.
 *
 * Each macro folds the opf value and three register numbers into one
 * compile-time constant and emits it with a ".word" directive.  The
 * letters before the "2" give the encoding used for rs1 and rs2, the
 * letter after it the encoding used for rd ("s" = vis_*_s, "d" = vis_*_d).
 */

/* rs1: s, rs2: s, rd: s */
#define vis_ss2s(opfn, src1, src2, dst)                         \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_s(dst)   |              \
                                 vis_rs1_s(src1) |              \
                                 vis_rs2_s(src2) |              \
                                 vis_opf(opfn)   |              \
                                 vis_opc_base))

/* rs1: d, rs2: d, rd: d */
#define vis_dd2d(opfn, src1, src2, dst)                         \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst)   |              \
                                 vis_rs1_d(src1) |              \
                                 vis_rs2_d(src2) |              \
                                 vis_opf(opfn)   |              \
                                 vis_opc_base))

/* rs1: s, rs2: s, rd: d */
#define vis_ss2d(opfn, src1, src2, dst)                         \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst)   |              \
                                 vis_rs1_s(src1) |              \
                                 vis_rs2_s(src2) |              \
                                 vis_opf(opfn)   |              \
                                 vis_opc_base))

/* rs1: s, rs2: d, rd: d */
#define vis_sd2d(opfn, src1, src2, dst)                         \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst)   |              \
                                 vis_rs1_s(src1) |              \
                                 vis_rs2_d(src2) |              \
                                 vis_opf(opfn)   |              \
                                 vis_opc_base))
00084
/*
 * Two-operand emitters that change register width: only rs2 and rd are
 * filled in, the rs1 field is left zero.
 */

/* rs2: d, rd: s */
#define vis_d2s(opfn, src, dst)                                 \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_s(dst)  |               \
                                 vis_rs2_d(src) |               \
                                 vis_opf(opfn)  |               \
                                 vis_opc_base))

/* rs2: s, rd: d */
#define vis_s2d(opfn, src, dst)                                 \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst)  |               \
                                 vis_rs2_s(src) |               \
                                 vis_opf(opfn)  |               \
                                 vis_opc_base))
00096
/*
 * Two-operand emitters with a single source.  The digit in the name says
 * which source field carries the operand (1 = rs1, 2 = rs2); the other
 * source field is left zero.
 */

/* rs1: d, rd: d */
#define vis_d12d(opfn, src, dst)                                \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst)  |               \
                                 vis_rs1_d(src) |               \
                                 vis_opf(opfn)  |               \
                                 vis_opc_base))

/* rs2: d, rd: d */
#define vis_d22d(opfn, src, dst)                                \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst)  |               \
                                 vis_rs2_d(src) |               \
                                 vis_opf(opfn)  |               \
                                 vis_opc_base))

/* rs1: s, rd: s */
#define vis_s12s(opfn, src, dst)                                \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_s(dst)  |               \
                                 vis_rs1_s(src) |               \
                                 vis_opf(opfn)  |               \
                                 vis_opc_base))

/* rs2: s, rd: s */
#define vis_s22s(opfn, src, dst)                                \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_s(dst)  |               \
                                 vis_rs2_s(src) |               \
                                 vis_opf(opfn)  |               \
                                 vis_opc_base))
00120
/*
 * Destination-only emitters: just the opf value and the rd field are
 * filled in, both source fields stay zero.
 */

/* rd: s */
#define vis_s(opfn, dst)                                        \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_s(dst) |                \
                                 vis_opf(opfn) |                \
                                 vis_opc_base))

/* rd: d */
#define vis_d(opfn, dst)                                        \
        __asm__ volatile (".word %0"                            \
                          : /* no outputs */                    \
                          : "i" (vis_rd_d(dst) |                \
                                 vis_opf(opfn) |                \
                                 vis_opc_base))
00130
/*
 * Register-to-memory moves.
 *
 * vis_r2m   emits "<op> %f<rd>, [&mem]".
 * vis_r2m_2 emits "<op> %f<rd>, [mem1 + mem2]".
 *
 * op is pasted in as the mnemonic and rd as the register number, so both
 * must be literal tokens.  The address reaches the asm only as a register
 * input, so the compiler cannot see that memory is written; the "memory"
 * clobber tells it so and keeps cached copies of the target from being
 * reused after the store.
 */
#define vis_r2m(op,rd,mem) \
        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" \
                              : /* no outputs */ \
                              : "r" (&(mem)) \
                              : "memory")

#define vis_r2m_2(op,rd,mem1,mem2) \
        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" \
                              : /* no outputs */ \
                              : "r" (mem1), "r" (mem2) \
                              : "memory")
00136
/*
 * Memory-to-register moves.
 *
 * vis_m2r   emits "<op> [&mem], %f<rd>".
 * vis_m2r_2 emits "<op> [mem1 + mem2], %f<rd>".
 *
 * op is pasted in as the mnemonic and rd as the register number, so both
 * must be literal tokens.  The address reaches the asm only as a register
 * input, so the compiler cannot see that memory is read; the "memory"
 * clobber makes it complete any pending stores to that memory before the
 * load executes.
 */
#define vis_m2r(op,mem,rd) \
        __asm__ __volatile__ (#op "\t[%0], %%f" #rd \
                              : /* no outputs */ \
                              : "r" (&(mem)) \
                              : "memory")

#define vis_m2r_2(op,mem1,mem2,rd) \
        __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd \
                              : /* no outputs */ \
                              : "r" (mem1), "r" (mem2) \
                              : "memory")
00142
/*
 * Emit the fixed instruction word 0xa7804000 with _val held in %g1.
 *
 * The asm template never references its operand, so binding the variable
 * to %g1 is the only way the value reaches the instruction.
 * NOTE(review): the word appears to decode to "wr %g1, %g0, %gsr" (write
 * the Graphics Status Register) -- confirm against the SPARC V9 manual.
 *
 * The register binding uses __asm__ rather than asm so the header also
 * compiles in strict ISO modes (-std=c99, -std=c11), where asm is not a
 * keyword, and so it matches the spelling used by the other asm
 * statements in this file.
 */
static inline void vis_set_gsr(unsigned int _val)
{
        register unsigned int val __asm__("g1") = _val;

        __asm__ __volatile__(".word 0xa7804000"
                             : : "r" (val));
}
00151
/*
 * Field layout constants for the value passed to vis_set_gsr(): each
 * field is described by its bit position and the mask covering it.
 * The align-address field is 3 bits wide at bit 0, the scale-factor
 * field is 4 bits wide at bit 3.
 */
#define VIS_GSR_ALIGNADDR_SHIFT 0
#define VIS_GSR_ALIGNADDR_MASK  (0x7 << VIS_GSR_ALIGNADDR_SHIFT)
#define VIS_GSR_SCALEFACT_SHIFT 3
#define VIS_GSR_SCALEFACT_MASK  (0xf << VIS_GSR_SCALEFACT_SHIFT)
00156
/*
 * 32- and 64-bit loads and stores between memory and the %f registers.
 * The plain forms address an lvalue (its address is taken by vis_m2r /
 * vis_r2m); the _2 forms take two values that are added to form the
 * address.  32-bit forms use ld/st, 64-bit forms use ldd/std.
 */

/* Loads: memory -> %f<reg> */
#define vis_ld32(src, reg)              vis_m2r(ld, src, reg)
#define vis_ld32_2(base, off, reg)      vis_m2r_2(ld, base, off, reg)
#define vis_ld64(src, reg)              vis_m2r(ldd, src, reg)
#define vis_ld64_2(base, off, reg)      vis_m2r_2(ldd, base, off, reg)

/* Stores: %f<reg> -> memory */
#define vis_st32(reg, dst)              vis_r2m(st, reg, dst)
#define vis_st32_2(reg, base, off)      vis_r2m_2(st, reg, base, off)
#define vis_st64(reg, dst)              vis_r2m(std, reg, dst)
#define vis_st64_2(reg, base, off)      vis_r2m_2(std, reg, base, off)
00165
/*
 * Emit the fixed word 0xc1985e00 with the rd field (double-register
 * encoding of rd) OR-ed in, after placing the address of mem in %g1.
 *
 * The template never references operand %0; binding __mem to %g1 is what
 * hands the address to the instruction.
 * NOTE(review): the word appears to decode to a block load
 * "ldda [%g1] 0xf0, %f<rd>" -- confirm against the UltraSPARC manual.
 *
 * The register binding uses __asm__ rather than asm so the macro also
 * expands correctly in strict ISO modes (-std=c99, -std=c11) and matches
 * the asm statement it sits next to.
 */
#define vis_ldblk(mem, rd) \
do {        register void *__mem __asm__("g1"); \
\
        __mem = &(mem); \
        __asm__ __volatile__(".word 0xc1985e00 | %1" \
                             : \
                             : "r" (__mem), \
                               "i" (vis_rd_d(rd)) \
                             : "memory"); \
} while (0)
00175
/*
 * Emit the fixed word 0xc1b85e00 with the rd field (double-register
 * encoding of rd) OR-ed in, after placing the address of mem in %g1.
 *
 * The template never references operand %0; binding __mem to %g1 is what
 * hands the address to the instruction.
 * NOTE(review): the word appears to decode to a block store
 * "stda %f<rd>, [%g1] 0xf0" -- confirm against the UltraSPARC manual.
 *
 * The register binding uses __asm__ rather than asm so the macro also
 * expands correctly in strict ISO modes (-std=c99, -std=c11) and matches
 * the asm statement it sits next to.
 */
#define vis_stblk(rd, mem) \
do {        register void *__mem __asm__("g1"); \
\
        __mem = &(mem); \
        __asm__ __volatile__(".word 0xc1b85e00 | %1" \
                             : \
                             : "r" (__mem), \
                               "i" (vis_rd_d(rd)) \
                             : "memory"); \
} while (0)
00185
/*
 * Memory barriers, emitted as fixed instruction words.  Both also carry
 * a "memory" clobber, so the compiler will not move memory accesses
 * across them.
 * NOTE(review): the words appear to decode to "membar #StoreStore" and
 * "membar #Sync" respectively -- confirm against the SPARC V9 manual.
 */
#define vis_membar_storestore()                                 \
        __asm__ volatile (".word 0x8143e008"                    \
                          : /* no outputs */                    \
                          : /* no inputs */                     \
                          : "memory")

#define vis_membar_sync()                                       \
        __asm__ volatile (".word 0x8143e040"                    \
                          : /* no outputs */                    \
                          : /* no inputs */                     \
                          : "memory")
00191
00192
00193
00194
00195
00196
00197
/*
 * Add/subtract family, opf 0x50..0x57.  Even opf values are the
 * double-register forms (vis_dd2d); the "s"-suffixed odd values are the
 * single-register forms (vis_ss2s).
 * NOTE(review): presumably the VIS partitioned FPADD/FPSUB 16- and
 * 32-bit instructions -- confirm against the VIS manual.
 */

/* Additions */
#define vis_padd16(src1, src2, dst)     vis_dd2d(0x50, src1, src2, dst)
#define vis_padd16s(src1, src2, dst)    vis_ss2s(0x51, src1, src2, dst)
#define vis_padd32(src1, src2, dst)     vis_dd2d(0x52, src1, src2, dst)
#define vis_padd32s(src1, src2, dst)    vis_ss2s(0x53, src1, src2, dst)

/* Subtractions */
#define vis_psub16(src1, src2, dst)     vis_dd2d(0x54, src1, src2, dst)
#define vis_psub16s(src1, src2, dst)    vis_ss2s(0x55, src1, src2, dst)
#define vis_psub32(src1, src2, dst)     vis_dd2d(0x56, src1, src2, dst)
#define vis_psub32s(src1, src2, dst)    vis_ss2s(0x57, src1, src2, dst)
00206
00207
00208
/*
 * Pack / expand / merge family.  The emitter chosen for each macro fixes
 * the register widths: pack16 and packfix read a double and write a
 * single, expand reads a single and writes a double, pack32 works on
 * doubles throughout, pmerge reads two singles and writes a double.
 * NOTE(review): presumably FPACK16/FPACK32/FPACKFIX/FEXPAND/FPMERGE --
 * confirm against the VIS manual.
 */
#define vis_pack16(src, dst)            vis_d2s(0x3b, src, dst)
#define vis_pack32(src1, src2, dst)     vis_dd2d(0x3a, src1, src2, dst)
#define vis_packfix(src, dst)           vis_d2s(0x3d, src, dst)
#define vis_expand(src, dst)            vis_s2d(0x4d, src, dst)
#define vis_pmerge(src1, src2, dst)     vis_ss2d(0x4b, src1, src2, dst)
00214
00215
00216
/*
 * Multiply family, opf 0x31..0x39.  The emitter chosen for each macro
 * fixes the operand widths: mul8x16 takes a single and a double, the
 * au/al and muld forms take two singles, the sux/ulx forms take two
 * doubles; every form writes a double.
 * NOTE(review): presumably the VIS FMUL8x16 / FMULD8x16 instructions --
 * confirm against the VIS manual.
 */
#define vis_mul8x16(src1, src2, dst)    vis_sd2d(0x31, src1, src2, dst)
#define vis_mul8x16au(src1, src2, dst)  vis_ss2d(0x33, src1, src2, dst)
#define vis_mul8x16al(src1, src2, dst)  vis_ss2d(0x35, src1, src2, dst)
#define vis_mul8sux16(src1, src2, dst)  vis_dd2d(0x36, src1, src2, dst)
#define vis_mul8ulx16(src1, src2, dst)  vis_dd2d(0x37, src1, src2, dst)
#define vis_muld8sux16(src1, src2, dst) vis_ss2d(0x38, src1, src2, dst)
#define vis_muld8ulx16(src1, src2, dst) vis_ss2d(0x39, src1, src2, dst)
00224
00225
00226
00227 static inline void *vis_alignaddr(void *_ptr)
00228 {
00229 register void *ptr asm("g1");
00230
00231 ptr = _ptr;
00232
00233 __asm__ __volatile__(".word %2"
00234 : "=&r" (ptr)
00235 : "0" (ptr),
00236 "i" (vis_opc_base | vis_opf(0x18) |
00237 vis_rs1_s(1) |
00238 vis_rs2_s(0) |
00239 vis_rd_s(1)));
00240
00241 return ptr;
00242 }
00243
00244 static inline void vis_alignaddr_g0(void *_ptr)
00245 {
00246 register void *ptr asm("g1");
00247
00248 ptr = _ptr;
00249
00250 __asm__ __volatile__(".word %2"
00251 : "=&r" (ptr)
00252 : "0" (ptr),
00253 "i" (vis_opc_base | vis_opf(0x18) |
00254 vis_rs1_s(1) |
00255 vis_rs2_s(0) |
00256 vis_rd_s(0)));
00257 }
00258
00259 static inline void *vis_alignaddrl(void *_ptr)
00260 {
00261 register void *ptr asm("g1");
00262
00263 ptr = _ptr;
00264
00265 __asm__ __volatile__(".word %2"
00266 : "=&r" (ptr)
00267 : "0" (ptr),
00268 "i" (vis_opc_base | vis_opf(0x19) |
00269 vis_rs1_s(1) |
00270 vis_rs2_s(0) |
00271 vis_rd_s(1)));
00272
00273 return ptr;
00274 }
00275
00276 static inline void vis_alignaddrl_g0(void *_ptr)
00277 {
00278 register void *ptr asm("g1");
00279
00280 ptr = _ptr;
00281
00282 __asm__ __volatile__(".word %2"
00283 : "=&r" (ptr)
00284 : "0" (ptr),
00285 "i" (vis_opc_base | vis_opf(0x19) |
00286 vis_rs1_s(1) |
00287 vis_rs2_s(0) |
00288 vis_rd_s(0)));
00289 }
00290
/*
 * opf 0x48 on three double registers.
 * NOTE(review): presumably FALIGNDATA, which extracts 8 bytes from the
 * rs1:rs2 pair at the offset previously set up by the alignaddr helpers
 * -- confirm against the VIS manual.
 */
#define vis_faligndata(src1, src2, dst) vis_dd2d(0x48, src1, src2, dst)
00292
00293
00294
/*
 * Logical-operation family, opf 0x60..0x7f.  Every operation comes as a
 * pair: the even opf value works on double registers, the following odd
 * value (macro name with an "s" suffix) works on single registers.
 */

/* Constant fill: destination only. */
#define vis_fzero(dst)                  vis_d(0x60, dst)
#define vis_fzeros(dst)                 vis_s(0x61, dst)
#define vis_fone(dst)                   vis_d(0x7e, dst)
#define vis_fones(dst)                  vis_s(0x7f, dst)

/* Copy / complement of one source; the digit names the source field. */
#define vis_src1(src, dst)              vis_d12d(0x74, src, dst)
#define vis_src1s(src, dst)             vis_s12s(0x75, src, dst)
#define vis_src2(src, dst)              vis_d22d(0x78, src, dst)
#define vis_src2s(src, dst)             vis_s22s(0x79, src, dst)
#define vis_not1(src, dst)              vis_d12d(0x6a, src, dst)
#define vis_not1s(src, dst)             vis_s12s(0x6b, src, dst)
#define vis_not2(src, dst)              vis_d22d(0x66, src, dst)
#define vis_not2s(src, dst)             vis_s22s(0x67, src, dst)

/* Two-source operations. */
#define vis_or(src1, src2, dst)         vis_dd2d(0x7c, src1, src2, dst)
#define vis_ors(src1, src2, dst)        vis_ss2s(0x7d, src1, src2, dst)
#define vis_nor(src1, src2, dst)        vis_dd2d(0x62, src1, src2, dst)
#define vis_nors(src1, src2, dst)       vis_ss2s(0x63, src1, src2, dst)
#define vis_and(src1, src2, dst)        vis_dd2d(0x70, src1, src2, dst)
#define vis_ands(src1, src2, dst)       vis_ss2s(0x71, src1, src2, dst)
#define vis_nand(src1, src2, dst)       vis_dd2d(0x6e, src1, src2, dst)
#define vis_nands(src1, src2, dst)      vis_ss2s(0x6f, src1, src2, dst)
#define vis_xor(src1, src2, dst)        vis_dd2d(0x6c, src1, src2, dst)
#define vis_xors(src1, src2, dst)       vis_ss2s(0x6d, src1, src2, dst)
#define vis_xnor(src1, src2, dst)       vis_dd2d(0x72, src1, src2, dst)
#define vis_xnors(src1, src2, dst)      vis_ss2s(0x73, src1, src2, dst)

/* Two-source operations with one source complemented. */
#define vis_ornot1(src1, src2, dst)     vis_dd2d(0x7a, src1, src2, dst)
#define vis_ornot1s(src1, src2, dst)    vis_ss2s(0x7b, src1, src2, dst)
#define vis_ornot2(src1, src2, dst)     vis_dd2d(0x76, src1, src2, dst)
#define vis_ornot2s(src1, src2, dst)    vis_ss2s(0x77, src1, src2, dst)
#define vis_andnot1(src1, src2, dst)    vis_dd2d(0x68, src1, src2, dst)
#define vis_andnot1s(src1, src2, dst)   vis_ss2s(0x69, src1, src2, dst)
#define vis_andnot2(src1, src2, dst)    vis_dd2d(0x64, src1, src2, dst)
#define vis_andnot2s(src1, src2, dst)   vis_ss2s(0x65, src1, src2, dst)
00327
00328
00329
/*
 * opf 0x3e on three double registers.
 * NOTE(review): presumably PDIST (pixel distance: sum of absolute byte
 * differences accumulated into rd) -- confirm against the VIS manual.
 */
#define vis_pdist(src1, src2, dst)      vis_dd2d(0x3e, src1, src2, dst)
00331
00332 #endif