summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h')
-rw-r--r--kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h508
1 files changed, 508 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h b/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h
new file mode 100644
index 0000000..6b150d3
--- /dev/null
+++ b/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h
@@ -0,0 +1,508 @@
+#ifndef CAMM_UTIL_H
+#define CAMM_UTIL_H /*+ To stop multiple inclusions. +*/
+
+typedef struct {
+ float r,i;
+} Complex;
+
+typedef struct {
+ double r,i;
+} Dcomplex;
+
+#undef str
+#define str(a_) xstr(a_)
+#undef xstr
+#define xstr(a_) #a_
+
+#undef val
+#define val(a_) xval(a_)
+#undef xval
+#define xval(a_) a_
+
+#ifndef Mjoin
+#define Mjoin(a,b) mjoin(a,b)
+#ifdef mjoin
+ #undef mjoin
+#endif
+#define mjoin(a,b) a ## b
+#endif
+
+#undef VOLATILE
+#define VOLATILE __volatile__
+#undef ASM
+#define ASM __asm__ VOLATILE
+
+#ifdef BETA0
+#undef BL
+#define BL b0
+#endif
+#ifdef BETA1
+#undef BL
+#define BL b1
+#endif
+#ifdef BETAX
+#undef BL
+#define BL bX
+#endif
+#ifdef BETAXI0
+#undef BL
+#define BL bXi0
+#endif
+
+#ifdef NO_TRANSPOSE
+#ifdef GER
+#ifdef Conj_
+#undef FEXT
+#define FEXT Gc
+#else
+#undef FEXT
+#define FEXT Gu
+#endif
+#else
+#ifdef Conj_
+#undef FEXT
+#define FEXT Nc
+#else
+#undef FEXT
+#define FEXT N
+#endif
+#endif
+#else
+#ifdef Conj_
+#undef FEXT
+#define FEXT C
+#else
+#undef FEXT
+#define FEXT T
+#endif
+#endif
+
+#undef BLC
+#define BLC Mjoin(FEXT,BL)
+
+#ifdef __GNUC__
+#undef NO_INLINE
+#define NO_INLINE double sq(double x) {return x*x;}
+#else
+#undef NO_INLINE
+#define NO_INLINE
+#endif
+
+#undef lab
+#define lab(a_) "\n" str(MY_FUNCTION) "_" str(N) "_" str(a_) ":\n\t"
+#undef jmp
+#define jmp(a_) "jmp " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef je
+#define je(a_) "je " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jge
+#define jge(a_) "jge " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jle
+#define jle(a_) "jle " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jl
+#define jl(a_) "jl " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jne
+#define jne(a_) "jne " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef align
+#define align ".align 16\n\t"
+#undef test
+#define test(a_,b_) "testl $" str(a_) ",%%e" str(b_) "\n\t"
+#undef and
+#define and(a_,b_) "andl $" str(a_) ",%%e" str(b_) "\n\t"
+#undef sub
+#define sub(a_,b_) "subl $" str(a_) ",%%e" str(b_) "\n\t"
+#undef SS
+#define SS(a_,b_) a_ + b_
+#undef MM
+#define MM(a_,b_) a_ * b_
+#undef E4
+#define E4(a_) (( a_ >> 2 ) << 2 )
+
+#undef TYPE
+#undef SCALAR
+#undef PREC
+#undef CSHUF
+#undef LSHUF
+#undef HSHUF
+#undef ISHUF
+#undef RSHUF
+#undef SINGLE
+#undef REAL
+#undef DIV
+
+#ifdef SCPLX
+#define TYPE Complex
+#define SCALAR Complex *
+#define PREC c
+#define CSHUF 177
+#define LSHUF 160
+#define HSHUF 245
+#define ISHUF 13*17
+#define RSHUF 8*17
+#define SINGLE
+#define DIV 2
+/* #ifdef Conj_ */
+/* static const TYPE signd[2]={{-1.0,1.0},{-1.0,1.0}}; */
+/* #else */
+ static const TYPE signd[2]={{1.0,-1.0},{1.0,-1.0}};
+/* #endif */
+#endif
+
+#ifdef SREAL
+#define TYPE float
+#define SCALAR float
+#define PREC s
+#define SINGLE
+#define REAL
+#define DIV 1
+#endif
+
+#ifdef DREAL
+#define TYPE double
+#define SCALAR double
+#define PREC d
+#define REAL
+#define DIV 2
+#endif
+
+#ifdef DCPLX
+#define TYPE Dcomplex
+#define SCALAR Dcomplex *
+#define PREC z
+#define CSHUF 1
+#define LSHUF 0
+#define HSHUF 3
+#define ISHUF 3
+#define RSHUF 0
+#define DIV 4
+/* #ifdef Conj_ */
+/* static const TYPE signd[1]={{-1.0,1.0}}; */
+/* #else */
+ static const TYPE signd[1]={{1.0,-1.0}};
+/* #endif */
+#endif
+
+#undef M11
+#define M11 0
+#undef M12
+#define M12 1
+#undef M13
+#define M13 2
+#undef M14
+#define M14 3
+#undef M15
+#define M15 4
+#undef M16
+#define M16 5
+#undef M17
+#define M17 6
+#undef M18
+#define M18 7
+
+#undef M23
+#define M23 1
+#undef M24
+#define M24 2
+#undef M25
+#define M25 3
+#undef M26
+#define M26 4
+#undef M27
+#define M27 5
+#undef M28
+#define M28 6
+
+#undef M33
+#define M33 0
+#undef M34
+#define M34 1
+#undef M35
+#define M35 2
+#undef M36
+#define M36 3
+#undef M37
+#define M37 4
+#undef M38
+#define M38 5
+
+#undef P10
+#define P10 1
+#undef P11
+#define P11 2
+#undef P12
+#define P12 3
+#undef P13
+#define P13 4
+#undef P14
+#define P14 5
+#undef P15
+#define P15 6
+#undef P16
+#define P16 7
+
+#undef XM
+#define XM(a_,b_) M ## b_ ## a_
+#undef M
+#define M(a_,b_) XM(a_,b_)
+
+#undef XP
+#define XP(a_,b_) P ## b_ ## a_
+#undef P
+#define P(a_,b_) XP(a_,b_)
+
+#undef mex
+#define mex(a_) str(%%e ## a_)
+#undef msx
+#define msx(a_) "%%st(" str(a_) ")"
+
+#undef cmp
+#define cmp(a_,b_) "cmp " mex(a_) "," mex(b_) "\n\t"
+#undef icmpr
+#define icmpr(a_,b_) "cmp " mex(a_) ",(" mex(b_) ")\n\t"
+#undef f
+#define f(a_,b_,c_) "prefetch" str(a_) " " str(b_) "(%%e" #c_ ")\n\t"
+#undef pfx
+#define pfx(a_,b_,c_,d_,e_) "prefetch" str(a_) " " str(b_) "(%%e" #c_ ",%%e" #d_ "," str(e_) ")\n\t"
+#undef a
+#define a(a_,b_) "addl $" str(a_) "," mex(b_) "\n\t"
+#undef m
+#define m(a_,b_) "imul $" str(a_) "," mex(b_) "\n\t"
+#undef pop
+#define pop(a_) "popl %%e" str(a_) "\n\t"
+#undef push
+#define push(a_) "pushl %%e" str(a_) "\n\t"
+#undef d
+#define d(a_,b_) "idiv $" str(a_) "," mex(b_) "\n\t"
+#undef shl
+#define shl(a_,b_) "shl $" str(a_) "," mex(b_) "\n\t"
+#undef shr
+#define shr(a_,b_) "shr $" str(a_) "," mex(b_) "\n\t"
+#undef mm
+#define mm(a_,b_) "mov $" str(a_) "," mex(b_) "\n\t"
+#undef ra
+#define ra(a_,b_) "addl %%e" str(a_) "," mex(b_) "\n\t"
+#undef rs
+#define rs(a_,b_) "subl %%e" str(a_) "," mex(b_) "\n\t"
+
+#undef fl
+#define fl(a_,b_) "fldl " str(a_) "(" mex(b_) ")\n\t"
+#undef fp
+#define fp(a_,b_) "fstpl " str(a_) "(" mex(b_) ")\n\t"
+#undef fd
+#define fd(a_) "fld " msx(a_) "\n\t"
+#undef fap
+#define fap(a_,b_) "faddp " msx(a_) "," msx(b_) "\n\t"
+/* #define fsp(a_) fx(a_) "fsubp %%st," msx(a_) "\n\t" */
+#undef fsp
+#define fsp(a_) "fsubrp %%st," msx(a_) "\n\t"
+#undef fmp
+#define fmp(a_,b_) "fmulp " msx(a_) "," msx(b_) "\n\t"
+#undef fa
+#define fa(a_,b_) "fadd " msx(a_) "," msx(b_) "\n\t"
+#undef fm
+#define fm(a_,b_) "fmul " msx(a_) "," msx(b_) "\n\t"
+#undef faa
+#define faa(a_,b_) "faddl " str(a_) "(" mex(b_) ")\n\t"
+#undef fma
+#define fma(a_,b_) "fmull " str(a_) "(" mex(b_) ")\n\t"
+#undef fz
+#define fz "fldz\n\t"
+#undef fx
+#define fx(a_) "fxch " msx(a_) "\n\t"
+#undef fx1
+#define fx1 "fxch\n\t"
+#undef fc
+#define fc(a_) "fstp " msx(a_) "\n\t"
+
+
+#ifndef ATHLON
+
+
+#if defined(DREAL) || defined(DCPLX)
+#undef SSESUF
+#define SSESUF "d "
+#undef RS4
+#define RS4 16
+#undef RS
+#define RS 4
+#else
+#undef SSESUF
+#define SSESUF "s "
+#undef RS4
+#define RS4 16
+#undef RS
+#define RS 4
+#endif
+
+#undef mxx
+#define mxx(a_) str(%%xmm ## a_)
+#undef prp
+#define prp(a_,b_) "rcpp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef prps
+#define prps(a_,b_) "rcps" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pann
+#define pann(a_,b_) "andnp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef psqs
+#define psqs(a_,b_) "sqrts" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef por
+#define por(a_,b_) "orp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pan
+#define pan(a_,b_) "andp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pcm
+#define pcm(a_,b_,c_) "cmpp" SSESUF " $" str(a_) "," mxx(b_) "," mxx(c_) "\n\t"
+#undef pcms
+#define pcms(a_,b_,c_) "cmps" SSESUF " $" str(a_) "," mxx(b_) "," mxx(c_) "\n\t"
+#undef pax
+#define pax(a_,b_) "maxp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef paxs
+#define paxs(a_,b_) "maxs" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pd
+#define pd(a_,b_) "divp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pdsr
+#define pdsr(a_,b_) "divs" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pxx
+#define pxx(a_,b_) "xorp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef px
+#define px(a_) "xorp" SSESUF mxx(a_) "," mxx(a_) "\n\t"
+#undef pm
+#define pm(a_,b_) "mulp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pa
+#define pa(a_,b_) "addp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pmm
+#define pmm(a_,b_,c_) "mulp" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pam
+#define pam(a_,b_,c_) "addp" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pl
+#define pl(a_,b_,c_) "movup" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pla
+#define pla(a_,b_,c_) "movap" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pu
+#define pu(a_,b_,c_) "movup" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef punt
+#define punt(a_,b_,c_) "movntp" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pua
+#define pua(a_,b_,c_) "movap" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pud
+#define pud(a_,b_,c_) "movlp" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pudr
+#define pudr(a_,b_) "movlp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pc
+#define pc(a_,b_) "movap" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef ps
+#define ps(a_,b_,c_) "shufp" SSESUF " $" str(a_) "," mxx(b_) "," mxx(c_) "\n\t"
+#undef phl
+#define phl(a_,b_) "movhlp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pus
+#define pus(a_,b_,c_) "movs" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pls
+#define pls(a_,b_,c_) "movs" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pld
+#define pld(a_,b_,c_) "movlp" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef plh
+#define plh(a_,b_) "movlhp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pas
+#define pas(a_,b_,c_) "adds" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pms
+#define pms(a_,b_,c_) "muls" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pcs
+#define pcs(a_,b_) "movs" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pasr
+#define pasr(a_,b_) "adds" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pmsr
+#define pmsr(a_,b_) "muls" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pul
+#define pul(a_,b_) "unpcklp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef puh
+#define puh(a_,b_) "unpckhp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+
+#undef plsx
+#define plsx(a_,b_,c_,d_,e_) \
+ "movs" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef plx
+#define plx(a_,b_,c_,d_,e_) \
+ "movup" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef plax
+#define plax(a_,b_,c_,d_,e_) \
+ "movap" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pasx
+#define pasx(a_,b_,c_,d_,e_) \
+ "adds" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pusx
+#define pusx(a_,b_,c_,d_,e_) \
+ "movs" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef pux
+#define pux(a_,b_,c_,d_,e_) \
+ "movup" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef puax
+#define puax(a_,b_,c_,d_,e_) \
+ "movap" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef pudx
+#define pudx(a_,b_,c_,d_,e_) \
+ "movlp" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+
+#undef pldx
+#define pldx(a_,b_,c_,d_,e_) \
+ "movlp" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+
+#else
+
+#undef RS4
+#define RS4 8
+#undef RS
+#define RS 2
+
+#undef mxx
+#define mxx(a_) str(%%mm ## a_)
+#undef pul
+#define pul(a_,b_) "punpckldq " mxx(a_) "," mxx(b_) "\n\t"
+#undef puh
+#define puh(a_,b_) "punpckhdq " mxx(a_) "," mxx(b_) "\n\t"
+
+#undef px
+#define px(a_) "pxor " mxx(a_) "," mxx(a_) "\n\t"
+#undef pm
+#define pm(a_,b_) "pfmul " mxx(a_) "," mxx(b_) "\n\t"
+#undef pa
+#define pa(a_,b_) "pfadd " mxx(a_) "," mxx(b_) "\n\t"
+#undef pac
+#define pac(a_,b_) "pfacc " mxx(a_) "," mxx(b_) "\n\t"
+#undef pmm
+#define pmm(a_,b_,c_) "pfmul " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pam
+#define pam(a_,b_,c_) "pfadd " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pl
+#define pl(a_,b_,c_) "movq " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pla
+#define pla(a_,b_,c_) "movq " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pu
+#define pu(a_,b_,c_) "movq " mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pc
+#define pc(a_,b_) "movq " mxx(a_) "," mxx(b_) "\n\t"
+#undef ps
+#define ps(a_,b_,c_) "pswapd " mxx(b_) "," mxx(c_) "\n\t"
+#undef phl
+#define phl(a_,b_) "punpckhdq " mxx(a_) "," mxx(b_) "\n\t"
+#undef plh
+#define plh(a_,b_) "punpckldq " mxx(a_) "," mxx(b_) "\n\t"
+#undef pus
+#define pus(a_,b_,c_) "movd " mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pls
+#define pls(a_,b_,c_) "movd " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+
+#undef plsx
+#define plsx(a_,b_,c_,d_,e_) \
+ "movd " str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef plx
+#define plx(a_,b_,c_,d_,e_) \
+ "movq " str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pasx
+#define pasx(a_,b_,c_,d_,e_) \
+ "addss " str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pusx
+#define pusx(a_,b_,c_,d_,e_) \
+ "movd " mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef pux
+#define pux(a_,b_,c_,d_,e_) \
+ "movq " mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#endif
+
+#endif /* CAMM_UTIL_H */