From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Fri, 14 Aug 2015 11:51:42 +0800
Subject: add implementation for kaldi io (by ymz)

---
 .../src/tools/ATLAS/include/contrib/camm_util.h | 508 +++++++++++++++++++++
 1 file changed, 508 insertions(+)
 create mode 100644 kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h

(limited to 'kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h')

diff --git a/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h b/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h
new file mode 100644
index 0000000..6b150d3
--- /dev/null
+++ b/kaldi_io/src/tools/ATLAS/include/contrib/camm_util.h
@@ -0,0 +1,508 @@
+#ifndef CAMM_UTIL_H
+#define CAMM_UTIL_H /*+ To stop multiple inclusions. +*/
+/* Complex / Dcomplex: structs holding two floats / two doubles named r and i (presumably real and imaginary parts). */
+typedef struct {
+ float r,i;
+} Complex;
+
+typedef struct {
+ double r,i;
+} Dcomplex;
+/* str(a_): macro-expand a_ first, then stringify the result (xstr performs the actual # stringification). */
+#undef str
+#define str(a_) xstr(a_)
+#undef xstr
+#define xstr(a_) #a_
+/* val(a_): macro-expand a_ and yield the result unchanged. */
+#undef val
+#define val(a_) xval(a_)
+#undef xval
+#define xval(a_) a_
+/* Mjoin(a,b): token-paste a and b after expanding them; only defined here when no Mjoin exists yet. */
+#ifndef Mjoin
+#define Mjoin(a,b) mjoin(a,b)
+#ifdef mjoin
+ #undef mjoin
+#endif
+#define mjoin(a,b) a ## b
+#endif
+/* ASM expands to __asm__ __volatile__ (GCC-style inline-assembly keywords). */
+#undef VOLATILE
+#define VOLATILE __volatile__
+#undef ASM
+#define ASM __asm__ VOLATILE
+/* BL: tag b0, b1, bX or bXi0 chosen by BETA0 / BETA1 / BETAX / BETAXI0; if several are defined the last one tested wins, and BL is left untouched when none is defined. */
+#ifdef BETA0
+#undef BL
+#define BL b0
+#endif
+#ifdef BETA1
+#undef BL
+#define BL b1
+#endif
+#ifdef BETAX
+#undef BL
+#define BL bX
+#endif
+#ifdef BETAXI0
+#undef BL
+#define BL bXi0
+#endif
+/* FEXT: variant tag chosen from NO_TRANSPOSE, GER and Conj_; the result is one of Gc, Gu, Nc, N, C or T. */
+#ifdef NO_TRANSPOSE
+#ifdef GER
+#ifdef Conj_
+#undef FEXT
+#define FEXT Gc
+#else
+#undef FEXT
+#define FEXT Gu
+#endif
+#else
+#ifdef Conj_
+#undef FEXT
+#define FEXT Nc
+#else
+#undef FEXT
+#define FEXT N
+#endif
+#endif
+#else
+#ifdef Conj_
+#undef FEXT
+#define FEXT C
+#else
+#undef FEXT
+#define FEXT T
+#endif
+#endif
+/* BLC: FEXT and BL joined by Mjoin (with the Mjoin defined in this header that is a token paste, e.g. Nb0). */
+#undef BLC
+#define BLC Mjoin(FEXT,BL)
+/* NO_INLINE: with __GNUC__ it expands to a complete definition of double sq(double x); otherwise it expands to nothing. NOTE(review): why sq() is emitted is not visible from this header -- confirm at the use sites. */
+#ifdef __GNUC__
+#undef NO_INLINE
+#define NO_INLINE double sq(double x) {return x*x;}
+#else
+#undef NO_INLINE
+#define NO_INLINE
+#endif
+/* Assembler label and jump helpers. Labels are spelled <MY_FUNCTION>_<N>_<a_>; MY_FUNCTION is not defined in this header and must come from the including file, and N is macro-expanded too if the includer defines it. */
+#undef lab
+#define lab(a_) "\n" str(MY_FUNCTION) "_" str(N) "_" str(a_) ":\n\t"
+#undef jmp
+#define jmp(a_) "jmp " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef je
+#define je(a_) "je " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jge
+#define jge(a_) "jge " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jle
+#define jle(a_) "jle " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jl
+#define jl(a_) "jl " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef jne
+#define jne(a_) "jne " str(MY_FUNCTION) "_" str(N) "_" str(a_) "\n\t"
+#undef align /* align is object-like and emits ".align 16"; test / and / sub emit testl / andl / subl with immediate $a_ and register %%e<b_>. */
+#define align ".align 16\n\t"
+#undef test
+#define test(a_,b_) "testl $" str(a_) ",%%e" str(b_) "\n\t"
+#undef and
+#define and(a_,b_) "andl $" str(a_) ",%%e" str(b_) "\n\t"
+#undef sub
+#define sub(a_,b_) "subl $" str(a_) ",%%e" str(b_) "\n\t"
+#undef SS /* SS, MM and E4 expand to bare arithmetic text: a_ + b_, a_ * b_, and a_ shifted right then left by 2. SS and MM are not parenthesized, so surrounding operators bind textually. */
+#define SS(a_,b_) a_ + b_
+#undef MM
+#define MM(a_,b_) a_ * b_
+#undef E4
+#define E4(a_) (( a_ >> 2 ) << 2 )
+/* Reset every precision-dependent macro before one of SCPLX / SREAL / DREAL / DCPLX selects them below. */
+#undef TYPE
+#undef SCALAR
+#undef PREC
+#undef CSHUF
+#undef LSHUF
+#undef HSHUF
+#undef ISHUF
+#undef RSHUF
+#undef SINGLE
+#undef REAL
+#undef DIV
+/* SCPLX: TYPE is Complex, PREC is c, SINGLE is defined, DIV is 2; also defines the internal-linkage constant table signd with two {1.0,-1.0} entries. */
+#ifdef SCPLX
+#define TYPE Complex
+#define SCALAR Complex *
+#define PREC c
+#define CSHUF 177
+#define LSHUF 160
+#define HSHUF 245
+#define ISHUF 13*17
+#define RSHUF 8*17
+#define SINGLE
+#define DIV 2
+/* #ifdef Conj_ */
+/* static const TYPE signd[2]={{-1.0,1.0},{-1.0,1.0}}; */
+/* #else */
+ static const TYPE signd[2]={{1.0,-1.0},{1.0,-1.0}};
+/* #endif */
+#endif
+/* SREAL: TYPE and SCALAR are float, PREC is s, SINGLE and REAL are defined, DIV is 1. */
+#ifdef SREAL
+#define TYPE float
+#define SCALAR float
+#define PREC s
+#define SINGLE
+#define REAL
+#define DIV 1
+#endif
+/* DREAL: TYPE and SCALAR are double, PREC is d, REAL is defined (SINGLE is not), DIV is 2. */
+#ifdef DREAL
+#define TYPE double
+#define SCALAR double
+#define PREC d
+#define REAL
+#define DIV 2
+#endif
+/* DCPLX: TYPE is Dcomplex, PREC is z, DIV is 4; signd holds a single {1.0,-1.0} entry. */
+#ifdef DCPLX
+#define TYPE Dcomplex
+#define SCALAR Dcomplex *
+#define PREC z
+#define CSHUF 1
+#define LSHUF 0
+#define HSHUF 3
+#define ISHUF 3
+#define RSHUF 0
+#define DIV 4
+/* #ifdef Conj_ */
+/* static const TYPE signd[1]={{-1.0,1.0}}; */
+/* #else */
+ static const TYPE signd[1]={{1.0,-1.0}};
+/* #endif */
+#endif
+/* Lookup constants used by M(a_,b_) below: for every entry listed, M<b><a> has the value a - b. */
+#undef M11
+#define M11 0
+#undef M12
+#define M12 1
+#undef M13
+#define M13 2
+#undef M14
+#define M14 3
+#undef M15
+#define M15 4
+#undef M16
+#define M16 5
+#undef M17
+#define M17 6
+#undef M18
+#define M18 7
+
+#undef M23
+#define M23 1
+#undef M24
+#define M24 2
+#undef M25
+#define M25 3
+#undef M26
+#define M26 4
+#undef M27
+#define M27 5
+#undef M28
+#define M28 6
+
+#undef M33
+#define M33 0
+#undef M34
+#define M34 1
+#undef M35
+#define M35 2
+#undef M36
+#define M36 3
+#undef M37
+#define M37 4
+#undef M38
+#define M38 5
+/* Lookup constants used by P(a_,b_) below: for every entry listed, P1<a> has the value a + 1. */
+#undef P10
+#define P10 1
+#undef P11
+#define P11 2
+#undef P12
+#define P12 3
+#undef P13
+#define P13 4
+#undef P14
+#define P14 5
+#undef P15
+#define P15 6
+#undef P16
+#define P16 7
+/* M(a_,b_): expand both arguments, then paste M ## b_ ## a_ to select one of the M constants above (only the listed combinations exist). */
+#undef XM
+#define XM(a_,b_) M ## b_ ## a_
+#undef M
+#define M(a_,b_) XM(a_,b_)
+/* P(a_,b_): expand both arguments, then paste P ## b_ ## a_ to select one of the P constants above. */
+#undef XP
+#define XP(a_,b_) P ## b_ ## a_
+#undef P
+#define P(a_,b_) XP(a_,b_)
+/* mex(a_) yields the string "%%e<a_>" (the letter e pasted onto a_); msx(a_) yields "%%st(<a_>)". */
+#undef mex
+#define mex(a_) str(%%e ## a_)
+#undef msx
+#define msx(a_) "%%st(" str(a_) ")"
+/* Integer and addressing instruction strings; each macro emits one instruction followed by "\n\t". Several names are very short (f, a, m, d), so any call-like use of those identifiers in the including file is rewritten. */
+#undef cmp
+#define cmp(a_,b_) "cmp " mex(a_) "," mex(b_) "\n\t"
+#undef icmpr
+#define icmpr(a_,b_) "cmp " mex(a_) ",(" mex(b_) ")\n\t"
+#undef f
+#define f(a_,b_,c_) "prefetch" str(a_) " " str(b_) "(%%e" #c_ ")\n\t"
+#undef pfx
+#define pfx(a_,b_,c_,d_,e_) "prefetch" str(a_) " " str(b_) "(%%e" #c_ ",%%e" #d_ "," str(e_) ")\n\t"
+#undef a
+#define a(a_,b_) "addl $" str(a_) "," mex(b_) "\n\t"
+#undef m
+#define m(a_,b_) "imul $" str(a_) "," mex(b_) "\n\t"
+#undef pop
+#define pop(a_) "popl %%e" str(a_) "\n\t"
+#undef push
+#define push(a_) "pushl %%e" str(a_) "\n\t"
+#undef d
+#define d(a_,b_) "idiv $" str(a_) "," mex(b_) "\n\t"
+#undef shl
+#define shl(a_,b_) "shl $" str(a_) "," mex(b_) "\n\t"
+#undef shr
+#define shr(a_,b_) "shr $" str(a_) "," mex(b_) "\n\t"
+#undef mm
+#define mm(a_,b_) "mov $" str(a_) "," mex(b_) "\n\t"
+#undef ra
+#define ra(a_,b_) "addl %%e" str(a_) "," mex(b_) "\n\t"
+#undef rs
+#define rs(a_,b_) "subl %%e" str(a_) "," mex(b_) "\n\t"
+/* x87 instruction strings: fl / fp / faa / fma take a memory operand a_(%%e<b_>); the others name %%st(n) stack registers through msx. */
+#undef fl
+#define fl(a_,b_) "fldl " str(a_) "(" mex(b_) ")\n\t"
+#undef fp
+#define fp(a_,b_) "fstpl " str(a_) "(" mex(b_) ")\n\t"
+#undef fd
+#define fd(a_) "fld " msx(a_) "\n\t"
+#undef fap
+#define fap(a_,b_) "faddp " msx(a_) "," msx(b_) "\n\t"
+/* #define fsp(a_) fx(a_) "fsubp %%st," msx(a_) "\n\t" */
+#undef fsp
+#define fsp(a_) "fsubrp %%st," msx(a_) "\n\t"
+#undef fmp
+#define fmp(a_,b_) "fmulp " msx(a_) "," msx(b_) "\n\t"
+#undef fa
+#define fa(a_,b_) "fadd " msx(a_) "," msx(b_) "\n\t"
+#undef fm
+#define fm(a_,b_) "fmul " msx(a_) "," msx(b_) "\n\t"
+#undef faa
+#define faa(a_,b_) "faddl " str(a_) "(" mex(b_) ")\n\t"
+#undef fma
+#define fma(a_,b_) "fmull " str(a_) "(" mex(b_) ")\n\t"
+#undef fz
+#define fz "fldz\n\t"
+#undef fx
+#define fx(a_) "fxch " msx(a_) "\n\t"
+#undef fx1
+#define fx1 "fxch\n\t"
+#undef fc
+#define fc(a_) "fstp " msx(a_) "\n\t"
+/* Vector helpers come in two flavours: without ATHLON the macros name %%xmm registers and build SSE-style mnemonics; with ATHLON (the #else branch) they name %%mm registers. */
+
+#ifndef ATHLON
+/* SSESUF completes each mnemonic: "d " when DREAL or DCPLX is defined, "s " otherwise. RS4 and RS are 16 and 4 in both cases. */
+
+#if defined(DREAL) || defined(DCPLX)
+#undef SSESUF
+#define SSESUF "d "
+#undef RS4
+#define RS4 16
+#undef RS
+#define RS 4
+#else
+#undef SSESUF
+#define SSESUF "s "
+#undef RS4
+#define RS4 16
+#undef RS
+#define RS 4
+#endif
+/* Mnemonic stems ending in p (e.g. "addp") or s (e.g. "adds") are finished by SSESUF, giving for instance addps / addpd or addss / addsd. Operands are either two registers, or a memory operand offset(%%e<reg>) paired with one register. */
+#undef mxx
+#define mxx(a_) str(%%xmm ## a_)
+#undef prp
+#define prp(a_,b_) "rcpp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef prps
+#define prps(a_,b_) "rcps" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pann
+#define pann(a_,b_) "andnp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef psqs
+#define psqs(a_,b_) "sqrts" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef por
+#define por(a_,b_) "orp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pan
+#define pan(a_,b_) "andp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pcm
+#define pcm(a_,b_,c_) "cmpp" SSESUF " $" str(a_) "," mxx(b_) "," mxx(c_) "\n\t"
+#undef pcms
+#define pcms(a_,b_,c_) "cmps" SSESUF " $" str(a_) "," mxx(b_) "," mxx(c_) "\n\t"
+#undef pax
+#define pax(a_,b_) "maxp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef paxs
+#define paxs(a_,b_) "maxs" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pd
+#define pd(a_,b_) "divp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pdsr
+#define pdsr(a_,b_) "divs" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pxx
+#define pxx(a_,b_) "xorp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef px /* px xors register a_ with itself. */
+#define px(a_) "xorp" SSESUF mxx(a_) "," mxx(a_) "\n\t"
+#undef pm
+#define pm(a_,b_) "mulp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pa
+#define pa(a_,b_) "addp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pmm
+#define pmm(a_,b_,c_) "mulp" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pam
+#define pam(a_,b_,c_) "addp" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pl /* pl / pla load from a_(%%e<b_>) into register c_ using the movup / movap stems. */
+#define pl(a_,b_,c_) "movup" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pla
+#define pla(a_,b_,c_) "movap" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pu /* pu / punt / pua / pud store register a_ to b_(%%e<c_>) using the movup / movntp / movap / movlp stems. */
+#define pu(a_,b_,c_) "movup" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef punt
+#define punt(a_,b_,c_) "movntp" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pua
+#define pua(a_,b_,c_) "movap" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pud
+#define pud(a_,b_,c_) "movlp" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pudr
+#define pudr(a_,b_) "movlp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pc
+#define pc(a_,b_) "movap" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef ps
+#define ps(a_,b_,c_) "shufp" SSESUF " $" str(a_) "," mxx(b_) "," mxx(c_) "\n\t"
+#undef phl
+#define phl(a_,b_) "movhlp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pus
+#define pus(a_,b_,c_) "movs" SSESUF mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pls
+#define pls(a_,b_,c_) "movs" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pld
+#define pld(a_,b_,c_) "movlp" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef plh
+#define plh(a_,b_) "movlhp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pas
+#define pas(a_,b_,c_) "adds" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pms
+#define pms(a_,b_,c_) "muls" SSESUF str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pcs
+#define pcs(a_,b_) "movs" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pasr
+#define pasr(a_,b_) "adds" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pmsr
+#define pmsr(a_,b_) "muls" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef pul
+#define pul(a_,b_) "unpcklp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+#undef puh
+#define puh(a_,b_) "unpckhp" SSESUF mxx(a_) "," mxx(b_) "\n\t"
+/* Indexed variants: the memory operand is a_(%%e<b_>,%%e<c_>,d_) for the load forms and b_(%%e<c_>,%%e<d_>,e_) for the store forms; the last field is stringified directly with #, without macro expansion. */
+#undef plsx
+#define plsx(a_,b_,c_,d_,e_) \
+ "movs" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef plx
+#define plx(a_,b_,c_,d_,e_) \
+ "movup" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef plax
+#define plax(a_,b_,c_,d_,e_) \
+ "movap" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pasx
+#define pasx(a_,b_,c_,d_,e_) \
+ "adds" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pusx
+#define pusx(a_,b_,c_,d_,e_) \
+ "movs" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef pux
+#define pux(a_,b_,c_,d_,e_) \
+ "movup" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef puax
+#define puax(a_,b_,c_,d_,e_) \
+ "movap" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef pudx
+#define pudx(a_,b_,c_,d_,e_) \
+ "movlp" SSESUF mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+
+#undef pldx
+#define pldx(a_,b_,c_,d_,e_) \
+ "movlp" SSESUF str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+
+#else
+/* ATHLON branch: RS4 is 8 and RS is 2, mxx names %%mm registers, and only a subset of the macro names from the other branch is redefined here. */
+#undef RS4
+#define RS4 8
+#undef RS
+#define RS 2
+
+#undef mxx
+#define mxx(a_) str(%%mm ## a_)
+#undef pul
+#define pul(a_,b_) "punpckldq " mxx(a_) "," mxx(b_) "\n\t"
+#undef puh
+#define puh(a_,b_) "punpckhdq " mxx(a_) "," mxx(b_) "\n\t"
+/* In this branch pla is identical to pl (both emit movq), phl / plh emit the same instructions as puh / pul, and ps ignores its first argument a_. */
+#undef px
+#define px(a_) "pxor " mxx(a_) "," mxx(a_) "\n\t"
+#undef pm
+#define pm(a_,b_) "pfmul " mxx(a_) "," mxx(b_) "\n\t"
+#undef pa
+#define pa(a_,b_) "pfadd " mxx(a_) "," mxx(b_) "\n\t"
+#undef pac
+#define pac(a_,b_) "pfacc " mxx(a_) "," mxx(b_) "\n\t"
+#undef pmm
+#define pmm(a_,b_,c_) "pfmul " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pam
+#define pam(a_,b_,c_) "pfadd " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pl
+#define pl(a_,b_,c_) "movq " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pla
+#define pla(a_,b_,c_) "movq " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+#undef pu
+#define pu(a_,b_,c_) "movq " mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pc
+#define pc(a_,b_) "movq " mxx(a_) "," mxx(b_) "\n\t"
+#undef ps
+#define ps(a_,b_,c_) "pswapd " mxx(b_) "," mxx(c_) "\n\t"
+#undef phl
+#define phl(a_,b_) "punpckhdq " mxx(a_) "," mxx(b_) "\n\t"
+#undef plh
+#define plh(a_,b_) "punpckldq " mxx(a_) "," mxx(b_) "\n\t"
+#undef pus
+#define pus(a_,b_,c_) "movd " mxx(a_) "," str(b_) "(" mex(c_) ")\n\t"
+#undef pls
+#define pls(a_,b_,c_) "movd " str(a_) "(" mex(b_) ")," mxx(c_) "\n\t"
+/* Indexed variants for this branch. NOTE(review): pasx emits "addss" with %%mm operands, unlike the pf-prefixed mnemonics used elsewhere in this branch -- confirm that this is intended. */
+#undef plsx
+#define plsx(a_,b_,c_,d_,e_) \
+ "movd " str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef plx
+#define plx(a_,b_,c_,d_,e_) \
+ "movq " str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pasx
+#define pasx(a_,b_,c_,d_,e_) \
+ "addss " str(a_) "(" mex(b_) "," mex(c_) "," #d_ ")," mxx(e_) "\n\t"
+#undef pusx
+#define pusx(a_,b_,c_,d_,e_) \
+ "movd " mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#undef pux
+#define pux(a_,b_,c_,d_,e_) \
+ "movq " mxx(a_) "," str(b_) "(" mex(c_) "," mex(d_) "," #e_ ")\n\t"
+#endif
+
+#endif /* CAMM_UTIL_H */
-- 
cgit v1.2.3