summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/ATLAS/include/atlas_misc.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/ATLAS/include/atlas_misc.h')
-rw-r--r--kaldi_io/src/tools/ATLAS/include/atlas_misc.h416
1 files changed, 416 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/ATLAS/include/atlas_misc.h b/kaldi_io/src/tools/ATLAS/include/atlas_misc.h
new file mode 100644
index 0000000..88f754d
--- /dev/null
+++ b/kaldi_io/src/tools/ATLAS/include/atlas_misc.h
@@ -0,0 +1,416 @@
+/*
+ * Automatically Tuned Linear Algebra Software v3.8.3
+ * (C) Copyright 1997 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the ATLAS group or the names of its contributers may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "atlas_enum.h"
+
+#ifndef ATLAS_MISC_H
+#define ATLAS_MISC_H
+#include "atlas_type.h"
+#ifdef ATL_PROFILE
+ extern int ATL_ProfGemmCameFrom;
+#endif
+/*
+ * Some useful macro functions
+ */
+#if (defined(PentiumCPS) || defined(ATL_USEPTHREADS)) && !defined(WALL)
+ #define WALL
+#endif
+#ifndef time00
+ #if defined(WALL)
+ #define time00 ATL_walltime
+ #else
+ #define time00 ATL_cputime
+ #endif
+#endif
+#define Mabs(x) ( (x) >= 0 ? (x) : -(x) )
+#define Mmax(x, y) ( (x) > (y) ? (x) : (y) )
+#define Mmin(x, y) ( (x) > (y) ? (y) : (x) )
+#define Mlowcase(C) ( ((C) > 64 && (C) < 91) ? (C) | 32 : (C) )
+#define Mupcase(C) ( ((C) > 96 && (C) < 123) ? (C) & 0xDF : (C) )
+/*
+ * packed indexing functions (upper & lower)
+ */
+
+#define Mjoin(pre, nam) my_join(pre, nam)
+#define my_join(pre, nam) pre ## nam
+#define Mstr2(m) # m
+#define Mstr(m) Mstr2(m)
+
+#define ATL_assert(n_) \
+{ \
+ if (!(n_)) \
+ { \
+ ATL_xerbla(0, __FILE__, "assertion %s failed, line %d of file %s\n", \
+ Mstr(n_), __LINE__, __FILE__); \
+ } \
+}
+
+/*
+ * Define some C99 features that we use when we know the compiler supports them
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999)
+ #define INLINE inline
+ #define RESTRICT restrict
+#else
+ #define INLINE
+ #define RESTRICT
+#endif
+
+#if defined(SREAL)
+ #define EPS 5.0e-7
+ #define TYPE float
+ #define PRE s
+ #define UPR s
+ #define PREU S
+ #define PATL ATL_s
+ #define PATU ATLU_s
+ #define UATL ATLU_s
+ #define CBLA cblas_s
+ #define PATLU ATL_s
+ #define ATL_rone 1.0f
+ #define ATL_rnone -1.0f
+ #define ATL_rzero 0.0f
+ #define ATL_typify(m_) Mjoin(m_,f)
+ #include "atlas_ssysinfo.h"
+#elif defined(DREAL)
+ #define EPS 1.0e-15
+ #define TYPE double
+ #define PRE d
+ #define UPR d
+ #define PREU D
+ #define PATL ATL_d
+ #define PATU ATLU_d
+ #define UATL ATLU_d
+ #define CBLA cblas_d
+ #define PATLU ATL_d
+ #define ATL_rone 1.0
+ #define ATL_rnone -1.0
+ #define ATL_rzero 0.0
+ #define ATL_typify(m_) m_
+ #include "atlas_dsysinfo.h"
+#elif defined (QREAL)
+ #define EPS 1.9259299443872358530559779425849273E-34L
+ #define TYPE long double
+ #define PRE q
+ #define UPR q
+ #define PREU Q
+ #define PATL ATL_q
+ #define PATU ATLU_q
+ #define CBLA cblas_q
+#elif defined(SCPLX)
+ #define EPS 5.0e-7
+ #define TYPE float
+ #define PRE c
+ #define UPR s
+ #define PREU C
+ #define PATL ATL_c
+ #define PATLU ATL_s
+ #define PATU ATLU_c
+ #define UATL ATLU_s
+ #define ATL_rone 1.0f
+ #define ATL_rnone -1.0f
+ #define ATL_rzero 0.0f
+ #define ATL_typify(m_) Mjoin(m_,f)
+ #define CBLA cblas_c
+ #include "atlas_csysinfo.h"
+#elif defined(DCPLX)
+ #define TYPE double
+ #define PRE z
+ #define UPR d
+ #define PREU Z
+ #define PATL ATL_z
+ #define PATLU ATL_d
+ #define PATU ATLU_z
+ #define UATL ATLU_d
+ #define EPS 1.0e-15
+ #define ATL_rone 1.0
+ #define ATL_rnone -1.0
+ #define ATL_rzero 0.0
+ #define ATL_typify(m_) m_
+ #define CBLA cblas_z
+ #include "atlas_zsysinfo.h"
+#endif
+
+#if defined (SREAL) || defined (DREAL) || defined (SCPLX) || defined (DCPLX)
+ #define ATL_sizeof Mjoin(PATL,size)
+ #define ATL_MulBySize Mjoin(PATL,MulBySize)
+ #define ATL_DivBySize Mjoin(PATL,DivBySize)
+#endif
+
+#if ( defined(SREAL) || defined(DREAL) || defined(QREAL) )
+ #define TREAL
+ #define SHIFT
+ #define SCALAR TYPE
+ #define SADD &
+ #define SVAL
+ #define SVVAL *
+ #define SCALAR_IS_ONE(M_scalar) ((M_scalar) == ATL_rone)
+ #define SCALAR_IS_NONE(M_scalar) ((M_scalar) == ATL_rnone)
+ #define SCALAR_IS_ZERO(M_scalar) ((M_scalar) == ATL_rzero)
+#elif defined(SCPLX) || defined(DCPLX)
+ #define TCPLX
+/*
+ * c = b*c + v;
+ */
+ #define CMULT2(v, a, b, tmp) \
+ { \
+ tmp = *(a) * *(b) - *(a+1) * *(b+1); \
+ *(b+1) = *(a) * *(b+1) + *(a+1) * *(b) + *(v+1); \
+ *(b) = tmp + *v; \
+ }
+ #define SHIFT << 1
+ #define SCALAR TYPE *
+ #define SADD
+ #define SVAL *
+ #define SVVAL
+ #define SCALAR_IS_ONE(M_scalar) \
+ ( (*(M_scalar) == ATL_rone) && ((M_scalar)[1] == ATL_rzero) )
+ #define SCALAR_IS_NONE(M_scalar) \
+ ( (*(M_scalar) == ATL_rnone) && ((M_scalar)[1] == ATL_rzero) )
+ #define SCALAR_IS_ZERO(M_scalar) \
+ ( (*(M_scalar) == ATL_rzero) && ((M_scalar)[1] == ATL_rzero) )
+#endif
+
+#if defined(ALPHA1)
+ #define ATL_MulByALPHA(x_) (x_)
+ #define NM _a1
+#elif defined (ALPHA0)
+ #define ATL_MulByALPHA(x_) ATL_rzero
+ #define NM _a0
+#elif defined (ALPHAN1)
+ #define ATL_MulByALPHA(x_) (-(x_))
+ #define NM _an1
+#elif defined (ALPHAXI0)
+ #define ATL_MulByALPHA(x_) (ralpha*(x_))
+ #define NM _aXi0
+#elif defined (ALPHA1C)
+ #define NM _a1c
+#elif defined (ALPHAN1C)
+ #define NM _an1c
+#elif defined (ALPHAXI0C)
+ #define NM _aXi0c
+#elif defined (ALPHAXC)
+ #define NM _aXc
+#elif defined (ALPHAX)
+ #define ATL_MulByALPHA(x_) (alpha*(x_))
+ #define NM _aX
+#endif
+
+#if defined(BETA1)
+ #define ATL_MulByBETA(x_) (x_)
+ #define MSTAT A[i] += v[i]
+ #define BNM _b1
+#elif defined(BETA1C)
+ #define BNM _b1c
+#elif defined(BETAN1)
+ #define ATL_MulByBETA(x_) (-(x_))
+ #define MSTAT A[i] = v[i] - A[i]
+ #define BNM _bn1
+#elif defined(BETAN1C)
+ #define BNM _bn1c
+#elif defined(BETA0)
+ #define ATL_MulByBETA(x_) ATL_rzero
+ #define MSTAT A[i] = v[i]
+ #define BNM _b0
+#elif defined (BETAXI0)
+ #define BNM _bXi0
+ #define ATL_MulByBETA(x_) (rbeta*(x_))
+#elif defined (BETAXI0C)
+ #define BNM _bXi0c
+#elif defined (BETAX)
+ #define ATL_MulByBETA(x_) (beta*(x_))
+ #define MSTAT A[i] = beta*A[i] + v[i]
+ #define BNM _bX
+#elif defined (BETAXC)
+ #define BNM _bXc
+#endif
+
+/* any alignment below this forces data copy in gemm */
+#ifndef ATL_MinMMAlign
+ #define ATL_MinMMAlign 16
+#endif
+#if (ATL_MinMMAlign == 1 || ATL_MinMMAlign == 0)
+ #define ATL_DataIsMinAligned(ptr) 1
+#elif (ATL_MinMMAlign == 2)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>1)<<1 == (size_t) (ptr) )
+#elif (ATL_MinMMAlign == 4)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>2)<<2 == (size_t) (ptr) )
+#elif (ATL_MinMMAlign == 8)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>3)<<3 == (size_t) (ptr) )
+#elif (ATL_MinMMAlign == 16)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>4)<<4 == (size_t) (ptr) )
+#elif (ATL_MinMMAlign == 32)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>5)<<5 == (size_t) (ptr) )
+#elif (ATL_MinMMAlign == 64)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>6)<<6 == (size_t) (ptr) )
+#elif (ATL_MinMMAlign == 128)
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))>>7)<<7 == (size_t) (ptr) )
+#else
+ #define ATL_DataIsMinAligned(ptr) \
+ ( (((size_t) (ptr))/ATL_MinMMAlign)*ATL_MinMMAlign == (size_t) (ptr) )
+#endif
+
+#define ATL_Cachelen 32
+#if (ATL_Cachelen == 4)
+ #define ATL_MulByCachelen(N_) ( (N_) << 2 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 2 )
+#elif (ATL_Cachelen == 8)
+ #define ATL_MulByCachelen(N_) ( (N_) << 3 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 3 )
+#elif (ATL_Cachelen == 16)
+ #define ATL_MulByCachelen(N_) ( (N_) << 4 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 4 )
+#elif (ATL_Cachelen == 32)
+ #define ATL_MulByCachelen(N_) ( (N_) << 5 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 5 )
+#elif (ATL_Cachelen == 64)
+ #define ATL_MulByCachelen(N_) ( (N_) << 6 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 6 )
+#elif (ATL_Cachelen == 128)
+ #define ATL_MulByCachelen(N_) ( (N_) << 7 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 7 )
+#elif (ATL_Cachelen == 256)
+ #define ATL_MulByCachelen(N_) ( (N_) << 8 )
+ #define ATL_DivByCachelen(N_) ( (N_) >> 8 )
+#else
+ #define ATL_MulByCachelen(N_) ( (N_) * ATL_Cachelen )
+ #define ATL_DivByCachelen(N_) ( (N_) / ATL_Cachelen )
+#endif
+
+#if (ATL_Cachelen < ATL_MinMMAlign)
+ Force a compilation error if our required alignment is at least the
+ minimum!!@^
+#endif
+
+#define ATL_AlignPtr(vp) \
+ (void*) (ATL_Cachelen + ATL_MulByCachelen(ATL_DivByCachelen((size_t) (vp))))
+
+#define ATL_FindPtrAdjust(vp, iadj_) \
+{ \
+ (iadj_) = ((size_t)(vp))-ATL_MulByCachelen(ATL_DivByCachelen((size_t)(vp)));\
+ if (iadj_) \
+ { \
+ if ( (iadj_) == ATL_MulBySize(ATL_DivBySize(iadj_)) ) \
+ (iadj_) = ATL_DivBySize(iadj_); \
+ else (iadj_) = 0; \
+ }\
+}
+#define ATL_FindMatAdjust(vp_, lda_, iadj_) \
+{ \
+ if (ATL_MulByCachelen(ATL_DivByCachelen(ATL_MulBySize(lda_))) \
+ == ATL_MulBySize(lda_)) \
+ { \
+ ATL_FindPtrAdjust(vp_, iadj_); \
+ } \
+ else (iadj_) = 0; \
+}
+
+#define ATL_sqrtLL(x, res) \
+ asm ("fsqrt" : "=t" (res) : "0" (x));
+
+/*
+ * Find N necessary for alignment. Written as function for optimization,
+ * declared static to encourage inlining
+ */
+static int ATL_AlignOffset
+(const int N, /* max return value */
+ const void *vp, /* pointer to be aligned */
+ const int inc, /* size of each elt, in bytes */
+ const int align) /* required alignment, in bytes */
+{
+ const int p = align/inc;
+ const size_t k=(size_t)vp, j=k/inc;
+ int iret;
+ if (k == (j)*inc && p*inc == align)
+ {
+ iret = ((j+p-1) / p)*p - j;
+ if (iret <= N) return(iret);
+ }
+ return(N);
+}
+
+/*
+ * Gcc links in crap that MSVC++ and DVF can't handle if you use stdout
+ * or stderr, so use this beautiful kludge to avoid this problem -- RCW
+ */
+#ifdef GCCWIN
+
+#include <stdarg.h>
+static int WINFPRINTF(FILE *fpout, char *form, ...)
+{
+ int ierr=0;
+ va_list argptr;
+
+ va_start(argptr, form);
+ if (fpout == NULL) ierr = vprintf(form, argptr);
+ else ierr = vfprintf(fpout, form, argptr);
+ va_end(argptr);
+
+ return(ierr);
+}
+
+#ifdef stdout
+ #undef stdout
+#endif
+#ifdef stderr
+ #undef stderr
+#endif
+#ifdef assert
+ #undef assert
+#endif
+
+#define stdout NULL
+#define stderr NULL
+#define fprintf WINFPRINTF
+#define assert WINASSERT
+#define WINASSERT(n_) \
+{ \
+ if (!(n_)) \
+ { \
+ printf("assertion %s failed, line %d of file %s\n", \
+ Mstr(n_), __LINE__, __FILE__); \
+ exit(1); \
+ } \
+}
+
+#endif
+
+#include "atlas_aux.h"
+
+#endif