/* * Automatically Tuned Linear Algebra Software v3.8.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include #include #include "atlas_enum.h" #ifndef ATLAS_MISC_H #define ATLAS_MISC_H #include "atlas_type.h" #ifdef ATL_PROFILE extern int ATL_ProfGemmCameFrom; #endif /* * Some useful macro functions */ #if (defined(PentiumCPS) || defined(ATL_USEPTHREADS)) && !defined(WALL) #define WALL #endif #ifndef time00 #if defined(WALL) #define time00 ATL_walltime #else #define time00 ATL_cputime #endif #endif #define Mabs(x) ( (x) >= 0 ? (x) : -(x) ) #define Mmax(x, y) ( (x) > (y) ? (x) : (y) ) #define Mmin(x, y) ( (x) > (y) ? (y) : (x) ) #define Mlowcase(C) ( ((C) > 64 && (C) < 91) ? (C) | 32 : (C) ) #define Mupcase(C) ( ((C) > 96 && (C) < 123) ? (C) & 0xDF : (C) ) /* * packed indexing functions (upper & lower) */ #define Mjoin(pre, nam) my_join(pre, nam) #define my_join(pre, nam) pre ## nam #define Mstr2(m) # m #define Mstr(m) Mstr2(m) #define ATL_assert(n_) \ { \ if (!(n_)) \ { \ ATL_xerbla(0, __FILE__, "assertion %s failed, line %d of file %s\n", \ Mstr(n_), __LINE__, __FILE__); \ } \ } /* * Define some C99 features that we use when we know the compiler supports them */ #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) #define INLINE inline #define RESTRICT restrict #else #define INLINE #define RESTRICT #endif #if defined(SREAL) #define EPS 5.0e-7 #define TYPE float #define PRE s #define UPR s #define PREU S #define PATL ATL_s #define PATU ATLU_s #define UATL ATLU_s #define CBLA cblas_s #define PATLU ATL_s #define ATL_rone 1.0f #define ATL_rnone -1.0f #define ATL_rzero 0.0f #define ATL_typify(m_) Mjoin(m_,f) #include "atlas_ssysinfo.h" #elif defined(DREAL) #define EPS 1.0e-15 #define TYPE double #define PRE d #define UPR d #define PREU D #define PATL ATL_d #define PATU ATLU_d #define UATL ATLU_d #define CBLA cblas_d #define PATLU ATL_d #define ATL_rone 1.0 #define ATL_rnone -1.0 #define ATL_rzero 0.0 #define ATL_typify(m_) m_ #include "atlas_dsysinfo.h" #elif defined (QREAL) #define EPS 1.9259299443872358530559779425849273E-34L #define TYPE long double #define PRE q #define UPR q #define PREU Q #define PATL ATL_q #define PATU ATLU_q #define CBLA cblas_q #elif defined(SCPLX) #define EPS 5.0e-7 #define TYPE float #define PRE c #define UPR s #define PREU C #define PATL ATL_c #define PATLU ATL_s #define PATU ATLU_c #define UATL ATLU_s #define ATL_rone 1.0f #define ATL_rnone -1.0f #define ATL_rzero 0.0f #define ATL_typify(m_) Mjoin(m_,f) #define CBLA cblas_c #include "atlas_csysinfo.h" #elif defined(DCPLX) #define TYPE double #define PRE z #define UPR d #define PREU Z #define PATL ATL_z #define PATLU ATL_d #define PATU ATLU_z #define UATL ATLU_d #define EPS 1.0e-15 #define ATL_rone 1.0 #define ATL_rnone -1.0 #define ATL_rzero 0.0 #define ATL_typify(m_) m_ #define CBLA cblas_z #include "atlas_zsysinfo.h" #endif #if defined (SREAL) || defined (DREAL) || defined (SCPLX) || defined (DCPLX) #define ATL_sizeof Mjoin(PATL,size) #define ATL_MulBySize Mjoin(PATL,MulBySize) #define ATL_DivBySize Mjoin(PATL,DivBySize) #endif #if ( defined(SREAL) || defined(DREAL) || defined(QREAL) ) #define TREAL #define SHIFT #define SCALAR TYPE #define SADD & #define SVAL #define SVVAL * #define SCALAR_IS_ONE(M_scalar) ((M_scalar) == ATL_rone) #define SCALAR_IS_NONE(M_scalar) ((M_scalar) == ATL_rnone) #define SCALAR_IS_ZERO(M_scalar) ((M_scalar) == ATL_rzero) #elif defined(SCPLX) || defined(DCPLX) #define TCPLX /* * c = b*c + v; */ #define CMULT2(v, a, b, tmp) \ { \ tmp = *(a) * *(b) - *(a+1) * *(b+1); \ *(b+1) = *(a) * *(b+1) + *(a+1) * *(b) + *(v+1); \ *(b) = tmp + *v; \ } #define SHIFT << 1 #define SCALAR TYPE * #define SADD #define SVAL * #define SVVAL #define SCALAR_IS_ONE(M_scalar) \ ( (*(M_scalar) == ATL_rone) && ((M_scalar)[1] == ATL_rzero) ) #define SCALAR_IS_NONE(M_scalar) \ ( (*(M_scalar) == ATL_rnone) && ((M_scalar)[1] == ATL_rzero) ) #define SCALAR_IS_ZERO(M_scalar) \ ( (*(M_scalar) == ATL_rzero) && ((M_scalar)[1] == ATL_rzero) ) #endif #if defined(ALPHA1) #define ATL_MulByALPHA(x_) (x_) #define NM _a1 #elif defined (ALPHA0) #define ATL_MulByALPHA(x_) ATL_rzero #define NM _a0 #elif defined (ALPHAN1) #define ATL_MulByALPHA(x_) (-(x_)) #define NM _an1 #elif defined (ALPHAXI0) #define ATL_MulByALPHA(x_) (ralpha*(x_)) #define NM _aXi0 #elif defined (ALPHA1C) #define NM _a1c #elif defined (ALPHAN1C) #define NM _an1c #elif defined (ALPHAXI0C) #define NM _aXi0c #elif defined (ALPHAXC) #define NM _aXc #elif defined (ALPHAX) #define ATL_MulByALPHA(x_) (alpha*(x_)) #define NM _aX #endif #if defined(BETA1) #define ATL_MulByBETA(x_) (x_) #define MSTAT A[i] += v[i] #define BNM _b1 #elif defined(BETA1C) #define BNM _b1c #elif defined(BETAN1) #define ATL_MulByBETA(x_) (-(x_)) #define MSTAT A[i] = v[i] - A[i] #define BNM _bn1 #elif defined(BETAN1C) #define BNM _bn1c #elif defined(BETA0) #define ATL_MulByBETA(x_) ATL_rzero #define MSTAT A[i] = v[i] #define BNM _b0 #elif defined (BETAXI0) #define BNM _bXi0 #define ATL_MulByBETA(x_) (rbeta*(x_)) #elif defined (BETAXI0C) #define BNM _bXi0c #elif defined (BETAX) #define ATL_MulByBETA(x_) (beta*(x_)) #define MSTAT A[i] = beta*A[i] + v[i] #define BNM _bX #elif defined (BETAXC) #define BNM _bXc #endif /* any alignment below this forces data copy in gemm */ #ifndef ATL_MinMMAlign #define ATL_MinMMAlign 16 #endif #if (ATL_MinMMAlign == 1 || ATL_MinMMAlign == 0) #define ATL_DataIsMinAligned(ptr) 1 #elif (ATL_MinMMAlign == 2) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>1)<<1 == (size_t) (ptr) ) #elif (ATL_MinMMAlign == 4) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>2)<<2 == (size_t) (ptr) ) #elif (ATL_MinMMAlign == 8) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>3)<<3 == (size_t) (ptr) ) #elif (ATL_MinMMAlign == 16) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>4)<<4 == (size_t) (ptr) ) #elif (ATL_MinMMAlign == 32) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>5)<<5 == (size_t) (ptr) ) #elif (ATL_MinMMAlign == 64) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>6)<<6 == (size_t) (ptr) ) #elif (ATL_MinMMAlign == 128) #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))>>7)<<7 == (size_t) (ptr) ) #else #define ATL_DataIsMinAligned(ptr) \ ( (((size_t) (ptr))/ATL_MinMMAlign)*ATL_MinMMAlign == (size_t) (ptr) ) #endif #define ATL_Cachelen 32 #if (ATL_Cachelen == 4) #define ATL_MulByCachelen(N_) ( (N_) << 2 ) #define ATL_DivByCachelen(N_) ( (N_) >> 2 ) #elif (ATL_Cachelen == 8) #define ATL_MulByCachelen(N_) ( (N_) << 3 ) #define ATL_DivByCachelen(N_) ( (N_) >> 3 ) #elif (ATL_Cachelen == 16) #define ATL_MulByCachelen(N_) ( (N_) << 4 ) #define ATL_DivByCachelen(N_) ( (N_) >> 4 ) #elif (ATL_Cachelen == 32) #define ATL_MulByCachelen(N_) ( (N_) << 5 ) #define ATL_DivByCachelen(N_) ( (N_) >> 5 ) #elif (ATL_Cachelen == 64) #define ATL_MulByCachelen(N_) ( (N_) << 6 ) #define ATL_DivByCachelen(N_) ( (N_) >> 6 ) #elif (ATL_Cachelen == 128) #define ATL_MulByCachelen(N_) ( (N_) << 7 ) #define ATL_DivByCachelen(N_) ( (N_) >> 7 ) #elif (ATL_Cachelen == 256) #define ATL_MulByCachelen(N_) ( (N_) << 8 ) #define ATL_DivByCachelen(N_) ( (N_) >> 8 ) #else #define ATL_MulByCachelen(N_) ( (N_) * ATL_Cachelen ) #define ATL_DivByCachelen(N_) ( (N_) / ATL_Cachelen ) #endif #if (ATL_Cachelen < ATL_MinMMAlign) Force a compilation error if our required alignment is at least the minimum!!@^ #endif #define ATL_AlignPtr(vp) \ (void*) (ATL_Cachelen + ATL_MulByCachelen(ATL_DivByCachelen((size_t) (vp)))) #define ATL_FindPtrAdjust(vp, iadj_) \ { \ (iadj_) = ((size_t)(vp))-ATL_MulByCachelen(ATL_DivByCachelen((size_t)(vp)));\ if (iadj_) \ { \ if ( (iadj_) == ATL_MulBySize(ATL_DivBySize(iadj_)) ) \ (iadj_) = ATL_DivBySize(iadj_); \ else (iadj_) = 0; \ }\ } #define ATL_FindMatAdjust(vp_, lda_, iadj_) \ { \ if (ATL_MulByCachelen(ATL_DivByCachelen(ATL_MulBySize(lda_))) \ == ATL_MulBySize(lda_)) \ { \ ATL_FindPtrAdjust(vp_, iadj_); \ } \ else (iadj_) = 0; \ } #define ATL_sqrtLL(x, res) \ asm ("fsqrt" : "=t" (res) : "0" (x)); /* * Find N necessary for alignment. Written as function for optimization, * declared static to encourage inlining */ static int ATL_AlignOffset (const int N, /* max return value */ const void *vp, /* pointer to be aligned */ const int inc, /* size of each elt, in bytes */ const int align) /* required alignment, in bytes */ { const int p = align/inc; const size_t k=(size_t)vp, j=k/inc; int iret; if (k == (j)*inc && p*inc == align) { iret = ((j+p-1) / p)*p - j; if (iret <= N) return(iret); } return(N); } /* * Gcc links in crap that MSVC++ and DVF can't handle if you use stdout * or stderr, so use this beautiful kludge to avoid this problem -- RCW */ #ifdef GCCWIN #include static int WINFPRINTF(FILE *fpout, char *form, ...) { int ierr=0; va_list argptr; va_start(argptr, form); if (fpout == NULL) ierr = vprintf(form, argptr); else ierr = vfprintf(fpout, form, argptr); va_end(argptr); return(ierr); } #ifdef stdout #undef stdout #endif #ifdef stderr #undef stderr #endif #ifdef assert #undef assert #endif #define stdout NULL #define stderr NULL #define fprintf WINFPRINTF #define assert WINASSERT #define WINASSERT(n_) \ { \ if (!(n_)) \ { \ printf("assertion %s failed, line %d of file %s\n", \ Mstr(n_), __LINE__, __FILE__); \ exit(1); \ } \ } #endif #include "atlas_aux.h" #endif