summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/ATLAS/include/atlas_prefetch.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/ATLAS/include/atlas_prefetch.h')
-rw-r--r--kaldi_io/src/tools/ATLAS/include/atlas_prefetch.h197
1 files changed, 0 insertions, 197 deletions
diff --git a/kaldi_io/src/tools/ATLAS/include/atlas_prefetch.h b/kaldi_io/src/tools/ATLAS/include/atlas_prefetch.h
deleted file mode 100644
index 83ee2df..0000000
--- a/kaldi_io/src/tools/ATLAS/include/atlas_prefetch.h
+++ /dev/null
@@ -1,197 +0,0 @@
-#ifndef ATLAS_PREFETCH_H
-#define ATLAS_PREFETCH_H
-/*
- * The AltiVec prefetch model does not map well onto SSE-style prefetch
- * macros, so AltiVec gets its own special commands.
- */
-#if defined(ATL_AltiVec)
- #include "atlas_altivec.h"
-#endif
-/*
- * L1 prefetch macros selected below (the ATL_pfl2* macros are the L2 analogues):
- * ATL_pfl1R(mem) : fetch location mem to L1, with intent to read *only*
- * ATL_pfl1W(mem) : fetch location mem to L1, with intent to read/write
- * ATL_pfl1WO(mem) : fetch location mem to L1, with intent to write ONLY
- */
-
-#if defined(ATL_3DNow) /* first branch of the per-architecture selection chain */
- #ifdef __GNUC__ /* macros use GNU-style inline asm; other compilers get nothing here */
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("prefetch %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("prefetchw %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1WO ATL_pfl1W /* no separate write-only form: reuse read/write */
- #define ATL_GOT_L1PREFETCH /* real L1 macros exist, so no dummies are needed */
- #ifdef ATL_SSE1 /* L2 macros are only provided when SSE1 is also enabled */
- #define ATL_pfl2R(mem) \
- __asm__ __volatile__ ("prefetcht1 %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl2W(mem) \
- __asm__ __volatile__ ("prefetcht1 %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl2WO ATL_pfl2W /* R, W and WO all issue prefetcht1 here */
- #define ATL_GOT_L2PREFETCH
- #endif /* ATL_SSE1 */
- #endif /* __GNUC__ */
-#elif defined(ATL_SSE1) || defined (ATL_SSE2) /* SSE prefetch is available */
- #ifdef __GNUC__ /* macros use GNU-style inline asm */
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("prefetchnta %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("prefetchnta %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1WO ATL_pfl1W /* R, W and WO all issue prefetchnta here */
- #define ATL_GOT_L1PREFETCH
-
- #define ATL_pfl2R(mem) \
- __asm__ __volatile__ ("prefetcht1 %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl2W(mem) \
- __asm__ __volatile__ ("prefetcht1 %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl2WO ATL_pfl2W /* R, W and WO all issue prefetcht1 here */
- #define ATL_GOT_L2PREFETCH
- #endif /* __GNUC__ */
-#elif defined(__SUNPRO_C) && defined(__sparc) /* && __SUNPRO_CC > 0x600 */
- #include <sun_prefetch.h>
- #define ATL_pfl1R(mem) sparc_prefetch_read_many((void*)(mem))
- #define ATL_pfl1W(mem) sparc_prefetch_write_many((void*)(mem))
- #define ATL_GOT_L1PREFETCH
- #define ATL_pfl2R(mem) sparc_prefetch_read_many((void*)(mem)) /* same call as ATL_pfl1R */
- #define ATL_pfl2W(mem) sparc_prefetch_write_many((void*)(mem)) /* same call as ATL_pfl1W */
- #define ATL_GOT_L2PREFETCH /* NOTE(review): no ATL_pfl2WO is defined in this branch */
-#elif defined(ATL_ARCH_21264) /* L1 macros only; separate GNU C and DEC C spellings */
- #ifdef __GNUC__
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("ldt $f31, %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("lds $f31, %0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1WO(mem) \
- __asm__ __volatile__ ("wh64 %0" : : "m" (*((char *)(mem))))
- #define ATL_GOT_L1PREFETCH
- #elif defined(__DECC) /* same three instructions via asm(); presumably declared by c_asm.h */
- #include "c_asm.h"
- #define ATL_pfl1R(mem) asm ("ldt %f31,(%a0) ;", mem)
- #define ATL_pfl1W(mem) asm ("lds %f31,(%a0) ;", mem)
- #define ATL_pfl1WO(mem) asm ("wh64 (%a0) ;", mem)
- #define ATL_GOT_L1PREFETCH
- #endif /* __GNUC__ / __DECC */
-/*
- * Note: SunUS5/10 seems to get no benefit from prefetch, so it is not enabled
- */
-#elif defined(ATL_ARCH_USIV) || defined(ATL_ARCH_SunUSIII) || \
- defined(ATL_ARCH_SunUSII) || defined(ATL_ARCH_SunUSI)
- #ifdef __GNUC__ /* macros use GNU-style inline asm */
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("prefetch %0,0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("prefetch %0,2" : : "m" (*((char *)(mem))))
- #define ATL_GOT_L1PREFETCH
- #define ATL_pfl2R(mem) \
- __asm__ __volatile__ ("prefetch %0,3" : : "m" (*((char *)(mem))))
- #define ATL_pfl2W(mem) /* same "prefetch %0,2" as ATL_pfl1W */ \
- __asm__ __volatile__ ("prefetch %0,2" : : "m" (*((char *)(mem))))
- #define ATL_GOT_L2PREFETCH /* NOTE(review): no ATL_pfl2WO is defined in this branch */
- #endif /* __GNUC__ */
-/*
- * dcbt-based prefetch for PowerPC G5 and POWER5.  It gives a gigantic
- * slowdown on POWER4, so POWER4 is deliberately left out of this test and
- * just uses the gcc builtin from the last-ditch __GNUC__ branch instead.
- */
-#elif defined(ATL_ARCH_PPCG5) || defined(ATL_ARCH_POWER5)
- #if defined(__GNUC__) || defined(__IBM_GCC_ASM)
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("dcbt 0, %0, 0" : : "r" ((mem)))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("dcbtst 0, %0" : : "r" ((mem)))
- #define ATL_pfST(mem) \
- __asm__ __volatile__ ("dcbt 0, %0, 1" : : "r" ((mem)))
- #define ATL_pfl1STi(mem, str) /* str must be a compile-time constant ("i") */ \
- __asm__ __volatile__ ("rlwinm %0, %0, 0, 0, 24\n\t" \
- "ori %0, %0, 96+%2\n\t" \
- "dcbt 0, %0, 8" \
- : "=r" (mem) /* mem is rewritten by the asm: pass a modifiable lvalue */ \
- : "0" (mem), "i" (str))
-
- #define ATL_GOT_L1PREFETCH
- #define ATL_L1LS 128
- #endif /* __GNUC__ || __IBM_GCC_ASM */
-#elif defined(ATL_ARCH_IA64Itan) || defined(ATL_ARCH_IA64Itan2)
-/*
- * Have to use nt2, because the FPU ignores L1.
- * NOTE: just let icc do the prefetching; its macros are kept here for reference
- */
- #if defined(__ECC) && 0 /* "&& 0": this arm is never taken, kept for reference only */
- #include "ia64intrin.h"
- #define ATL_pfl1R(mem) __lfetch(2, (mem))
- #define ATL_pfl1W(mem) __lfetch_excl(2, (mem))
- #define ATL_GOT_L1PREFETCH
- #elif defined(__GNUC__) && !defined(__ECC) /* GNU-style asm, but not under __ECC */
- #define ATL_pfl1R(mem) \
- __asm__ (" lfetch.nt2 [%0]": : "r"((void *)(mem)))
- #define ATL_pfl1W(mem) \
- __asm__ (" lfetch.excl [%0]": : "r"((void *)(mem)))
- #define ATL_GOT_L1PREFETCH
- #endif /* __ECC / __GNUC__ */
-#elif defined(ATL_ARCH_HPPA20) && defined(__GNUC__) /* loads written with %r0 as the register operand */
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("ldw %0, %%r0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("ldd %0, %%r0" : : "m" (*((char *)(mem))))
- #define ATL_GOT_L1PREFETCH /* L1 only: this branch defines no ATL_pfl2* macros */
-#elif defined(ATL_AltiVec) && !defined(ATL_pfl1R) /* ATL_pfavR/W: presumably from atlas_altivec.h */
- #ifndef ATL_NoFakePF /* defining ATL_NoFakePF suppresses these macros */
- /* 33619968 is ATL_GetCtrl(0, 1, 2), or fetch 1 32-byte block */
- #define ATL_pfl1R(mem) ATL_pfavR(mem, 33619968, 3)
- #define ATL_pfl1W(mem) ATL_pfavW(mem, 33619968, 2)
- #define ATL_GOT_L1PREFETCH
- #endif /* ATL_NoFakePF */
-#elif defined(ATL_ARCH_MIPSICE9) && defined(__GNUC__) /* L1 macros only, plus line sizes */
- #define ATL_pfl1R(mem) \
- __asm__ __volatile__ ("pref 6,%0" : : "m" (*((char *)(mem))))
- #define ATL_pfl1W(mem) \
- __asm__ __volatile__ ("pref 7,%0" : : "m" (*((char *)(mem))))
- #define ATL_GOT_L1PREFETCH
- #define ATL_L1LS 32 /* set here so the default of 64 at the end of the header is skipped */
- #define ATL_L2LS 64 /* set here so L2 does not default to ATL_L1LS */
-#elif defined(__GNUC__) /* last ditch, use gcc predefined func */
- #define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3) /* 2nd arg 0: prefetch for read */
- #define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3) /* 2nd arg 1: prefetch for write */
- #define ATL_GOT_L1PREFETCH /* L1 only: this branch defines no ATL_pfl2* macros */
-#endif /* end of the per-architecture selection chain */
-#if defined(ATL_pfl1W) && !defined(ATL_pfl1WO) /* branch gave no write-only form */
- #define ATL_pfl1WO ATL_pfl1W /* fall back to the read/write prefetch */
-#endif
-
-#ifdef ATL_NOL1PREFETCH /* when set, discard any L1 macros selected above */
- #ifdef ATL_GOT_L1PREFETCH
- #undef ATL_pfl1R
- #undef ATL_pfl1W
- #undef ATL_pfl1WO
- #undef ATL_GOT_L1PREFETCH /* cleared so the empty dummy macros get defined instead */
- #endif /* ATL_GOT_L1PREFETCH */
-#endif /* ATL_NOL1PREFETCH */
-#if defined(ATL_NOL2PREFETCH) && defined(ATL_GOT_L2PREFETCH)
- #undef ATL_pfl2R /* L2 prefetch vetoed: discard any L2 macros selected above */
- #undef ATL_pfl2W
- #undef ATL_pfl2WO
- #undef ATL_GOT_L2PREFETCH /* cleared so the empty dummy macros get defined instead */
-#endif
-#ifndef ATL_GOT_L1PREFETCH /* dummy calls cpp takes out of code */
- #define ATL_pfl1R(mem)
- #define ATL_pfl1W(mem)
- #define ATL_pfl1WO(mem)
-#endif
-#ifndef ATL_GOT_L2PREFETCH /* dummy calls cpp takes out of code */
- #define ATL_pfl2R(mem)
- #define ATL_pfl2W(mem)
- #define ATL_pfl2WO(mem) /* keeps the L2 dummy set as complete as the L1 set */
-#elif !defined(ATL_pfl2WO) /* real L2 macros, but the branch gave no write-only form */
- #define ATL_pfl2WO ATL_pfl2W /* fall back to the read/write prefetch */
-#endif
-/*
- * Define cache line sizes for L1 and L2, unless a branch above already did
- */
-#ifndef ATL_L1LS
- #define ATL_L1LS 64 /* default when no architecture branch set ATL_L1LS */
-#endif
-#ifndef ATL_L2LS
- #define ATL_L2LS ATL_L1LS /* L2 defaults to whatever ATL_L1LS ended up as */
-#endif
-
-#endif