From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/tools/ATLAS/include/atlas_ptmisc.h | 410 ++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 kaldi_io/src/tools/ATLAS/include/atlas_ptmisc.h (limited to 'kaldi_io/src/tools/ATLAS/include/atlas_ptmisc.h') diff --git a/kaldi_io/src/tools/ATLAS/include/atlas_ptmisc.h b/kaldi_io/src/tools/ATLAS/include/atlas_ptmisc.h new file mode 100644 index 0000000..4c3db23 --- /dev/null +++ b/kaldi_io/src/tools/ATLAS/include/atlas_ptmisc.h @@ -0,0 +1,410 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.2 -- December 25, 2000 + * + * Author : Antoine P. Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. 
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
+ * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN-
+ * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifndef ATLAS_PTMISC_H
+#define ATLAS_PTMISC_H
+/*
+ * =====================================================================
+ * Include Files
+ * =====================================================================
+ */
+#include <math.h>
+#include <pthread.h>
+
+#include "atlas_misc.h"
+#include "atlas_pthreads.h"
+/*
+ * =====================================================================
+ * #define macro constants
+ * =====================================================================
+ *
+ * ATL_XOVER_MI_DEFAULT is the smallest number of NB-by-NB blocks for
+ * which threading is enabled, where NB is the value returned by the
+ * ATLAS function Mjoin( PATL, GetNB ).
+ */
+#ifdef TREAL
+#define ATL_XOVER_MI_DEFAULT 8 /* number of NB x NB blocks */
+#else
+#define ATL_XOVER_MI_DEFAULT 4 /* value used when TREAL is not defined */
+#endif
+/*
+ * Split selectors.  NOTE(review): the names suggest "no split" and a
+ * split along the M, N or K dimension; the code that consumes them is
+ * not visible in this header -- confirm against the callers.
+ */
+#define NOSPLIT 0 /* For convenience */
+#define SPLIT_M 1
+#define SPLIT_N 2
+#define SPLIT_K 3
+
+/*
+ * =====================================================================
+ * macro functions
+ * =====================================================================
+ *
+ * Mptm ( a_, i_, siz_ ) casts a_ to char * and advances it by
+ *                       i_ * siz_ bytes, i.e. it addresses the i_-th
+ *                       item of size siz_ in an untyped array.
+ * Mvptm( a_, i_, siz_ ) yields the same address converted to void *.
+ */
+#define Mptm( a_, i_, siz_ ) ( ( (char*)(a_) + ( (i_) * (siz_) ) ) )
+#define Mvptm( a_, i_, siz_ ) ( (void *)(Mptm( (a_), (i_), (siz_) )))
+/*
+ * =====================================================================
+ * typedef definitions
+ * =====================================================================
+ *
+ * Definition of the binary (recursive) task tree: each node of the
+ * tree mainly consists of a node number, a reference counter to enforce
+ * dependencies, an argument structure and a function to be applied.
+ */
+typedef void *                PT_DATA_T;      /* untyped per-node data      */
+typedef void *                PT_FUN_VAL_T;   /* untyped function result    */
+typedef void *                PT_FUN_ARG_T;   /* untyped function argument  */
+typedef PT_FUN_VAL_T (*PT_FUN_T) ( PT_FUN_ARG_T ); /* function applied at a
+                                                   * node: one untyped
+                                                   * argument in, one
+                                                   * untyped value out    */
+/*
+ * One node of the task tree.  NOTE(review): pid / mutex / cond are
+ * presumably the thread working on the node and the synchronisation
+ * objects used by ATL_wait_tree / ATL_signal_tree -- only their types
+ * are visible here, confirm against the implementation.
+ */
+typedef struct PT_node_T
+{
+   pthread_t                  pid;     /* thread identifier                 */
+   pthread_mutex_t            mutex;   /* per-node mutex                    */
+   pthread_cond_t             cond;    /* per-node condition variable       */
+   struct PT_node_T           * left;  /* left child                        */
+   struct PT_node_T           * right; /* right child                       */
+   PT_DATA_T                  data;    /* untyped data attached to the node */
+   PT_FUN_VAL_T               * val;   /* pointer to an untyped value;
+                                        * presumably receives the result of
+                                        * fun -- TODO confirm               */
+   PT_FUN_T                   fun;     /* function to be applied            */
+   PT_FUN_ARG_T               arg;     /* argument structure handed to fun  */
+   unsigned int               node;    /* node number                       */
+   unsigned int               count;   /* reference counter used to enforce
+                                        * dependencies                      */
+} PT_NODE_T;
+
+typedef PT_NODE_T *           PT_TREE_T;      /* a tree is handled through a
+                                               * pointer to a node          */
+typedef void (*PT_APPLY_FUN_T)( PT_TREE_T );  /* callback type accepted by
+                                               * ATL_apply_tree             */
+/*
+ * Return codes of the splitting helpers: DIM_1DSPLIT_T is returned by
+ * ATL_1dsplit and DIM_TZSPLIT_T by ATL_tzsplit.  The two enumerations
+ * use disjoint numeric ranges (1xx and 2xx).
+ */
+enum DIM_1DSPLIT_E
+{
+   Atlas1dSplit    = 100,
+   Atlas1dNoSplit  = 199
+};
+
+enum DIM_TZSPLIT_E
+{
+   AtlasTzSplitMrow = 200,
+   AtlasTzSplitKrow = 201,
+   AtlasTzSplitKcol = 202,
+   AtlasTzSplitNcol = 203,
+   AtlasTzNoSplit   = 299
+};
+
+typedef enum DIM_1DSPLIT_E    DIM_1DSPLIT_T;
+typedef enum DIM_TZSPLIT_E    DIM_TZSPLIT_T;
+
+/*
+ * Type definitions for some auxiliaries that have been multi-threaded
+ * as well.
+ */
+typedef struct
+{
+   size_t                     size;    /* NOTE(review): presumably the size
+                                        * in bytes of one matrix element --
+                                        * confirm                          */
+   PT_FUN_T                   fun;     /* function with the PT_FUN_T
+                                        * signature                        */
+} PT_MISC_TYPE_T;                       /* passed (by const pointer) as the
+                                        * first argument of the ATL_S*
+                                        * routines declared below          */
+/*
+ * Argument bundles for the multi-threaded auxiliaries.
+ * NOTE(review): al / be look like the alpha / beta scalars (passed by
+ * pointer), la / lc like leading dimensions and m / n / k like matrix
+ * extents; presumably these structures are what the ATL_?pt*0 functions
+ * receive through their PT_FUN_ARG_T parameter -- confirm against the
+ * implementations, none of this is visible in the header.
+ */
+typedef struct
+{
+   const void                 * al, * be;
+   const void                 * a;     /* read-only operand  */
+   void                       * c;     /* writable operand   */
+   int                        la, lc, m, n;
+} PT_GEADD_ARGS_T;
+
+typedef struct
+{
+   void                       * a;     /* writable operand   */
+   int                        la, m, n;
+} PT_GEZERO_ARGS_T;
+
+typedef struct
+{
+   const void                 * al;
+   void                       * a;     /* writable operand   */
+   int                        la, m, n;
+} PT_GESCAL_ARGS_T;
+
+typedef struct
+{
+   enum ATLAS_UPLO            up;      /* triangle selector; enum ATLAS_UPLO
+                                        * is declared outside this header  */
+   const void                 * al;
+   void                       * a;     /* writable operand   */
+   int                        k, la, m, n;
+} PT_TZSCAL_ARGS_T;
+
+/*
+ * =====================================================================
+ * Function prototypes
+ * =====================================================================
+ *
+ * ATL_?GetNB take no argument and return an int.  NOTE(review): these
+ * are presumably the per-precision expansions of Mjoin( PATL, GetNB )
+ * mentioned with ATL_XOVER_MI_DEFAULT, i.e. they return NB -- confirm.
+ *
+ * ATL_1dsplit and ATL_tzsplit report their decision through the
+ * DIM_1DSPLIT_T / DIM_TZSPLIT_T return value.  NOTE(review): the
+ * non-const pointer parameters are presumably outputs -- parameter
+ * names are not given here, confirm against the definitions.
+ */
+int                           ATL_sGetNB      ( void );
+int                           ATL_dGetNB      ( void );
+int                           ATL_cGetNB      ( void );
+int                           ATL_zGetNB      ( void );
+
+DIM_1DSPLIT_T                 ATL_1dsplit
+(
+   const unsigned int,
+   const int,
+   const int,
+   unsigned int *,
+   unsigned int *,
+   int *,
+   int *,
+   double *
+);
+
+DIM_TZSPLIT_T                 ATL_tzsplit
+(
+   const enum ATLAS_UPLO,
+   const unsigned int,
+   const int,
+   const int,
+   const int,
+   const int,
+   unsigned int *,
+   unsigned int *,
+   int *,
+   int *
+);
+/*
+ * Task tree management
+ *
+ * The parameter types of ATL_init_node line up with the node, left,
+ * right, data, val, fun and arg members of PT_NODE_T and it returns a
+ * PT_TREE_T.  NOTE(review): presumably it allocates and fills one node
+ * -- confirm.  ATL_apply_tree takes a tree and a PT_APPLY_FUN_T
+ * callback; the remaining routines take a tree and / or a
+ * pthread_attr_t * and return nothing.
+ */
+PT_TREE_T                     ATL_init_node
+(  unsigned int,    PT_TREE_T,       PT_TREE_T,       PT_DATA_T,
+   PT_FUN_VAL_T *,  PT_FUN_T,        PT_FUN_ARG_T );
+
+void                          ATL_traverse_tree  ( PT_TREE_T );
+void                          ATL_apply_tree     ( PT_TREE_T, PT_APPLY_FUN_T );
+void                          ATL_free_tree      ( PT_TREE_T );
+void                          ATL_free_node      ( PT_TREE_T );
+void                          ATL_print_node_id  ( PT_TREE_T );
+
+void                          ATL_thread_init    ( pthread_attr_t * );
+void                          ATL_thread_exit    ( pthread_attr_t * );
+void                          ATL_wait_tree      ( PT_TREE_T );
+void                          ATL_signal_tree    ( PT_TREE_T );
+void                          ATL_thread_tree    ( PT_TREE_T, pthread_attr_t * );
+void                          ATL_join_tree      ( PT_TREE_T );
+
+PT_TREE_T                     ATL_create_tree
+(  unsigned int *,  const int,       const int );
+/*
+ * Typeless auxiliary functions
+ *
+ * Each routine takes a const PT_MISC_TYPE_T * first, receives its
+ * matrix operands and scalars as void pointers, and returns a
+ * PT_TREE_T.
+ */
+PT_TREE_T                     ATL_Sgeadd
+(  const PT_MISC_TYPE_T *,    const unsigned int,
+   const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const int,                 const void *,              const void *,
+   const int,                 const void *,              void *,                    const int );
+PT_TREE_T                     ATL_Sgescal
+(  const PT_MISC_TYPE_T *,    const unsigned int,
+   const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const int,                 const void *,              void *,
+   const int );
+PT_TREE_T                     ATL_Sgezero
+(  const PT_MISC_TYPE_T *,    const unsigned int,
+   const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const int,                 void *,                    const int );
+PT_TREE_T                     ATL_Stzscal
+(  const PT_MISC_TYPE_T *,    const unsigned int,
+   const unsigned int,        pthread_attr_t *,          const int,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const int,                 const void *,              void *,                    const int );
+/*
+ * Single precision real auxiliary functions
+ *
+ * Three groups are declared for every precision:
+ *   - ATL_?pt*0   : one untyped argument in, one untyped value out.
+ *                   They are declared as returning PT_FUN_ARG_T; since
+ *                   PT_FUN_ARG_T and PT_FUN_VAL_T are both void *, they
+ *                   are compatible with PT_FUN_T.
+ *   - ATL_?pt*_nt : take a pthread_attr_t * plus untyped operands and
+ *                   return a PT_TREE_T.
+ *   - ATL_?pt*    : typed operands, no return value.
+ * Note that the first group spells the triangular scaling routine
+ * "tzscal" while the other two groups spell it "trscal".
+ */
+PT_FUN_ARG_T                  ATL_sptgeadd0      ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_sptgescal0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_sptgezero0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_spttzscal0     ( PT_FUN_ARG_T );
+
+PT_TREE_T                     ATL_sptgeadd_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              const void *,              const int,
+   const void *,              void *,                    const int );
+PT_TREE_T                     ATL_sptgescal_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              void *,                    const int );
+PT_TREE_T                     ATL_sptgezero_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 void *,                    const int );
+PT_TREE_T                     ATL_spttrscal_nt
+(  const unsigned int,        pthread_attr_t *,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const void *,              void *,                    const int );
+
+void                          ATL_sptgeadd
+(  const int,       const int,       const float,     const float *,
+   const int,       const float,     float *,         const int );
+void                          ATL_sptgescal
+(  const int,       const int,       const float,     float *,
+   const int );
+void                          ATL_sptgezero
+(  const int,       const int,       float *,         const int );
+void                          ATL_spttrscal
+(  const enum ATLAS_UPLO,     const int,       const int,
+   const float,     float *,         const int );
+
+/*
+ * Double precision real auxiliary functions
+ *
+ * Same three groups as for single precision real, with double operands
+ * in the typed group.
+ */
+PT_FUN_ARG_T                  ATL_dptgeadd0      ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_dptgescal0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_dptgezero0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_dpttzscal0     ( PT_FUN_ARG_T );
+
+PT_TREE_T                     ATL_dptgeadd_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              const void *,              const int,
+   const void *,              void *,                    const int );
+PT_TREE_T                     ATL_dptgescal_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              void *,                    const int );
+PT_TREE_T                     ATL_dptgezero_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 void *,                    const int );
+PT_TREE_T                     ATL_dpttrscal_nt
+(  const unsigned int,        pthread_attr_t *,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const void *,              void *,                    const int );
+
+void                          ATL_dptgeadd
+(  const int,       const int,       const double,    const double *,
+   const int,       const double,    double *,        const int );
+void                          ATL_dptgescal
+(  const int,       const int,       const double,    double *,
+   const int );
+void                          ATL_dptgezero
+(  const int,       const int,       double *,        const int );
+void                          ATL_dpttrscal
+(  const enum ATLAS_UPLO,     const int,       const int,
+   const double,    double *,        const int );
+/*
+ * Single precision complex auxiliary functions
+ *
+ * In addition to the routines declared for the real precisions, the
+ * complex precisions declare a "hescal" variant (ATL_cpthescal0 and
+ * ATL_cpthescal_nt here).
+ */
+PT_FUN_ARG_T                  ATL_cptgeadd0      ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_cptgescal0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_cptgezero0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_cpthescal0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_cpttzscal0     ( PT_FUN_ARG_T );
+
+PT_TREE_T                     ATL_cptgeadd_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              const void *,              const int,
+   const void *,              void *,                    const int );
+PT_TREE_T                     ATL_cptgescal_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              void *,                    const int );
+PT_TREE_T                     ATL_cptgezero_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 void *,                    const int );
+PT_TREE_T                     ATL_cpttrscal_nt
+(  const unsigned int,        pthread_attr_t *,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const void *,              void *,                    const int );
+PT_TREE_T                     ATL_cpthescal_nt
+(  const unsigned int,        pthread_attr_t *,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const void *,              void *,                    const int );
+
+void                          ATL_cptgeadd      /* typed single precision
+                                                 * complex routines: scalars
+                                                 * are passed by pointer
+                                                 * (const float *), except
+                                                 * for ATL_cpthescal below  */
+(  const int,       const int,       const float *,   const float *,
+   const int,       const float *,   float *,         const int );
+void                          ATL_cptgezero
+(  const int,       const int,       float *,         const int );
+void                          ATL_cptgescal
+(  const int,       const int,       const float *,   float *,
+   const int );
+void                          ATL_cpttrscal
+(  const enum ATLAS_UPLO,     const int,       const int,
+   const float *,   float *,         const int );
+void                          ATL_cpthescal     /* scalar is a plain const
+                                                 * float, passed by value   */
+(  const enum ATLAS_UPLO,     const int,       const int,
+   const float,     float *,         const int );
+/*
+ * Double precision complex auxiliary functions
+ *
+ * Three groups, mirroring the single precision complex declarations:
+ *   - ATL_zpt*0   : one untyped argument in, one untyped value out.
+ *                   Declared as returning PT_FUN_ARG_T, which like
+ *                   PT_FUN_VAL_T is a void *, so they are compatible
+ *                   with PT_FUN_T.
+ *   - ATL_zpt*_nt : take a pthread_attr_t * plus untyped operands and
+ *                   return a PT_TREE_T.
+ *   - ATL_zpt*    : typed (double) operands, no return value; scalars
+ *                   are passed as const double *, except for
+ *                   ATL_zpthescal which takes a const double by value.
+ */
+PT_FUN_ARG_T                  ATL_zptgeadd0      ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_zptgescal0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_zptgezero0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_zpthescal0     ( PT_FUN_ARG_T );
+PT_FUN_ARG_T                  ATL_zpttzscal0     ( PT_FUN_ARG_T );
+
+PT_TREE_T                     ATL_zptgeadd_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              const void *,              const int,
+   const void *,              void *,                    const int );
+PT_TREE_T                     ATL_zptgescal_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 const void *,              void *,                    const int );
+PT_TREE_T                     ATL_zptgezero_nt
+(  const unsigned int,        pthread_attr_t *,          const int,
+   const int,                 void *,                    const int );
+PT_TREE_T                     ATL_zpttrscal_nt
+(  const unsigned int,        pthread_attr_t *,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const void *,              void *,                    const int );
+PT_TREE_T                     ATL_zpthescal_nt
+(  const unsigned int,        pthread_attr_t *,
+   const enum ATLAS_UPLO,     const int,                 const int,
+   const void *,              void *,                    const int );
+
+void                          ATL_zptgeadd
+(  const int,       const int,       const double *,  const double *,
+   const int,       const double *,  double *,        const int );
+void                          ATL_zptgezero
+(  const int,       const int,       double *,        const int );
+void                          ATL_zptgescal
+(  const int,       const int,       const double *,  double *,
+   const int );
+void                          ATL_zpttrscal
+(  const enum ATLAS_UPLO,     const int,       const int,
+   const double *,  double *,        const int );
+void                          ATL_zpthescal
+(  const enum ATLAS_UPLO,     const int,       const int,
+   const double,    double *,        const int );
+
+#endif +/* + * End of atlas_ptmisc.h + */ -- cgit v1.2.3