/* --------------------------------------------------------------------- * * -- Automatically Tuned Linear Algebra Software (ATLAS) * (C) Copyright 2000 All Rights Reserved * * -- ATLAS routine -- Version 3.2 -- December 25, 2000 * * Author : Antoine P. Petitet * Originally developed at the University of Tennessee, * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. * * --------------------------------------------------------------------- * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in * the documentation and/or other materials provided with the distri- * bution. * 3. The name of the University, the ATLAS group, or the names of its * contributors may not be used to endorse or promote products deri- * ved from this software without specific written permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * --------------------------------------------------------------------- */ #ifndef ATLAS_PTMISC_H #define ATLAS_PTMISC_H /* * ===================================================================== * Include Files * ===================================================================== */ #include #include #include "atlas_misc.h" #include "atlas_pthreads.h" /* * ===================================================================== * #define macro constants * ===================================================================== * * ATL_XOVER_MI_DEFAULT is the smallest number of NB-by-NB blocks for * which threading is enabled, where NB is the value returned by the * ATLAS function Mjoin( PATL, GetNB ). */ #ifdef TREAL #define ATL_XOVER_MI_DEFAULT 8 /* number of NB x NB blocks */ #else #define ATL_XOVER_MI_DEFAULT 4 #endif #define NOSPLIT 0 /* For convenience */ #define SPLIT_M 1 #define SPLIT_N 2 #define SPLIT_K 3 /* * ===================================================================== * macro functions * ===================================================================== */ #define Mptm( a_, i_, siz_ ) ( ( (char*)(a_) + ( (i_) * (siz_) ) ) ) #define Mvptm( a_, i_, siz_ ) ( (void *)(Mptm( (a_), (i_), (siz_) ))) /* * ===================================================================== * typedef definitions * ===================================================================== * * Definition of the Binary (recursive) task tree: Each node of the tree * mainly consist a node number, a reference counter to enforce depen- * dencies, a argument structure and a function to be applied. */ typedef void * PT_DATA_T; typedef void * PT_FUN_VAL_T; typedef void * PT_FUN_ARG_T; typedef PT_FUN_VAL_T (*PT_FUN_T) ( PT_FUN_ARG_T ); typedef struct PT_node_T { pthread_t pid; pthread_mutex_t mutex; pthread_cond_t cond; struct PT_node_T * left; struct PT_node_T * right; PT_DATA_T data; PT_FUN_VAL_T * val; PT_FUN_T fun; PT_FUN_ARG_T arg; unsigned int node; unsigned int count; } PT_NODE_T; typedef PT_NODE_T * PT_TREE_T; typedef void (*PT_APPLY_FUN_T)( PT_TREE_T ); enum DIM_1DSPLIT_E { Atlas1dSplit = 100, Atlas1dNoSplit = 199 }; enum DIM_TZSPLIT_E { AtlasTzSplitMrow = 200, AtlasTzSplitKrow = 201, AtlasTzSplitKcol = 202, AtlasTzSplitNcol = 203, AtlasTzNoSplit = 299 }; typedef enum DIM_1DSPLIT_E DIM_1DSPLIT_T; typedef enum DIM_TZSPLIT_E DIM_TZSPLIT_T; /* * Type definitions for some auxiliaries that have been multi-threaded * as well. */ typedef struct { size_t size; PT_FUN_T fun; } PT_MISC_TYPE_T; typedef struct { const void * al, * be; const void * a; void * c; int la, lc, m, n; } PT_GEADD_ARGS_T; typedef struct { void * a; int la, m, n; } PT_GEZERO_ARGS_T; typedef struct { const void * al; void * a; int la, m, n; } PT_GESCAL_ARGS_T; typedef struct { enum ATLAS_UPLO up; const void * al; void * a; int k, la, m, n; } PT_TZSCAL_ARGS_T; /* * ===================================================================== * Function prototypes * ===================================================================== */ int ATL_sGetNB ( void ); int ATL_dGetNB ( void ); int ATL_cGetNB ( void ); int ATL_zGetNB ( void ); DIM_1DSPLIT_T ATL_1dsplit ( const unsigned int, const int, const int, unsigned int *, unsigned int *, int *, int *, double * ); DIM_TZSPLIT_T ATL_tzsplit ( const enum ATLAS_UPLO, const unsigned int, const int, const int, const int, const int, unsigned int *, unsigned int *, int *, int * ); /* * Task tree management */ PT_TREE_T ATL_init_node ( unsigned int, PT_TREE_T, PT_TREE_T, PT_DATA_T, PT_FUN_VAL_T *, PT_FUN_T, PT_FUN_ARG_T ); void ATL_traverse_tree ( PT_TREE_T ); void ATL_apply_tree ( PT_TREE_T, PT_APPLY_FUN_T ); void ATL_free_tree ( PT_TREE_T ); void ATL_free_node ( PT_TREE_T ); void ATL_print_node_id ( PT_TREE_T ); void ATL_thread_init ( pthread_attr_t * ); void ATL_thread_exit ( pthread_attr_t * ); void ATL_wait_tree ( PT_TREE_T ); void ATL_signal_tree ( PT_TREE_T ); void ATL_thread_tree ( PT_TREE_T, pthread_attr_t * ); void ATL_join_tree ( PT_TREE_T ); PT_TREE_T ATL_create_tree ( unsigned int *, const int, const int ); /* * Typeless auxiliary functions */ PT_TREE_T ATL_Sgeadd ( const PT_MISC_TYPE_T *, const unsigned int, const unsigned int, pthread_attr_t *, const int, const int, const int, const void *, const void *, const int, const void *, void *, const int ); PT_TREE_T ATL_Sgescal ( const PT_MISC_TYPE_T *, const unsigned int, const unsigned int, pthread_attr_t *, const int, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_Sgezero ( const PT_MISC_TYPE_T *, const unsigned int, const unsigned int, pthread_attr_t *, const int, const int, const int, void *, const int ); PT_TREE_T ATL_Stzscal ( const PT_MISC_TYPE_T *, const unsigned int, const unsigned int, pthread_attr_t *, const int, const enum ATLAS_UPLO, const int, const int, const int, const void *, void *, const int ); /* * Single precision real auxiliary functions */ PT_FUN_ARG_T ATL_sptgeadd0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_sptgescal0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_sptgezero0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_spttzscal0 ( PT_FUN_ARG_T ); PT_TREE_T ATL_sptgeadd_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, const void *, const int, const void *, void *, const int ); PT_TREE_T ATL_sptgescal_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_sptgezero_nt ( const unsigned int, pthread_attr_t *, const int, const int, void *, const int ); PT_TREE_T ATL_spttrscal_nt ( const unsigned int, pthread_attr_t *, const enum ATLAS_UPLO, const int, const int, const void *, void *, const int ); void ATL_sptgeadd ( const int, const int, const float, const float *, const int, const float, float *, const int ); void ATL_sptgescal ( const int, const int, const float, float *, const int ); void ATL_sptgezero ( const int, const int, float *, const int ); void ATL_spttrscal ( const enum ATLAS_UPLO, const int, const int, const float, float *, const int ); /* * Double precision real auxiliary functions */ PT_FUN_ARG_T ATL_dptgeadd0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_dptgescal0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_dptgezero0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_dpttzscal0 ( PT_FUN_ARG_T ); PT_TREE_T ATL_dptgeadd_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, const void *, const int, const void *, void *, const int ); PT_TREE_T ATL_dptgescal_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_dptgezero_nt ( const unsigned int, pthread_attr_t *, const int, const int, void *, const int ); PT_TREE_T ATL_dpttrscal_nt ( const unsigned int, pthread_attr_t *, const enum ATLAS_UPLO, const int, const int, const void *, void *, const int ); void ATL_dptgeadd ( const int, const int, const double, const double *, const int, const double, double *, const int ); void ATL_dptgescal ( const int, const int, const double, double *, const int ); void ATL_dptgezero ( const int, const int, double *, const int ); void ATL_dpttrscal ( const enum ATLAS_UPLO, const int, const int, const double, double *, const int ); /* * Single precision complex auxiliary functions */ PT_FUN_ARG_T ATL_cptgeadd0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_cptgescal0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_cptgezero0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_cpthescal0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_cpttzscal0 ( PT_FUN_ARG_T ); PT_TREE_T ATL_cptgeadd_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, const void *, const int, const void *, void *, const int ); PT_TREE_T ATL_cptgescal_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_cptgezero_nt ( const unsigned int, pthread_attr_t *, const int, const int, void *, const int ); PT_TREE_T ATL_cpttrscal_nt ( const unsigned int, pthread_attr_t *, const enum ATLAS_UPLO, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_cpthescal_nt ( const unsigned int, pthread_attr_t *, const enum ATLAS_UPLO, const int, const int, const void *, void *, const int ); void ATL_cptgeadd ( const int, const int, const float *, const float *, const int, const float *, float *, const int ); void ATL_cptgezero ( const int, const int, float *, const int ); void ATL_cptgescal ( const int, const int, const float *, float *, const int ); void ATL_cpttrscal ( const enum ATLAS_UPLO, const int, const int, const float *, float *, const int ); void ATL_cpthescal ( const enum ATLAS_UPLO, const int, const int, const float, float *, const int ); /* * Double precision complex auxiliary functions */ PT_FUN_ARG_T ATL_zptgeadd0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_zptgescal0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_zptgezero0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_zpthescal0 ( PT_FUN_ARG_T ); PT_FUN_ARG_T ATL_zpttzscal0 ( PT_FUN_ARG_T ); PT_TREE_T ATL_zptgeadd_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, const void *, const int, const void *, void *, const int ); PT_TREE_T ATL_zptgescal_nt ( const unsigned int, pthread_attr_t *, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_zptgezero_nt ( const unsigned int, pthread_attr_t *, const int, const int, void *, const int ); PT_TREE_T ATL_zpttrscal_nt ( const unsigned int, pthread_attr_t *, const enum ATLAS_UPLO, const int, const int, const void *, void *, const int ); PT_TREE_T ATL_zpthescal_nt ( const unsigned int, pthread_attr_t *, const enum ATLAS_UPLO, const int, const int, const void *, void *, const int ); void ATL_zptgeadd ( const int, const int, const double *, const double *, const int, const double *, double *, const int ); void ATL_zptgezero ( const int, const int, double *, const int ); void ATL_zptgescal ( const int, const int, const double *, double *, const int ); void ATL_zpttrscal ( const enum ATLAS_UPLO, const int, const int, const double *, double *, const int ); void ATL_zpthescal ( const enum ATLAS_UPLO, const int, const int, const double, double *, const int ); #endif /* * End of atlas_ptmisc.h */