diff options
author | Determinant <[email protected]> | 2015-08-14 11:51:42 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2015-08-14 11:51:42 +0800 |
commit | 96a32415ab43377cf1575bd3f4f2980f58028209 (patch) | |
tree | 30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/kaldi/matrix | |
parent | c177a7549bd90670af4b29fa813ddea32cfe0f78 (diff) |
add implementation for kaldi io (by ymz)
Diffstat (limited to 'kaldi_io/src/kaldi/matrix')
20 files changed, 5813 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/matrix/cblas-wrappers.h b/kaldi_io/src/kaldi/matrix/cblas-wrappers.h new file mode 100644 index 0000000..ebec0a3 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/cblas-wrappers.h @@ -0,0 +1,491 @@ +// matrix/cblas-wrappers.h + +// Copyright 2012 Johns Hopkins University (author: Daniel Povey); +// Haihua Xu; Wei Shi + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_ +#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1 + + +#include <limits> +#include "matrix/sp-matrix.h" +#include "matrix/kaldi-vector.h" +#include "matrix/kaldi-matrix.h" +#include "matrix/matrix-functions.h" + +// Do not include this file directly. It is to be included +// by .cc files in this directory. + +namespace kaldi { + + +inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y, + const int incY) { + cblas_scopy(N, X, incX, Y, incY); +} + +inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y, + const int incY) { + cblas_dcopy(N, X, incX, Y, incY); +} + + +inline float cblas_Xasum(const int N, const float *X, const int incX) { + return cblas_sasum(N, X, incX); +} + +inline double cblas_Xasum(const int N, const double *X, const int incX) { + return cblas_dasum(N, X, incX); +} + +inline void cblas_Xrot(const int N, float *X, const int incX, float *Y, + const int incY, const float c, const float s) { + cblas_srot(N, X, incX, Y, incY, c, s); +} +inline void cblas_Xrot(const int N, double *X, const int incX, double *Y, + const int incY, const double c, const double s) { + cblas_drot(N, X, incX, Y, incY, c, s); +} +inline float cblas_Xdot(const int N, const float *const X, + const int incX, const float *const Y, + const int incY) { + return cblas_sdot(N, X, incX, Y, incY); +} +inline double cblas_Xdot(const int N, const double *const X, + const int incX, const double *const Y, + const int incY) { + return cblas_ddot(N, X, incX, Y, incY); +} +inline void cblas_Xaxpy(const int N, const float alpha, const float *X, + const int incX, float *Y, const int incY) { + cblas_saxpy(N, alpha, X, incX, Y, incY); +} +inline void cblas_Xaxpy(const int N, const double alpha, const double *X, + const int incX, double *Y, const int incY) { + cblas_daxpy(N, alpha, X, incX, Y, incY); +} +inline void cblas_Xscal(const int N, const float alpha, float *data, + const int inc) { + cblas_sscal(N, alpha, data, inc); +} +inline void cblas_Xscal(const int N, const double alpha, double *data, + const int inc) { + cblas_dscal(N, alpha, data, inc); +} +inline void cblas_Xspmv(const float alpha, const int num_rows, const float *Mdata, + const float *v, const int v_inc, + const float beta, float *y, const int y_inc) { + cblas_sspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc); +} +inline void cblas_Xspmv(const double alpha, const int num_rows, const double *Mdata, + const double *v, const int v_inc, + const double beta, double *y, const int y_inc) { + cblas_dspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc); +} +inline void cblas_Xtpmv(MatrixTransposeType trans, const float *Mdata, + const int num_rows, float *y, const int y_inc) { + cblas_stpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} +inline void cblas_Xtpmv(MatrixTransposeType trans, const double *Mdata, + const int num_rows, double *y, const int y_inc) { + cblas_dtpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} + + +inline void cblas_Xtpsv(MatrixTransposeType trans, const float *Mdata, + const int num_rows, float *y, const int y_inc) { + cblas_stpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} +inline void cblas_Xtpsv(MatrixTransposeType trans, const double *Mdata, + const int num_rows, double *y, const int y_inc) { + cblas_dtpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} + +// x = alpha * M * y + beta * x +inline void cblas_Xspmv(MatrixIndexT dim, float alpha, const float *Mdata, + const float *ydata, MatrixIndexT ystride, + float beta, float *xdata, MatrixIndexT xstride) { + cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata, + ydata, ystride, beta, xdata, xstride); +} +inline void cblas_Xspmv(MatrixIndexT dim, double alpha, const double *Mdata, + const double *ydata, MatrixIndexT ystride, + double beta, double *xdata, MatrixIndexT xstride) { + cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata, + ydata, ystride, beta, xdata, xstride); +} + +// Implements A += alpha * (x y' + y x'); A is symmetric matrix. +inline void cblas_Xspr2(MatrixIndexT dim, float alpha, const float *Xdata, + MatrixIndexT incX, const float *Ydata, MatrixIndexT incY, + float *Adata) { + cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata, + incX, Ydata, incY, Adata); +} +inline void cblas_Xspr2(MatrixIndexT dim, double alpha, const double *Xdata, + MatrixIndexT incX, const double *Ydata, MatrixIndexT incY, + double *Adata) { + cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata, + incX, Ydata, incY, Adata); +} + +// Implements A += alpha * (x x'); A is symmetric matrix. +inline void cblas_Xspr(MatrixIndexT dim, float alpha, const float *Xdata, + MatrixIndexT incX, float *Adata) { + cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata); +} +inline void cblas_Xspr(MatrixIndexT dim, double alpha, const double *Xdata, + MatrixIndexT incX, double *Adata) { + cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata); +} + +// sgemv,dgemv: y = alpha M x + beta y. +inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows, + MatrixIndexT num_cols, float alpha, const float *Mdata, + MatrixIndexT stride, const float *xdata, + MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) { + cblas_sgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows, + num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY); +} +inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows, + MatrixIndexT num_cols, double alpha, const double *Mdata, + MatrixIndexT stride, const double *xdata, + MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) { + cblas_dgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows, + num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY); +} + +// sgbmv, dgmmv: y = alpha M x + + beta * y. +inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows, + MatrixIndexT num_cols, MatrixIndexT num_below, + MatrixIndexT num_above, float alpha, const float *Mdata, + MatrixIndexT stride, const float *xdata, + MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) { + cblas_sgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows, + num_cols, num_below, num_above, alpha, Mdata, stride, xdata, + incX, beta, ydata, incY); +} +inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows, + MatrixIndexT num_cols, MatrixIndexT num_below, + MatrixIndexT num_above, double alpha, const double *Mdata, + MatrixIndexT stride, const double *xdata, + MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) { + cblas_dgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows, + num_cols, num_below, num_above, alpha, Mdata, stride, xdata, + incX, beta, ydata, incY); +} + + +template<typename Real> +inline void Xgemv_sparsevec(MatrixTransposeType trans, MatrixIndexT num_rows, + MatrixIndexT num_cols, Real alpha, const Real *Mdata, + MatrixIndexT stride, const Real *xdata, + MatrixIndexT incX, Real beta, Real *ydata, + MatrixIndexT incY) { + if (trans == kNoTrans) { + if (beta != 1.0) cblas_Xscal(num_rows, beta, ydata, incY); + for (MatrixIndexT i = 0; i < num_cols; i++) { + Real x_i = xdata[i * incX]; + if (x_i == 0.0) continue; + // Add to ydata, the i'th column of M, times alpha * x_i + cblas_Xaxpy(num_rows, x_i * alpha, Mdata + i, stride, ydata, incY); + } + } else { + if (beta != 1.0) cblas_Xscal(num_cols, beta, ydata, incY); + for (MatrixIndexT i = 0; i < num_rows; i++) { + Real x_i = xdata[i * incX]; + if (x_i == 0.0) continue; + // Add to ydata, the i'th row of M, times alpha * x_i + cblas_Xaxpy(num_cols, x_i * alpha, + Mdata + (i * stride), 1, ydata, incY); + } + } +} + +inline void cblas_Xgemm(const float alpha, + MatrixTransposeType transA, + const float *Adata, + MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride, + MatrixTransposeType transB, + const float *Bdata, MatrixIndexT b_stride, + const float beta, + float *Mdata, + MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) { + cblas_sgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA), + static_cast<CBLAS_TRANSPOSE>(transB), + num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows, + alpha, Adata, a_stride, Bdata, b_stride, + beta, Mdata, stride); +} +inline void cblas_Xgemm(const double alpha, + MatrixTransposeType transA, + const double *Adata, + MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride, + MatrixTransposeType transB, + const double *Bdata, MatrixIndexT b_stride, + const double beta, + double *Mdata, + MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) { + cblas_dgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA), + static_cast<CBLAS_TRANSPOSE>(transB), + num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows, + alpha, Adata, a_stride, Bdata, b_stride, + beta, Mdata, stride); +} + + +inline void cblas_Xsymm(const float alpha, + MatrixIndexT sz, + const float *Adata,MatrixIndexT a_stride, + const float *Bdata,MatrixIndexT b_stride, + const float beta, + float *Mdata, MatrixIndexT stride) { + cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata, + a_stride, Bdata, b_stride, beta, Mdata, stride); +} +inline void cblas_Xsymm(const double alpha, + MatrixIndexT sz, + const double *Adata,MatrixIndexT a_stride, + const double *Bdata,MatrixIndexT b_stride, + const double beta, + double *Mdata, MatrixIndexT stride) { + cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata, + a_stride, Bdata, b_stride, beta, Mdata, stride); +} +// ger: M += alpha x y^T. +inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, float alpha, + const float *xdata, MatrixIndexT incX, const float *ydata, + MatrixIndexT incY, float *Mdata, MatrixIndexT stride) { + cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, 1, ydata, 1, + Mdata, stride); +} +inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, double alpha, + const double *xdata, MatrixIndexT incX, const double *ydata, + MatrixIndexT incY, double *Mdata, MatrixIndexT stride) { + cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, 1, ydata, 1, + Mdata, stride); +} + +// syrk: symmetric rank-k update. +// if trans==kNoTrans, then C = alpha A A^T + beta C +// else C = alpha A^T A + beta C. +// note: dim_c is dim(C), other_dim_a is the "other" dimension of A, i.e. +// num-cols(A) if kNoTrans, or num-rows(A) if kTrans. +// We only need the row-major and lower-triangular option of this, and this +// is hard-coded. +inline void cblas_Xsyrk ( + const MatrixTransposeType trans, const MatrixIndexT dim_c, + const MatrixIndexT other_dim_a, const float alpha, const float *A, + const MatrixIndexT a_stride, const float beta, float *C, + const MatrixIndexT c_stride) { + cblas_ssyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), + dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride); +} + +inline void cblas_Xsyrk( + const MatrixTransposeType trans, const MatrixIndexT dim_c, + const MatrixIndexT other_dim_a, const double alpha, const double *A, + const MatrixIndexT a_stride, const double beta, double *C, + const MatrixIndexT c_stride) { + cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), + dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride); +} + +/// matrix-vector multiply using a banded matrix; we always call this +/// with b = 1 meaning we're multiplying by a diagonal matrix. This is used for +/// elementwise multiplication. We miss some of the arguments out of this +/// wrapper. +inline void cblas_Xsbmv1( + const MatrixIndexT dim, + const double *A, + const double alpha, + const double *x, + const double beta, + double *y) { + cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A, + 1, x, 1, beta, y, 1); +} + +inline void cblas_Xsbmv1( + const MatrixIndexT dim, + const float *A, + const float alpha, + const float *x, + const float beta, + float *y) { + cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A, + 1, x, 1, beta, y, 1); +} + + +/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could +/// extend this somehow. +inline void mul_elements( + const MatrixIndexT dim, + const double *a, + double *b) { // does b *= a, elementwise. + double c1, c2, c3, c4; + MatrixIndexT i; + for (i = 0; i + 4 <= dim; i += 4) { + c1 = a[i] * b[i]; + c2 = a[i+1] * b[i+1]; + c3 = a[i+2] * b[i+2]; + c4 = a[i+3] * b[i+3]; + b[i] = c1; + b[i+1] = c2; + b[i+2] = c3; + b[i+3] = c4; + } + for (; i < dim; i++) + b[i] *= a[i]; +} + +inline void mul_elements( + const MatrixIndexT dim, + const float *a, + float *b) { // does b *= a, elementwise. + float c1, c2, c3, c4; + MatrixIndexT i; + for (i = 0; i + 4 <= dim; i += 4) { + c1 = a[i] * b[i]; + c2 = a[i+1] * b[i+1]; + c3 = a[i+2] * b[i+2]; + c4 = a[i+3] * b[i+3]; + b[i] = c1; + b[i+1] = c2; + b[i+2] = c3; + b[i+3] = c4; + } + for (; i < dim; i++) + b[i] *= a[i]; +} + + + +// add clapack here +#if !defined(HAVE_ATLAS) +inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) { + stptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result); +} +inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *result) { + dtptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result); +} +// +inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols, + float *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot, + KaldiBlasInt *result) { + sgetrf_(num_rows, num_cols, Mdata, stride, pivot, result); +} +inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols, + double *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot, + KaldiBlasInt *result) { + dgetrf_(num_rows, num_cols, Mdata, stride, pivot, result); +} + +// +inline void clapack_Xgetri2(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride, + KaldiBlasInt *pivot, float *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + sgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result); +} +inline void clapack_Xgetri2(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride, + KaldiBlasInt *pivot, double *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + dgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result); +} +// +inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols, + KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride, + float *sv, float *Vdata, KaldiBlasInt *vstride, + float *Udata, KaldiBlasInt *ustride, float *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + sgesvd_(v, u, + num_cols, num_rows, Mdata, stride, + sv, Vdata, vstride, Udata, ustride, + p_work, l_work, result); +} +inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols, + KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride, + double *sv, double *Vdata, KaldiBlasInt *vstride, + double *Udata, KaldiBlasInt *ustride, double *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + dgesvd_(v, u, + num_cols, num_rows, Mdata, stride, + sv, Vdata, vstride, Udata, ustride, + p_work, l_work, result); +} +// +void inline clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata, + KaldiBlasInt *ipiv, float *work, KaldiBlasInt *result) { + ssptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result); +} +void inline clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata, + KaldiBlasInt *ipiv, double *work, KaldiBlasInt *result) { + dsptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result); +} +// +void inline clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata, + KaldiBlasInt *ipiv, KaldiBlasInt *result) { + ssptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result); +} +void inline clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata, + KaldiBlasInt *ipiv, KaldiBlasInt *result) { + dsptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result); +} +#else +inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols, + float *Mdata, MatrixIndexT stride, + int *pivot, int *result) { + *result = clapack_sgetrf(CblasColMajor, num_rows, num_cols, + Mdata, stride, pivot); +} + +inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols, + double *Mdata, MatrixIndexT stride, + int *pivot, int *result) { + *result = clapack_dgetrf(CblasColMajor, num_rows, num_cols, + Mdata, stride, pivot); +} +// +inline int clapack_Xtrtri(int num_rows, float *Mdata, MatrixIndexT stride) { + return clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows, + Mdata, stride); +} + +inline int clapack_Xtrtri(int num_rows, double *Mdata, MatrixIndexT stride) { + return clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows, + Mdata, stride); +} +// +inline void clapack_Xgetri(MatrixIndexT num_rows, float *Mdata, MatrixIndexT stride, + int *pivot, int *result) { + *result = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride, pivot); +} +inline void clapack_Xgetri(MatrixIndexT num_rows, double *Mdata, MatrixIndexT stride, + int *pivot, int *result) { + *result = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride, pivot); +} +#endif + +} +// namespace kaldi + +#endif diff --git a/kaldi_io/src/kaldi/matrix/compressed-matrix.h b/kaldi_io/src/kaldi/matrix/compressed-matrix.h new file mode 100644 index 0000000..746cab3 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/compressed-matrix.h @@ -0,0 +1,179 @@ +// matrix/compressed-matrix.h + +// Copyright 2012 Johns Hopkins University (author: Daniel Povey) +// Frantisek Skala, Wei Shi + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_ +#define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1 + +#include "kaldi-matrix.h" + +namespace kaldi { + +/// \addtogroup matrix_group +/// @{ + +/// This class does lossy compression of a matrix. It only +/// supports copying to-from a KaldiMatrix. For large matrices, +/// each element is compressed into about one byte, but there +/// is a little overhead on top of that (globally, and also per +/// column). + +/// The basic idea is for each column (in the normal configuration) +/// we work out the values at the 0th, 25th, 50th and 100th percentiles +/// and store them as 16-bit integers; we then encode each value in +/// the column as a single byte, in 3 separate ranges with different +/// linear encodings (0-25th, 25-50th, 50th-100th). +/// If the matrix has 8 rows or fewer, we simply store all values as +/// uint16. + +class CompressedMatrix { + public: + CompressedMatrix(): data_(NULL) { } + + ~CompressedMatrix() { Destroy(); } + + template<typename Real> + CompressedMatrix(const MatrixBase<Real> &mat): data_(NULL) { CopyFromMat(mat); } + + /// Initializer that can be used to select part of an existing + /// CompressedMatrix without un-compressing and re-compressing (note: unlike + /// similar initializers for class Matrix, it doesn't point to the same memory + /// location). + CompressedMatrix(const CompressedMatrix &mat, + const MatrixIndexT row_offset, + const MatrixIndexT num_rows, + const MatrixIndexT col_offset, + const MatrixIndexT num_cols); + + void *Data() const { return this->data_; } + + /// This will resize *this and copy the contents of mat to *this. + template<typename Real> + void CopyFromMat(const MatrixBase<Real> &mat); + + CompressedMatrix(const CompressedMatrix &mat); + + CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator. + + template<typename Real> + CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator. + + /// Copies contents to matrix. Note: mat must have the correct size, + /// CopyToMat no longer attempts to resize it. + template<typename Real> + void CopyToMat(MatrixBase<Real> *mat) const; + + void Write(std::ostream &os, bool binary) const; + + void Read(std::istream &is, bool binary); + + /// Returns number of rows (or zero for emtpy matrix). + inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 : + (*reinterpret_cast<GlobalHeader*>(data_)).num_rows; } + + /// Returns number of columns (or zero for emtpy matrix). + inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 : + (*reinterpret_cast<GlobalHeader*>(data_)).num_cols; } + + /// Copies row #row of the matrix into vector v. + /// Note: v must have same size as #cols. + template<typename Real> + void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const; + + /// Copies column #col of the matrix into vector v. + /// Note: v must have same size as #rows. + template<typename Real> + void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const; + + /// Copies submatrix of compressed matrix into matrix dest. + /// Submatrix starts at row row_offset and column column_offset and its size + /// is defined by size of provided matrix dest + template<typename Real> + void CopyToMat(int32 row_offset, + int32 column_offset, + MatrixBase<Real> *dest) const; + + void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); } + + friend class Matrix<float>; + friend class Matrix<double>; + private: + + // allocates data using new [], ensures byte alignment + // sufficient for float. + static void *AllocateData(int32 num_bytes); + + // the "format" will be 1 for the original format where each column has a + // PerColHeader, and 2 for the format now used for matrices with 8 or fewer + // rows, where everything is represented as 16-bit integers. + struct GlobalHeader { + int32 format; + float min_value; + float range; + int32 num_rows; + int32 num_cols; + }; + + static MatrixIndexT DataSize(const GlobalHeader &header); + + struct PerColHeader { + uint16 percentile_0; + uint16 percentile_25; + uint16 percentile_75; + uint16 percentile_100; + }; + + template<typename Real> + static void CompressColumn(const GlobalHeader &global_header, + const Real *data, MatrixIndexT stride, + int32 num_rows, PerColHeader *header, + unsigned char *byte_data); + template<typename Real> + static void ComputeColHeader(const GlobalHeader &global_header, + const Real *data, MatrixIndexT stride, + int32 num_rows, PerColHeader *header); + + static inline uint16 FloatToUint16(const GlobalHeader &global_header, + float value); + + static inline float Uint16ToFloat(const GlobalHeader &global_header, + uint16 value); + static inline unsigned char FloatToChar(float p0, float p25, + float p75, float p100, + float value); + static inline float CharToFloat(float p0, float p25, + float p75, float p100, + unsigned char value); + + void Destroy(); + + void *data_; // first GlobalHeader, then PerColHeader (repeated), then + // the byte data for each column (repeated). Note: don't intersperse + // the byte data with the PerColHeaders, because of alignment issues. + +}; + + +/// @} end of \addtogroup matrix_group + + +} // namespace kaldi + + +#endif // KALDI_MATRIX_COMPRESSED_MATRIX_H_ diff --git a/kaldi_io/src/kaldi/matrix/jama-eig.h b/kaldi_io/src/kaldi/matrix/jama-eig.h new file mode 100644 index 0000000..c7278bc --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/jama-eig.h @@ -0,0 +1,924 @@ +// matrix/jama-eig.h + +// Copyright 2009-2011 Microsoft Corporation + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +// This file consists of a port and modification of materials from +// JAMA: A Java Matrix Package +// under the following notice: This software is a cooperative product of +// The MathWorks and the National Institute of Standards and Technology (NIST) +// which has been released to the public. This notice and the original code are +// available at http://math.nist.gov/javanumerics/jama/domain.notice + + + +#ifndef KALDI_MATRIX_JAMA_EIG_H_ +#define KALDI_MATRIX_JAMA_EIG_H_ 1 + +#include "matrix/kaldi-matrix.h" + +namespace kaldi { + +// This class is not to be used externally. See the Eig function in the Matrix +// class in kaldi-matrix.h. This is the external interface. + +template<typename Real> class EigenvalueDecomposition { + // This class is based on the EigenvalueDecomposition class from the JAMA + // library (version 1.0.2). + public: + EigenvalueDecomposition(const MatrixBase<Real> &A); + + ~EigenvalueDecomposition(); // free memory. + + void GetV(MatrixBase<Real> *V_out) { // V is what we call P externally; it's the matrix of + // eigenvectors. + KALDI_ASSERT(V_out->NumRows() == static_cast<MatrixIndexT>(n_) + && V_out->NumCols() == static_cast<MatrixIndexT>(n_)); + for (int i = 0; i < n_; i++) + for (int j = 0; j < n_; j++) + (*V_out)(i, j) = V(i, j); // V(i, j) is member function. + } + void GetRealEigenvalues(VectorBase<Real> *r_out) { + // returns real part of eigenvalues. + KALDI_ASSERT(r_out->Dim() == static_cast<MatrixIndexT>(n_)); + for (int i = 0; i < n_; i++) + (*r_out)(i) = d_[i]; + } + void GetImagEigenvalues(VectorBase<Real> *i_out) { + // returns imaginary part of eigenvalues. + KALDI_ASSERT(i_out->Dim() == static_cast<MatrixIndexT>(n_)); + for (int i = 0; i < n_; i++) + (*i_out)(i) = e_[i]; + } + private: + + inline Real &H(int r, int c) { return H_[r*n_ + c]; } + inline Real &V(int r, int c) { return V_[r*n_ + c]; } + + // complex division + inline static void cdiv(Real xr, Real xi, Real yr, Real yi, Real *cdivr, Real *cdivi) { + Real r, d; + if (std::abs(yr) > std::abs(yi)) { + r = yi/yr; + d = yr + r*yi; + *cdivr = (xr + r*xi)/d; + *cdivi = (xi - r*xr)/d; + } else { + r = yr/yi; + d = yi + r*yr; + *cdivr = (r*xr + xi)/d; + *cdivi = (r*xi - xr)/d; + } + } + + // Nonsymmetric reduction from Hessenberg to real Schur form. + void Hqr2 (); + + + int n_; // matrix dimension. + + Real *d_, *e_; // real and imaginary parts of eigenvalues. + Real *V_; // the eigenvectors (P in our external notation) + Real *H_; // the nonsymmetric Hessenberg form. + Real *ort_; // working storage for nonsymmetric algorithm. + + // Symmetric Householder reduction to tridiagonal form. + void Tred2 (); + + // Symmetric tridiagonal QL algorithm. + void Tql2 (); + + // Nonsymmetric reduction to Hessenberg form. + void Orthes (); + +}; + +template class EigenvalueDecomposition<float>; // force instantiation. +template class EigenvalueDecomposition<double>; // force instantiation. + +template<typename Real> void EigenvalueDecomposition<Real>::Tred2() { + // This is derived from the Algol procedures tred2 by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + for (int j = 0; j < n_; j++) { + d_[j] = V(n_-1, j); + } + + // Householder reduction to tridiagonal form. + + for (int i = n_-1; i > 0; i--) { + + // Scale to avoid under/overflow. + + Real scale = 0.0; + Real h = 0.0; + for (int k = 0; k < i; k++) { + scale = scale + std::abs(d_[k]); + } + if (scale == 0.0) { + e_[i] = d_[i-1]; + for (int j = 0; j < i; j++) { + d_[j] = V(i-1, j); + V(i, j) = 0.0; + V(j, i) = 0.0; + } + } else { + + // Generate Householder vector. + + for (int k = 0; k < i; k++) { + d_[k] /= scale; + h += d_[k] * d_[k]; + } + Real f = d_[i-1]; + Real g = std::sqrt(h); + if (f > 0) { + g = -g; + } + e_[i] = scale * g; + h = h - f * g; + d_[i-1] = f - g; + for (int j = 0; j < i; j++) { + e_[j] = 0.0; + } + + // Apply similarity transformation to remaining columns. + + for (int j = 0; j < i; j++) { + f = d_[j]; + V(j, i) = f; + g =e_[j] + V(j, j) * f; + for (int k = j+1; k <= i-1; k++) { + g += V(k, j) * d_[k]; + e_[k] += V(k, j) * f; + } + e_[j] = g; + } + f = 0.0; + for (int j = 0; j < i; j++) { + e_[j] /= h; + f += e_[j] * d_[j]; + } + Real hh = f / (h + h); + for (int j = 0; j < i; j++) { + e_[j] -= hh * d_[j]; + } + for (int j = 0; j < i; j++) { + f = d_[j]; + g = e_[j]; + for (int k = j; k <= i-1; k++) { + V(k, j) -= (f * e_[k] + g * d_[k]); + } + d_[j] = V(i-1, j); + V(i, j) = 0.0; + } + } + d_[i] = h; + } + + // Accumulate transformations. + + for (int i = 0; i < n_-1; i++) { + V(n_-1, i) = V(i, i); + V(i, i) = 1.0; + Real h = d_[i+1]; + if (h != 0.0) { + for (int k = 0; k <= i; k++) { + d_[k] = V(k, i+1) / h; + } + for (int j = 0; j <= i; j++) { + Real g = 0.0; + for (int k = 0; k <= i; k++) { + g += V(k, i+1) * V(k, j); + } + for (int k = 0; k <= i; k++) { + V(k, j) -= g * d_[k]; + } + } + } + for (int k = 0; k <= i; k++) { + V(k, i+1) = 0.0; + } + } + for (int j = 0; j < n_; j++) { + d_[j] = V(n_-1, j); + V(n_-1, j) = 0.0; + } + V(n_-1, n_-1) = 1.0; + e_[0] = 0.0; +} + +template<typename Real> void EigenvalueDecomposition<Real>::Tql2() { + // This is derived from the Algol procedures tql2, by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + for (int i = 1; i < n_; i++) { + e_[i-1] = e_[i]; + } + e_[n_-1] = 0.0; + + Real f = 0.0; + Real tst1 = 0.0; + Real eps = std::numeric_limits<Real>::epsilon(); + for (int l = 0; l < n_; l++) { + + // Find small subdiagonal element + + tst1 = std::max(tst1, std::abs(d_[l]) + std::abs(e_[l])); + int m = l; + while (m < n_) { + if (std::abs(e_[m]) <= eps*tst1) { + break; + } + m++; + } + + // If m == l, d_[l] is an eigenvalue, + // otherwise, iterate. + + if (m > l) { + int iter = 0; + do { + iter = iter + 1; // (Could check iteration count here.) + + // Compute implicit shift + + Real g = d_[l]; + Real p = (d_[l+1] - g) / (2.0 *e_[l]); + Real r = Hypot(p, static_cast<Real>(1.0)); // This is a Kaldi version of hypot that works with templates. + if (p < 0) { + r = -r; + } + d_[l] =e_[l] / (p + r); + d_[l+1] =e_[l] * (p + r); + Real dl1 = d_[l+1]; + Real h = g - d_[l]; + for (int i = l+2; i < n_; i++) { + d_[i] -= h; + } + f = f + h; + + // Implicit QL transformation. + + p = d_[m]; + Real c = 1.0; + Real c2 = c; + Real c3 = c; + Real el1 =e_[l+1]; + Real s = 0.0; + Real s2 = 0.0; + for (int i = m-1; i >= l; i--) { + c3 = c2; + c2 = c; + s2 = s; + g = c *e_[i]; + h = c * p; + r = Hypot(p, e_[i]); // This is a Kaldi version of Hypot that works with templates. + e_[i+1] = s * r; + s =e_[i] / r; + c = p / r; + p = c * d_[i] - s * g; + d_[i+1] = h + s * (c * g + s * d_[i]); + + // Accumulate transformation. + + for (int k = 0; k < n_; k++) { + h = V(k, i+1); + V(k, i+1) = s * V(k, i) + c * h; + V(k, i) = c * V(k, i) - s * h; + } + } + p = -s * s2 * c3 * el1 *e_[l] / dl1; + e_[l] = s * p; + d_[l] = c * p; + + // Check for convergence. + + } while (std::abs(e_[l]) > eps*tst1); + } + d_[l] = d_[l] + f; + e_[l] = 0.0; + } + + // Sort eigenvalues and corresponding vectors. + + for (int i = 0; i < n_-1; i++) { + int k = i; + Real p = d_[i]; + for (int j = i+1; j < n_; j++) { + if (d_[j] < p) { + k = j; + p = d_[j]; + } + } + if (k != i) { + d_[k] = d_[i]; + d_[i] = p; + for (int j = 0; j < n_; j++) { + p = V(j, i); + V(j, i) = V(j, k); + V(j, k) = p; + } + } + } +} + +template<typename Real> +void EigenvalueDecomposition<Real>::Orthes() { + + // This is derived from the Algol procedures orthes and ortran, + // by Martin and Wilkinson, Handbook for Auto. Comp., + // Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutines in EISPACK. + + int low = 0; + int high = n_-1; + + for (int m = low+1; m <= high-1; m++) { + + // Scale column. + + Real scale = 0.0; + for (int i = m; i <= high; i++) { + scale = scale + std::abs(H(i, m-1)); + } + if (scale != 0.0) { + + // Compute Householder transformation. + + Real h = 0.0; + for (int i = high; i >= m; i--) { + ort_[i] = H(i, m-1)/scale; + h += ort_[i] * ort_[i]; + } + Real g = std::sqrt(h); + if (ort_[m] > 0) { + g = -g; + } + h = h - ort_[m] * g; + ort_[m] = ort_[m] - g; + + // Apply Householder similarity transformation + // H = (I-u*u'/h)*H*(I-u*u')/h) + + for (int j = m; j < n_; j++) { + Real f = 0.0; + for (int i = high; i >= m; i--) { + f += ort_[i]*H(i, j); + } + f = f/h; + for (int i = m; i <= high; i++) { + H(i, j) -= f*ort_[i]; + } + } + + for (int i = 0; i <= high; i++) { + Real f = 0.0; + for (int j = high; j >= m; j--) { + f += ort_[j]*H(i, j); + } + f = f/h; + for (int j = m; j <= high; j++) { + H(i, j) -= f*ort_[j]; + } + } + ort_[m] = scale*ort_[m]; + H(m, m-1) = scale*g; + } + } + + // Accumulate transformations (Algol's ortran). + + for (int i = 0; i < n_; i++) { + for (int j = 0; j < n_; j++) { + V(i, j) = (i == j ? 1.0 : 0.0); + } + } + + for (int m = high-1; m >= low+1; m--) { + if (H(m, m-1) != 0.0) { + for (int i = m+1; i <= high; i++) { + ort_[i] = H(i, m-1); + } + for (int j = m; j <= high; j++) { + Real g = 0.0; + for (int i = m; i <= high; i++) { + g += ort_[i] * V(i, j); + } + // Double division avoids possible underflow + g = (g / ort_[m]) / H(m, m-1); + for (int i = m; i <= high; i++) { + V(i, j) += g * ort_[i]; + } + } + } + } +} + +template<typename Real> void EigenvalueDecomposition<Real>::Hqr2() { + // This is derived from the Algol procedure hqr2, + // by Martin and Wilkinson, Handbook for Auto. Comp., + // Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + int nn = n_; + int n = nn-1; + int low = 0; + int high = nn-1; + Real eps = std::numeric_limits<Real>::epsilon(); + Real exshift = 0.0; + Real p = 0, q = 0, r = 0, s = 0, z=0, t, w, x, y; + + // Store roots isolated by balanc and compute matrix norm + + Real norm = 0.0; + for (int i = 0; i < nn; i++) { + if (i < low || i > high) { + d_[i] = H(i, i); + e_[i] = 0.0; + } + for (int j = std::max(i-1, 0); j < nn; j++) { + norm = norm + std::abs(H(i, j)); + } + } + + // Outer loop over eigenvalue index + + int iter = 0; + while (n >= low) { + + // Look for single small sub-diagonal element + + int l = n; + while (l > low) { + s = std::abs(H(l-1, l-1)) + std::abs(H(l, l)); + if (s == 0.0) { + s = norm; + } + if (std::abs(H(l, l-1)) < eps * s) { + break; + } + l--; + } + + // Check for convergence + // One root found + + if (l == n) { + H(n, n) = H(n, n) + exshift; + d_[n] = H(n, n); + e_[n] = 0.0; + n--; + iter = 0; + + // Two roots found + + } else if (l == n-1) { + w = H(n, n-1) * H(n-1, n); + p = (H(n-1, n-1) - H(n, n)) / 2.0; + q = p * p + w; + z = std::sqrt(std::abs(q)); + H(n, n) = H(n, n) + exshift; + H(n-1, n-1) = H(n-1, n-1) + exshift; + x = H(n, n); + + // Real pair + + if (q >= 0) { + if (p >= 0) { + z = p + z; + } else { + z = p - z; + } + d_[n-1] = x + z; + d_[n] = d_[n-1]; + if (z != 0.0) { + d_[n] = x - w / z; + } + e_[n-1] = 0.0; + e_[n] = 0.0; + x = H(n, n-1); + s = std::abs(x) + std::abs(z); + p = x / s; + q = z / s; + r = std::sqrt(p * p+q * q); + p = p / r; + q = q / r; + + // Row modification + + for (int j = n-1; j < nn; j++) { + z = H(n-1, j); + H(n-1, j) = q * z + p * H(n, j); + H(n, j) = q * H(n, j) - p * z; + } + + // Column modification + + for (int i = 0; i <= n; i++) { + z = H(i, n-1); + H(i, n-1) = q * z + p * H(i, n); + H(i, n) = q * H(i, n) - p * z; + } + + // Accumulate transformations + + for (int i = low; i <= high; i++) { + z = V(i, n-1); + V(i, n-1) = q * z + p * V(i, n); + V(i, n) = q * V(i, n) - p * z; + } + + // Complex pair + + } else { + d_[n-1] = x + p; + d_[n] = x + p; + e_[n-1] = z; + e_[n] = -z; + } + n = n - 2; + iter = 0; + + // No convergence yet + + } else { + + // Form shift + + x = H(n, n); + y = 0.0; + w = 0.0; + if (l < n) { + y = H(n-1, n-1); + w = H(n, n-1) * H(n-1, n); + } + + // Wilkinson's original ad hoc shift + + if (iter == 10) { + exshift += x; + for (int i = low; i <= n; i++) { + H(i, i) -= x; + } + s = std::abs(H(n, n-1)) + std::abs(H(n-1, n-2)); + x = y = 0.75 * s; + w = -0.4375 * s * s; + } + + // MATLAB's new ad hoc shift + + if (iter == 30) { + s = (y - x) / 2.0; + s = s * s + w; + if (s > 0) { + s = std::sqrt(s); + if (y < x) { + s = -s; + } + s = x - w / ((y - x) / 2.0 + s); + for (int i = low; i <= n; i++) { + H(i, i) -= s; + } + exshift += s; + x = y = w = 0.964; + } + } + + iter = iter + 1; // (Could check iteration count here.) + + // Look for two consecutive small sub-diagonal elements + + int m = n-2; + while (m >= l) { + z = H(m, m); + r = x - z; + s = y - z; + p = (r * s - w) / H(m+1, m) + H(m, m+1); + q = H(m+1, m+1) - z - r - s; + r = H(m+2, m+1); + s = std::abs(p) + std::abs(q) + std::abs(r); + p = p / s; + q = q / s; + r = r / s; + if (m == l) { + break; + } + if (std::abs(H(m, m-1)) * (std::abs(q) + std::abs(r)) < + eps * (std::abs(p) * (std::abs(H(m-1, m-1)) + std::abs(z) + + std::abs(H(m+1, m+1))))) { + break; + } + m--; + } + + for (int i = m+2; i <= n; i++) { + H(i, i-2) = 0.0; + if (i > m+2) { + H(i, i-3) = 0.0; + } + } + + // Double QR step involving rows l:n and columns m:n + + for (int k = m; k <= n-1; k++) { + bool notlast = (k != n-1); + if (k != m) { + p = H(k, k-1); + q = H(k+1, k-1); + r = (notlast ? H(k+2, k-1) : 0.0); + x = std::abs(p) + std::abs(q) + std::abs(r); + if (x != 0.0) { + p = p / x; + q = q / x; + r = r / x; + } + } + if (x == 0.0) { + break; + } + s = std::sqrt(p * p + q * q + r * r); + if (p < 0) { + s = -s; + } + if (s != 0) { + if (k != m) { + H(k, k-1) = -s * x; + } else if (l != m) { + H(k, k-1) = -H(k, k-1); + } + p = p + s; + x = p / s; + y = q / s; + z = r / s; + q = q / p; + r = r / p; + + // Row modification + + for (int j = k; j < nn; j++) { + p = H(k, j) + q * H(k+1, j); + if (notlast) { + p = p + r * H(k+2, j); + H(k+2, j) = H(k+2, j) - p * z; + } + H(k, j) = H(k, j) - p * x; + H(k+1, j) = H(k+1, j) - p * y; + } + + // Column modification + + for (int i = 0; i <= std::min(n, k+3); i++) { + p = x * H(i, k) + y * H(i, k+1); + if (notlast) { + p = p + z * H(i, k+2); + H(i, k+2) = H(i, k+2) - p * r; + } + H(i, k) = H(i, k) - p; + H(i, k+1) = H(i, k+1) - p * q; + } + + // Accumulate transformations + + for (int i = low; i <= high; i++) { + p = x * V(i, k) + y * V(i, k+1); + if (notlast) { + p = p + z * V(i, k+2); + V(i, k+2) = V(i, k+2) - p * r; + } + V(i, k) = V(i, k) - p; + V(i, k+1) = V(i, k+1) - p * q; + } + } // (s != 0) + } // k loop + } // check convergence + } // while (n >= low) + + // Backsubstitute to find vectors of upper triangular form + + if (norm == 0.0) { + return; + } + + for (n = nn-1; n >= 0; n--) { + p = d_[n]; + q = e_[n]; + + // Real vector + + if (q == 0) { + int l = n; + H(n, n) = 1.0; + for (int i = n-1; i >= 0; i--) { + w = H(i, i) - p; + r = 0.0; + for (int j = l; j <= n; j++) { + r = r + H(i, j) * H(j, n); + } + if (e_[i] < 0.0) { + z = w; + s = r; + } else { + l = i; + if (e_[i] == 0.0) { + if (w != 0.0) { + H(i, n) = -r / w; + } else { + H(i, n) = -r / (eps * norm); + } + + // Solve real equations + + } else { + x = H(i, i+1); + y = H(i+1, i); + q = (d_[i] - p) * (d_[i] - p) +e_[i] *e_[i]; + t = (x * s - z * r) / q; + H(i, n) = t; + if (std::abs(x) > std::abs(z)) { + H(i+1, n) = (-r - w * t) / x; + } else { + H(i+1, n) = (-s - y * t) / z; + } + } + + // Overflow control + + t = std::abs(H(i, n)); + if ((eps * t) * t > 1) { + for (int j = i; j <= n; j++) { + H(j, n) = H(j, n) / t; + } + } + } + } + + // Complex vector + + } else if (q < 0) { + int l = n-1; + + // Last vector component imaginary so matrix is triangular + + if (std::abs(H(n, n-1)) > std::abs(H(n-1, n))) { + H(n-1, n-1) = q / H(n, n-1); + H(n-1, n) = -(H(n, n) - p) / H(n, n-1); + } else { + Real cdivr, cdivi; + cdiv(0.0, -H(n-1, n), H(n-1, n-1)-p, q, &cdivr, &cdivi); + H(n-1, n-1) = cdivr; + H(n-1, n) = cdivi; + } + H(n, n-1) = 0.0; + H(n, n) = 1.0; + for (int i = n-2; i >= 0; i--) { + Real ra, sa, vr, vi; + ra = 0.0; + sa = 0.0; + for (int j = l; j <= n; j++) { + ra = ra + H(i, j) * H(j, n-1); + sa = sa + H(i, j) * H(j, n); + } + w = H(i, i) - p; + + if (e_[i] < 0.0) { + z = w; + r = ra; + s = sa; + } else { + l = i; + if (e_[i] == 0) { + Real cdivr, cdivi; + cdiv(-ra, -sa, w, q, &cdivr, &cdivi); + H(i, n-1) = cdivr; + H(i, n) = cdivi; + } else { + Real cdivr, cdivi; + // Solve complex equations + + x = H(i, i+1); + y = H(i+1, i); + vr = (d_[i] - p) * (d_[i] - p) +e_[i] *e_[i] - q * q; + vi = (d_[i] - p) * 2.0 * q; + if (vr == 0.0 && vi == 0.0) { + vr = eps * norm * (std::abs(w) + std::abs(q) + + std::abs(x) + std::abs(y) + std::abs(z)); + } + cdiv(x*r-z*ra+q*sa, x*s-z*sa-q*ra, vr, vi, &cdivr, &cdivi); + H(i, n-1) = cdivr; + H(i, n) = cdivi; + if (std::abs(x) > (std::abs(z) + std::abs(q))) { + H(i+1, n-1) = (-ra - w * H(i, n-1) + q * H(i, n)) / x; + H(i+1, n) = (-sa - w * H(i, n) - q * H(i, n-1)) / x; + } else { + cdiv(-r-y*H(i, n-1), -s-y*H(i, n), z, q, &cdivr, &cdivi); + H(i+1, n-1) = cdivr; + H(i+1, n) = cdivi; + } + } + + // Overflow control + + t = std::max(std::abs(H(i, n-1)), std::abs(H(i, n))); + if ((eps * t) * t > 1) { + for (int j = i; j <= n; j++) { + H(j, n-1) = H(j, n-1) / t; + H(j, n) = H(j, n) / t; + } + } + } + } + } + } + + // Vectors of isolated roots + + for (int i = 0; i < nn; i++) { + if (i < low || i > high) { + for (int j = i; j < nn; j++) { + V(i, j) = H(i, j); + } + } + } + + // Back transformation to get eigenvectors of original matrix + + for (int j = nn-1; j >= low; j--) { + for (int i = low; i <= high; i++) { + z = 0.0; + for (int k = low; k <= std::min(j, high); k++) { + z = z + V(i, k) * H(k, j); + } + V(i, j) = z; + } + } +} + +template<typename Real> +EigenvalueDecomposition<Real>::EigenvalueDecomposition(const MatrixBase<Real> &A) { + KALDI_ASSERT(A.NumCols() == A.NumRows() && A.NumCols() >= 1); + n_ = A.NumRows(); + V_ = new Real[n_*n_]; + d_ = new Real[n_]; + e_ = new Real[n_]; + H_ = NULL; + ort_ = NULL; + if (A.IsSymmetric(0.0)) { + + for (int i = 0; i < n_; i++) + for (int j = 0; j < n_; j++) + V(i, j) = A(i, j); // Note that V(i, j) is a member function; A(i, j) is an operator + // of the matrix A. + // Tridiagonalize. + Tred2(); + + // Diagonalize. + Tql2(); + } else { + H_ = new Real[n_*n_]; + ort_ = new Real[n_]; + for (int i = 0; i < n_; i++) + for (int j = 0; j < n_; j++) + H(i, j) = A(i, j); // as before: H is member function, A(i, j) is operator of matrix. + + // Reduce to Hessenberg form. + Orthes(); + + // Reduce Hessenberg to real Schur form. + Hqr2(); + } +} + +template<typename Real> +EigenvalueDecomposition<Real>::~EigenvalueDecomposition() { + delete [] d_; + delete [] e_; + delete [] V_; + if (H_) delete [] H_; + if (ort_) delete [] ort_; +} + +// see function MatrixBase<Real>::Eig in kaldi-matrix.cc + + +} // namespace kaldi + +#endif // KALDI_MATRIX_JAMA_EIG_H_ diff --git a/kaldi_io/src/kaldi/matrix/jama-svd.h b/kaldi_io/src/kaldi/matrix/jama-svd.h new file mode 100644 index 0000000..8304dac --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/jama-svd.h @@ -0,0 +1,531 @@ +// matrix/jama-svd.h + +// Copyright 2009-2011 Microsoft Corporation + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +// This file consists of a port and modification of materials from +// JAMA: A Java Matrix Package +// under the following notice: This software is a cooperative product of +// The MathWorks and the National Institute of Standards and Technology (NIST) +// which has been released to the public. This notice and the original code are +// available at http://math.nist.gov/javanumerics/jama/domain.notice + + +#ifndef KALDI_MATRIX_JAMA_SVD_H_ +#define KALDI_MATRIX_JAMA_SVD_H_ 1 + + +#include "matrix/kaldi-matrix.h" +#include "matrix/sp-matrix.h" +#include "matrix/cblas-wrappers.h" + +namespace kaldi { + +#if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD) +// using ATLAS as our math library, which doesn't have SVD -> need +// to implement it. + +// This routine is a modified form of jama_svd.h which is part of the TNT distribution. +// (originally comes from JAMA). + +/** Singular Value Decomposition. + * <P> + * For an m-by-n matrix A with m >= n, the singular value decomposition is + * an m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and + * an n-by-n orthogonal matrix V so that A = U*S*V'. + * <P> + * The singular values, sigma[k] = S(k, k), are ordered so that + * sigma[0] >= sigma[1] >= ... >= sigma[n-1]. + * <P> + * The singular value decompostion always exists, so the constructor will + * never fail. The matrix condition number and the effective numerical + * rank can be computed from this decomposition. + + * <p> + * (Adapted from JAMA, a Java Matrix Library, developed by jointly + * by the Mathworks and NIST; see http://math.nist.gov/javanumerics/jama). + */ + + +template<typename Real> +bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in, + MatrixBase<Real> *U_in, + MatrixBase<Real> *V_in) { // Destructive! + KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this); + int wantu = (U_in != NULL), wantv = (V_in != NULL); + Matrix<Real> Utmp, Vtmp; + MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp); + VectorBase<Real> &s = *s_in; + + int m = num_rows_, n = num_cols_; + KALDI_ASSERT(m>=n && m != 0 && n != 0); + if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n); + if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n); + KALDI_ASSERT((int)s.Dim() == n); // n<=m so n is min. + + int nu = n; + U.SetZero(); // make sure all zero. + Vector<Real> e(n); + Vector<Real> work(m); + MatrixBase<Real> &A(*this); + Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(), + *udata = U.Data(), *vdata = V.Data(); + int astride = static_cast<int>(A.Stride()), + ustride = static_cast<int>(U.Stride()), + vstride = static_cast<int>(V.Stride()); + int i = 0, j = 0, k = 0; + + // Reduce A to bidiagonal form, storing the diagonal elements + // in s and the super-diagonal elements in e. + + int nct = std::min(m-1, n); + int nrt = std::max(0, std::min(n-2, m)); + for (k = 0; k < std::max(nct, nrt); k++) { + if (k < nct) { + + // Compute the transformation for the k-th column and + // place the k-th diagonal in s(k). + // Compute 2-norm of k-th column without under/overflow. + s(k) = 0; + for (i = k; i < m; i++) { + s(k) = hypot(s(k), A(i, k)); + } + if (s(k) != 0.0) { + if (A(k, k) < 0.0) { + s(k) = -s(k); + } + for (i = k; i < m; i++) { + A(i, k) /= s(k); + } + A(k, k) += 1.0; + } + s(k) = -s(k); + } + for (j = k+1; j < n; j++) { + if ((k < nct) && (s(k) != 0.0)) { + + // Apply the transformation. + + Real t = cblas_Xdot(m - k, adata + astride*k + k, astride, + adata + astride*k + j, astride); + /*for (i = k; i < m; i++) { + t += adata[i*astride + k]*adata[i*astride + j]; // A(i, k)*A(i, j); // 3 + }*/ + t = -t/A(k, k); + cblas_Xaxpy(m - k, t, adata + k*astride + k, astride, + adata + k*astride + j, astride); + /*for (i = k; i < m; i++) { + adata[i*astride + j] += t*adata[i*astride + k]; // A(i, j) += t*A(i, k); // 5 + }*/ + } + + // Place the k-th row of A into e for the + // subsequent calculation of the row transformation. + + e(j) = A(k, j); + } + if (wantu & (k < nct)) { + + // Place the transformation in U for subsequent back + // multiplication. + + for (i = k; i < m; i++) { + U(i, k) = A(i, k); + } + } + if (k < nrt) { + + // Compute the k-th row transformation and place the + // k-th super-diagonal in e(k). + // Compute 2-norm without under/overflow. + e(k) = 0; + for (i = k+1; i < n; i++) { + e(k) = hypot(e(k), e(i)); + } + if (e(k) != 0.0) { + if (e(k+1) < 0.0) { + e(k) = -e(k); + } + for (i = k+1; i < n; i++) { + e(i) /= e(k); + } + e(k+1) += 1.0; + } + e(k) = -e(k); + if ((k+1 < m) & (e(k) != 0.0)) { + + // Apply the transformation. + + for (i = k+1; i < m; i++) { + work(i) = 0.0; + } + for (j = k+1; j < n; j++) { + for (i = k+1; i < m; i++) { + workdata[i] += edata[j] * adata[i*astride + j]; // work(i) += e(j)*A(i, j); // 5 + } + } + for (j = k+1; j < n; j++) { + Real t(-e(j)/e(k+1)); + cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1, + adata + (k+1)*astride + j, astride); + /* + for (i = k+1; i < m; i++) { + adata[i*astride + j] += t*workdata[i]; // A(i, j) += t*work(i); // 5 + }*/ + } + } + if (wantv) { + + // Place the transformation in V for subsequent + // back multiplication. + + for (i = k+1; i < n; i++) { + V(i, k) = e(i); + } + } + } + } + + // Set up the final bidiagonal matrix or order p. + + int p = std::min(n, m+1); + if (nct < n) { + s(nct) = A(nct, nct); + } + if (m < p) { + s(p-1) = 0.0; + } + if (nrt+1 < p) { + e(nrt) = A(nrt, p-1); + } + e(p-1) = 0.0; + + // If required, generate U. + + if (wantu) { + for (j = nct; j < nu; j++) { + for (i = 0; i < m; i++) { + U(i, j) = 0.0; + } + U(j, j) = 1.0; + } + for (k = nct-1; k >= 0; k--) { + if (s(k) != 0.0) { + for (j = k+1; j < nu; j++) { + Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride); + //for (i = k; i < m; i++) { + // t += udata[i*ustride + k]*udata[i*ustride + j]; // t += U(i, k)*U(i, j); // 8 + // } + t = -t/U(k, k); + cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride, + udata + k*ustride + j, ustride); + /*for (i = k; i < m; i++) { + udata[i*ustride + j] += t*udata[i*ustride + k]; // U(i, j) += t*U(i, k); // 4 + }*/ + } + for (i = k; i < m; i++ ) { + U(i, k) = -U(i, k); + } + U(k, k) = 1.0 + U(k, k); + for (i = 0; i < k-1; i++) { + U(i, k) = 0.0; + } + } else { + for (i = 0; i < m; i++) { + U(i, k) = 0.0; + } + U(k, k) = 1.0; + } + } + } + + // If required, generate V. + + if (wantv) { + for (k = n-1; k >= 0; k--) { + if ((k < nrt) & (e(k) != 0.0)) { + for (j = k+1; j < nu; j++) { + Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride, + vdata + (k+1)*vstride + j, vstride); + /*Real t (0.0); + for (i = k+1; i < n; i++) { + t += vdata[i*vstride + k]*vdata[i*vstride + j]; // t += V(i, k)*V(i, j); // 7 + }*/ + t = -t/V(k+1, k); + cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride, + vdata + (k+1)*vstride + j, vstride); + /*for (i = k+1; i < n; i++) { + vdata[i*vstride + j] += t*vdata[i*vstride + k]; // V(i, j) += t*V(i, k); // 7 + }*/ + } + } + for (i = 0; i < n; i++) { + V(i, k) = 0.0; + } + V(k, k) = 1.0; + } + } + + // Main iteration loop for the singular values. + + int pp = p-1; + int iter = 0; + // note: -52.0 is from Jama code; the -23 is the extension + // to float, because mantissa length in (double, float) + // is (52, 23) bits respectively. + Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0)); + // Note: the -966 was taken from Jama code, but the -120 is a guess + // of how to extend this to float... the exponent in double goes + // from -1022 .. 1023, and in float from -126..127. I'm not sure + // what the significance of 966 is, so -120 just represents a number + // that's a bit less negative than -126. If we get convergence + // failure in float only, this may mean that we have to make the + // -120 value less negative. + Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 )); + + while (p > 0) { + int k = 0; + int kase = 0; + + if (iter == 500 || iter == 750) { + KALDI_WARN << "Svd taking a long time: making convergence criterion less exact."; + eps = pow(static_cast<Real>(0.8), eps); + tiny = pow(static_cast<Real>(0.8), tiny); + } + if (iter > 1000) { + KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n; + return false; + } + + // This section of the program inspects for + // negligible elements in the s and e arrays. On + // completion the variables kase and k are set as follows. + + // kase = 1 if s(p) and e(k-1) are negligible and k < p + // kase = 2 if s(k) is negligible and k < p + // kase = 3 if e(k-1) is negligible, k < p, and + // s(k), ..., s(p) are not negligible (qr step). + // kase = 4 if e(p-1) is negligible (convergence). + + for (k = p-2; k >= -1; k--) { + if (k == -1) { + break; + } + if (std::abs(e(k)) <= + tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) { + e(k) = 0.0; + break; + } + } + if (k == p-2) { + kase = 4; + } else { + int ks; + for (ks = p-1; ks >= k; ks--) { + if (ks == k) { + break; + } + Real t( (ks != p ? std::abs(e(ks)) : 0.) + + (ks != k+1 ? std::abs(e(ks-1)) : 0.)); + if (std::abs(s(ks)) <= tiny + eps*t) { + s(ks) = 0.0; + break; + } + } + if (ks == k) { + kase = 3; + } else if (ks == p-1) { + kase = 1; + } else { + kase = 2; + k = ks; + } + } + k++; + + // Perform the task indicated by kase. + + switch (kase) { + + // Deflate negligible s(p). + + case 1: { + Real f(e(p-2)); + e(p-2) = 0.0; + for (j = p-2; j >= k; j--) { + Real t( hypot(s(j), f)); + Real cs(s(j)/t); + Real sn(f/t); + s(j) = t; + if (j != k) { + f = -sn*e(j-1); + e(j-1) = cs*e(j-1); + } + if (wantv) { + for (i = 0; i < n; i++) { + t = cs*V(i, j) + sn*V(i, p-1); + V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1); + V(i, j) = t; + } + } + } + } + break; + + // Split at negligible s(k). + + case 2: { + Real f(e(k-1)); + e(k-1) = 0.0; + for (j = k; j < p; j++) { + Real t(hypot(s(j), f)); + Real cs( s(j)/t); + Real sn(f/t); + s(j) = t; + f = -sn*e(j); + e(j) = cs*e(j); + if (wantu) { + for (i = 0; i < m; i++) { + t = cs*U(i, j) + sn*U(i, k-1); + U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1); + U(i, j) = t; + } + } + } + } + break; + + // Perform one qr step. + + case 3: { + + // Calculate the shift. + + Real scale = std::max(std::max(std::max(std::max( + std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))), + std::abs(s(k))), std::abs(e(k))); + Real sp = s(p-1)/scale; + Real spm1 = s(p-2)/scale; + Real epm1 = e(p-2)/scale; + Real sk = s(k)/scale; + Real ek = e(k)/scale; + Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0; + Real c = (sp*epm1)*(sp*epm1); + Real shift = 0.0; + if ((b != 0.0) || (c != 0.0)) { + shift = std::sqrt(b*b + c); + if (b < 0.0) { + shift = -shift; + } + shift = c/(b + shift); + } + Real f = (sk + sp)*(sk - sp) + shift; + Real g = sk*ek; + + // Chase zeros. + + for (j = k; j < p-1; j++) { + Real t = hypot(f, g); + Real cs = f/t; + Real sn = g/t; + if (j != k) { + e(j-1) = t; + } + f = cs*s(j) + sn*e(j); + e(j) = cs*e(j) - sn*s(j); + g = sn*s(j+1); + s(j+1) = cs*s(j+1); + if (wantv) { + cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn); + /*for (i = 0; i < n; i++) { + t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1]; // t = cs*V(i, j) + sn*V(i, j+1); // 13 + vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1]; // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5 + vdata[i*vstride + j] = t; // V(i, j) = t; // 4 + }*/ + } + t = hypot(f, g); + cs = f/t; + sn = g/t; + s(j) = t; + f = cs*e(j) + sn*s(j+1); + s(j+1) = -sn*e(j) + cs*s(j+1); + g = sn*e(j+1); + e(j+1) = cs*e(j+1); + if (wantu && (j < m-1)) { + cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn); + /*for (i = 0; i < m; i++) { + t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1]; // t = cs*U(i, j) + sn*U(i, j+1); // 7 + udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1]; // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8 + udata[i*ustride + j] = t; // U(i, j) = t; // 1 + }*/ + } + } + e(p-2) = f; + iter = iter + 1; + } + break; + + // Convergence. + + case 4: { + + // Make the singular values positive. + + if (s(k) <= 0.0) { + s(k) = (s(k) < 0.0 ? -s(k) : 0.0); + if (wantv) { + for (i = 0; i <= pp; i++) { + V(i, k) = -V(i, k); + } + } + } + + // Order the singular values. + + while (k < pp) { + if (s(k) >= s(k+1)) { + break; + } + Real t = s(k); + s(k) = s(k+1); + s(k+1) = t; + if (wantv && (k < n-1)) { + for (i = 0; i < n; i++) { + t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t; + } + } + if (wantu && (k < m-1)) { + for (i = 0; i < m; i++) { + t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t; + } + } + k++; + } + iter = 0; + p--; + } + break; + } + } + return true; +} + +#endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD) + +} // namespace kaldi + +#endif // KALDI_MATRIX_JAMA_SVD_H_ diff --git a/kaldi_io/src/kaldi/matrix/kaldi-blas.h b/kaldi_io/src/kaldi/matrix/kaldi-blas.h new file mode 100644 index 0000000..5d25ab8 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/kaldi-blas.h @@ -0,0 +1,132 @@ +// matrix/kaldi-blas.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_KALDI_BLAS_H_ +#define KALDI_MATRIX_KALDI_BLAS_H_ + +// This file handles the #includes for BLAS, LAPACK and so on. +// It manipulates the declarations into a common format that kaldi can handle. +// However, the kaldi code will check whether HAVE_ATLAS is defined as that +// code is called a bit differently from CLAPACK that comes from other sources. + +// There are three alternatives: +// (i) you have ATLAS, which includes the ATLAS implementation of CBLAS +// plus a subset of CLAPACK (but with clapack_ in the function declarations). +// In this case, define HAVE_ATLAS and make sure the relevant directories are +// in the include path. + +// (ii) you have CBLAS (some implementation thereof) plus CLAPACK. +// In this case, define HAVE_CLAPACK. +// [Since CLAPACK depends on BLAS, the presence of BLAS is implicit]. + +// (iii) you have the MKL library, which includes CLAPACK and CBLAS. + +// Note that if we are using ATLAS, no Svd implementation is supplied, +// so we define HAVE_Svd to be zero and this directs our implementation to +// supply its own "by hand" implementation which is based on TNT code. + + + + +#if (defined(HAVE_CLAPACK) && (defined(HAVE_ATLAS) || defined(HAVE_MKL))) \ + || (defined(HAVE_ATLAS) && defined(HAVE_MKL)) +#error "Do not define more than one of HAVE_CLAPACK, HAVE_ATLAS and HAVE_MKL" +#endif + +#ifdef HAVE_ATLAS + extern "C" { + #include <cblas.h> + #include <clapack.h> + } +#elif defined(HAVE_CLAPACK) + #ifdef __APPLE__ + #ifndef __has_extension + #define __has_extension(x) 0 + #endif + #define vImage_Utilities_h + #define vImage_CVUtilities_h + #include <Accelerate/Accelerate.h> + typedef __CLPK_integer integer; + typedef __CLPK_logical logical; + typedef __CLPK_real real; + typedef __CLPK_doublereal doublereal; + typedef __CLPK_complex complex; + typedef __CLPK_doublecomplex doublecomplex; + typedef __CLPK_ftnlen ftnlen; + #else + extern "C" { + // May be in /usr/[local]/include if installed; else this uses the one + // from the tools/CLAPACK_include directory. + #include <cblas.h> + #include <f2c.h> + #include <clapack.h> + + // get rid of macros from f2c.h -- these are dangerous. + #undef abs + #undef dabs + #undef min + #undef max + #undef dmin + #undef dmax + #undef bit_test + #undef bit_clear + #undef bit_set + } + #endif +#elif defined(HAVE_MKL) + extern "C" { + #include <mkl.h> + } +#elif defined(HAVE_OPENBLAS) + // getting cblas.h and lapacke.h from <openblas-install-dir>/. + // putting in "" not <> to search -I before system libraries. + #include "cblas.h" + #include "lapacke.h" + #undef I + #undef complex + // get rid of macros from f2c.h -- these are dangerous. + #undef abs + #undef dabs + #undef min + #undef max + #undef dmin + #undef dmax + #undef bit_test + #undef bit_clear + #undef bit_set +#else + #error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)" +#endif + +#ifdef HAVE_OPENBLAS +typedef int KaldiBlasInt; // try int. +#endif +#ifdef HAVE_CLAPACK +typedef integer KaldiBlasInt; +#endif +#ifdef HAVE_MKL +typedef MKL_INT KaldiBlasInt; +#endif + +#ifdef HAVE_ATLAS +// in this case there is no need for KaldiBlasInt-- this typedef is only needed +// for Svd code which is not included in ATLAS (we re-implement it). +#endif + + +#endif // KALDI_MATRIX_KALDI_BLAS_H_ diff --git a/kaldi_io/src/kaldi/matrix/kaldi-gpsr.h b/kaldi_io/src/kaldi/matrix/kaldi-gpsr.h new file mode 100644 index 0000000..c294bdd --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/kaldi-gpsr.h @@ -0,0 +1,166 @@ +// matrix/kaldi-gpsr.h + +// Copyright 2012 Arnab Ghoshal + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_KALDI_GPSR_H_ +#define KALDI_MATRIX_KALDI_GPSR_H_ + +#include <string> +#include <vector> + +#include "base/kaldi-common.h" +#include "matrix/matrix-lib.h" +#include "itf/options-itf.h" + +namespace kaldi { + +/// This is an implementation of the GPSR algorithm. See, Figueiredo, Nowak and +/// Wright, "Gradient Projection for Sparse Reconstruction: Application to +/// Compressed Sensing and Other Inverse Problems," IEEE Journal of Selected +/// Topics in Signal Processing, vol. 1, no. 4, pp. 586-597, 2007. +/// http://dx.doi.org/10.1109/JSTSP.2007.910281 + +/// The GPSR algorithm, described in Figueiredo, et al., 2007, solves: +/// \f[ \min_x 0.5 * ||y - Ax||_2^2 + \tau ||x||_1, \f] +/// where \f$ x \in R^n, y \in R^k \f$, and \f$ A \in R^{n \times k} \f$. +/// In this implementation, we solve: +/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1, \f] +/// which is the more natural form in which such problems arise in our case. +/// Here, \f$ H = A^T A \in R^{n \times n} \f$ and \f$ g = A^T y \in R^n \f$. + + +/** \struct GpsrConfig + * Configuration variables needed in the GPSR algorithm. + */ +struct GpsrConfig { + bool use_gpsr_bb; ///< Use the Barzilai-Borwein gradient projection method + + /// The following options are common to both the basic & Barzilai-Borwein + /// versions of GPSR + double stop_thresh; ///< Stopping threshold + int32 max_iters; ///< Maximum number of iterations + double gpsr_tau; ///< Regularization scale + double alpha_min; ///< Minimum step size in the feasible direction + double alpha_max; ///< Maximum step size in the feasible direction + double max_sparsity; ///< Maximum percentage of dimensions set to 0 + double tau_reduction; ///< Multiply tau by this if max_sparsity reached + + /// The following options are for the backtracking line search in basic GPSR. + /// Step size reduction factor in backtracking line search. 0 < beta < 1 + double gpsr_beta; + /// Improvement factor in backtracking line search, i.e. the new objective + /// function must be less than the old one by mu times the gradient in the + /// direction of the change in x. 0 < mu < 1 + double gpsr_mu; + int32 max_iters_backtrak; ///< Max iterations for backtracking line search + + bool debias; ///< Do debiasing, i.e. unconstrained optimization at the end + double stop_thresh_debias; ///< Stopping threshold for debiasing stage + int32 max_iters_debias; ///< Maximum number of iterations for debiasing stage + + GpsrConfig() { + use_gpsr_bb = true; + + stop_thresh = 0.005; + max_iters = 100; + gpsr_tau = 10; + alpha_min = 1.0e-10; + alpha_max = 1.0e+20; + max_sparsity = 0.9; + tau_reduction = 0.8; + + gpsr_beta = 0.5; + gpsr_mu = 0.1; + max_iters_backtrak = 50; + + debias = false; + stop_thresh_debias = 0.001; + max_iters_debias = 50; + } + + void Register(OptionsItf *po); +}; + +inline void GpsrConfig::Register(OptionsItf *po) { + std::string module = "GpsrConfig: "; + po->Register("use-gpsr-bb", &use_gpsr_bb, module+ + "Use the Barzilai-Borwein gradient projection method."); + + po->Register("stop-thresh", &stop_thresh, module+ + "Stopping threshold for GPSR."); + po->Register("max-iters", &max_iters, module+ + "Maximum number of iterations of GPSR."); + po->Register("gpsr-tau", &gpsr_tau, module+ + "Regularization scale for GPSR."); + po->Register("alpha-min", &alpha_min, module+ + "Minimum step size in feasible direction."); + po->Register("alpha-max", &alpha_max, module+ + "Maximum step size in feasible direction."); + po->Register("max-sparsity", &max_sparsity, module+ + "Maximum percentage of dimensions set to 0."); + po->Register("tau-reduction", &tau_reduction, module+ + "Multiply tau by this if maximum sparsity is reached."); + + po->Register("gpsr-beta", &gpsr_beta, module+ + "Step size reduction factor in backtracking line search (0<beta<1)."); + po->Register("gpsr-mu", &gpsr_mu, module+ + "Improvement factor in backtracking line search (0<mu<1)."); + po->Register("max-iters-backtrack", &max_iters_backtrak, module+ + "Maximum number of iterations of backtracking line search."); + + po->Register("debias", &debias, module+ + "Do final debiasing step."); + po->Register("stop-thresh-debias", &stop_thresh_debias, module+ + "Stopping threshold for debiaisng step."); + po->Register("max-iters-debias", &max_iters_debias, module+ + "Maximum number of iterations of debiasing."); +} + +/// Solves a quadratic program in \f$ x \f$, with L_1 regularization: +/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1. \f] +/// This is similar to SolveQuadraticProblem() in sp-matrix.h with an added +/// L_1 term. +template<typename Real> +Real Gpsr(const GpsrConfig &opts, const SpMatrix<Real> &H, + const Vector<Real> &g, Vector<Real> *x, + const char *debug_str = "[unknown]") { + if (opts.use_gpsr_bb) + return GpsrBB(opts, H, g, x, debug_str); + else + return GpsrBasic(opts, H, g, x, debug_str); +} + +/// This is the basic GPSR algorithm, where the step size is determined by a +/// backtracking line search. The line search is called "Armijo rule along the +/// projection arc" in Bertsekas, Nonlinear Programming, 2nd ed. page 230. +template<typename Real> +Real GpsrBasic(const GpsrConfig &opts, const SpMatrix<Real> &H, + const Vector<Real> &g, Vector<Real> *x, + const char *debug_str = "[unknown]"); + +/// This is the paper calls the Barzilai-Borwein variant. This is a constrained +/// Netwon's method where the Hessian is approximated by scaled identity matrix +template<typename Real> +Real GpsrBB(const GpsrConfig &opts, const SpMatrix<Real> &H, + const Vector<Real> &g, Vector<Real> *x, + const char *debug_str = "[unknown]"); + + +} // namespace kaldi + +#endif // KALDI_MATRIX_KALDI_GPSR_H_ diff --git a/kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h b/kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h new file mode 100644 index 0000000..8bc4749 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h @@ -0,0 +1,62 @@ +// matrix/kaldi-matrix-inl.h + +// Copyright 2009-2011 Microsoft Corporation; Haihua Xu + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_KALDI_MATRIX_INL_H_ +#define KALDI_MATRIX_KALDI_MATRIX_INL_H_ 1 + +#include "matrix/kaldi-vector.h" + +namespace kaldi { + +/// Empty constructor +template<typename Real> +Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { } + + +template<> +template<> +void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb); + +template<> +template<> +void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb); + +template<typename Real> +inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) { + M.Write(os, false); + return os; +} + +template<typename Real> +inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) { + M.Read(is, false); + return is; +} + + +template<typename Real> +inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) { + M.Read(is, false); + return is; +} + +}// namespace kaldi + + +#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_ diff --git a/kaldi_io/src/kaldi/matrix/kaldi-matrix.h b/kaldi_io/src/kaldi/matrix/kaldi-matrix.h new file mode 100644 index 0000000..e6829e0 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/kaldi-matrix.h @@ -0,0 +1,983 @@ +// matrix/kaldi-matrix.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Lukas Burget; +// Saarland University; Petr Schwarz; Yanmin Qian; +// Karel Vesely; Go Vivace Inc.; Haihua Xu + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_KALDI_MATRIX_H_ +#define KALDI_MATRIX_KALDI_MATRIX_H_ 1 + +#include "matrix/matrix-common.h" + +namespace kaldi { + +/// @{ \addtogroup matrix_funcs_scalar + +/// We need to declare this here as it will be a friend function. +/// tr(A B), or tr(A B^T). +template<typename Real> +Real TraceMatMat(const MatrixBase<Real> &A, const MatrixBase<Real> &B, + MatrixTransposeType trans = kNoTrans); +/// @} + +/// \addtogroup matrix_group +/// @{ + +/// Base class which provides matrix operations not involving resizing +/// or allocation. Classes Matrix and SubMatrix inherit from it and take care +/// of allocation and resizing. +template<typename Real> +class MatrixBase { + public: + // so this child can access protected members of other instances. + friend class Matrix<Real>; + // friend declarations for CUDA matrices (see ../cudamatrix/) + friend class CuMatrixBase<Real>; + friend class CuMatrix<Real>; + friend class CuSubMatrix<Real>; + friend class CuPackedMatrix<Real>; + + friend class PackedMatrix<Real>; + + /// Returns number of rows (or zero for emtpy matrix). + inline MatrixIndexT NumRows() const { return num_rows_; } + + /// Returns number of columns (or zero for emtpy matrix). + inline MatrixIndexT NumCols() const { return num_cols_; } + + /// Stride (distance in memory between each row). Will be >= NumCols. + inline MatrixIndexT Stride() const { return stride_; } + + /// Returns size in bytes of the data held by the matrix. + size_t SizeInBytes() const { + return static_cast<size_t>(num_rows_) * static_cast<size_t>(stride_) * + sizeof(Real); + } + + /// Gives pointer to raw data (const). + inline const Real* Data() const { + return data_; + } + + /// Gives pointer to raw data (non-const). + inline Real* Data() { return data_; } + + /// Returns pointer to data for one row (non-const) + inline Real* RowData(MatrixIndexT i) { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < + static_cast<UnsignedMatrixIndexT>(num_rows_)); + return data_ + i * stride_; + } + + /// Returns pointer to data for one row (const) + inline const Real* RowData(MatrixIndexT i) const { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < + static_cast<UnsignedMatrixIndexT>(num_rows_)); + return data_ + i * stride_; + } + + /// Indexing operator, non-const + /// (only checks sizes if compiled with -DKALDI_PARANOID) + inline Real& operator() (MatrixIndexT r, MatrixIndexT c) { + KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(num_rows_) && + static_cast<UnsignedMatrixIndexT>(c) < + static_cast<UnsignedMatrixIndexT>(num_cols_)); + return *(data_ + r * stride_ + c); + } + /// Indexing operator, provided for ease of debugging (gdb doesn't work + /// with parenthesis operator). + Real &Index (MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); } + + /// Indexing operator, const + /// (only checks sizes if compiled with -DKALDI_PARANOID) + inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const { + KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(num_rows_) && + static_cast<UnsignedMatrixIndexT>(c) < + static_cast<UnsignedMatrixIndexT>(num_cols_)); + return *(data_ + r * stride_ + c); + } + + /* Basic setting-to-special values functions. */ + + /// Sets matrix to zero. + void SetZero(); + /// Sets all elements to a specific value. + void Set(Real); + /// Sets to zero, except ones along diagonal [for non-square matrices too] + void SetUnit(); + /// Sets to random values of a normal distribution + void SetRandn(); + /// Sets to numbers uniformly distributed on (0, 1) + void SetRandUniform(); + + /* Copying functions. These do not resize the matrix! */ + + + /// Copy given matrix. (no resize is done). + template<typename OtherReal> + void CopyFromMat(const MatrixBase<OtherReal> & M, + MatrixTransposeType trans = kNoTrans); + + /// Copy from compressed matrix. + void CopyFromMat(const CompressedMatrix &M); + + /// Copy given spmatrix. (no resize is done). + template<typename OtherReal> + void CopyFromSp(const SpMatrix<OtherReal> &M); + + /// Copy given tpmatrix. (no resize is done). + template<typename OtherReal> + void CopyFromTp(const TpMatrix<OtherReal> &M, + MatrixTransposeType trans = kNoTrans); + + /// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h + template<typename OtherReal> + void CopyFromMat(const CuMatrixBase<OtherReal> &M, + MatrixTransposeType trans = kNoTrans); + + /// Inverse of vec() operator. Copies vector into matrix, row-by-row. + /// Note that rv.Dim() must either equal NumRows()*NumCols() or + /// NumCols()-- this has two modes of operation. + void CopyRowsFromVec(const VectorBase<Real> &v); + + /// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc + void CopyRowsFromVec(const CuVectorBase<Real> &v); + + template<typename OtherReal> + void CopyRowsFromVec(const VectorBase<OtherReal> &v); + + /// Copies vector into matrix, column-by-column. + /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows(); + /// this has two modes of operation. + void CopyColsFromVec(const VectorBase<Real> &v); + + /// Copy vector into specific column of matrix. + void CopyColFromVec(const VectorBase<Real> &v, const MatrixIndexT col); + /// Copy vector into specific row of matrix. + void CopyRowFromVec(const VectorBase<Real> &v, const MatrixIndexT row); + /// Copy vector into diagonal of matrix. + void CopyDiagFromVec(const VectorBase<Real> &v); + + /* Accessing of sub-parts of the matrix. */ + + /// Return specific row of matrix [const]. + inline const SubVector<Real> Row(MatrixIndexT i) const { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < + static_cast<UnsignedMatrixIndexT>(num_rows_)); + return SubVector<Real>(data_ + (i * stride_), NumCols()); + } + + /// Return specific row of matrix. + inline SubVector<Real> Row(MatrixIndexT i) { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < + static_cast<UnsignedMatrixIndexT>(num_rows_)); + return SubVector<Real>(data_ + (i * stride_), NumCols()); + } + + /// Return a sub-part of matrix. + inline SubMatrix<Real> Range(const MatrixIndexT row_offset, + const MatrixIndexT num_rows, + const MatrixIndexT col_offset, + const MatrixIndexT num_cols) const { + return SubMatrix<Real>(*this, row_offset, num_rows, + col_offset, num_cols); + } + inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset, + const MatrixIndexT num_rows) const { + return SubMatrix<Real>(*this, row_offset, num_rows, 0, num_cols_); + } + inline SubMatrix<Real> ColRange(const MatrixIndexT col_offset, + const MatrixIndexT num_cols) const { + return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols); + } + + /* Various special functions. */ + /// Returns sum of all elements in matrix. + Real Sum() const; + /// Returns trace of matrix. + Real Trace(bool check_square = true) const; + // If check_square = true, will crash if matrix is not square. + + /// Returns maximum element of matrix. + Real Max() const; + /// Returns minimum element of matrix. + Real Min() const; + + /// Element by element multiplication with a given matrix. + void MulElements(const MatrixBase<Real> &A); + + /// Divide each element by the corresponding element of a given matrix. + void DivElements(const MatrixBase<Real> &A); + + /// Multiply each element with a scalar value. + void Scale(Real alpha); + + /// Set, element-by-element, *this = max(*this, A) + void Max(const MatrixBase<Real> &A); + + /// Equivalent to (*this) = (*this) * diag(scale). Scaling + /// each column by a scalar taken from that dimension of the vector. + void MulColsVec(const VectorBase<Real> &scale); + + /// Equivalent to (*this) = diag(scale) * (*this). Scaling + /// each row by a scalar taken from that dimension of the vector. + void MulRowsVec(const VectorBase<Real> &scale); + + /// Divide each row into src.NumCols() equal groups, and then scale i'th row's + /// j'th group of elements by src(i, j). Requires src.NumRows() == + /// this->NumRows() and this->NumCols() % src.NumCols() == 0. + void MulRowsGroupMat(const MatrixBase<Real> &src); + + /// Returns logdet of matrix. + Real LogDet(Real *det_sign = NULL) const; + + /// matrix inverse. + /// if inverse_needed = false, will fill matrix with garbage. + /// (only useful if logdet wanted). + void Invert(Real *log_det = NULL, Real *det_sign = NULL, + bool inverse_needed = true); + /// matrix inverse [double]. + /// if inverse_needed = false, will fill matrix with garbage + /// (only useful if logdet wanted). + /// Does inversion in double precision even if matrix was not double. + void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL, + bool inverse_needed = true); + + /// Inverts all the elements of the matrix + void InvertElements(); + + /// Transpose the matrix. This one is only + /// applicable to square matrices (the one in the + /// Matrix child class works also for non-square. + void Transpose(); + + /// Copies column r from column indices[r] of src. + /// As a special case, if indexes[i] == -1, sets column i to zero + /// indices.size() must equal this->NumCols(), + /// all elements of "reorder" must be in [-1, src.NumCols()-1], + /// and src.NumRows() must equal this.NumRows() + void CopyCols(const MatrixBase<Real> &src, + const std::vector<MatrixIndexT> &indices); + + /// Copies row r from row indices[r] of src. + /// As a special case, if indexes[i] == -1, sets row i to zero + /// "reorder".size() must equal this->NumRows(), + /// all elements of "reorder" must be in [-1, src.NumRows()-1], + /// and src.NumCols() must equal this.NumCols() + void CopyRows(const MatrixBase<Real> &src, + const std::vector<MatrixIndexT> &indices); + + /// Applies floor to all matrix elements + void ApplyFloor(Real floor_val); + + /// Applies floor to all matrix elements + void ApplyCeiling(Real ceiling_val); + + /// Calculates log of all the matrix elemnts + void ApplyLog(); + + /// Exponentiate each of the elements. + void ApplyExp(); + + /// Applies power to all matrix elements + void ApplyPow(Real power); + + /// Apply power to the absolute value of each element. + /// Include the sign of the input element if include_sign == true. + /// If the power is negative and the input to the power is zero, + /// The output will be set zero. + void ApplyPowAbs(Real power, bool include_sign=false); + + /// Applies the Heaviside step function (x > 0 ? 1 : 0) to all matrix elements + /// Note: in general you can make different choices for x = 0, but for now + /// please leave it as it (i.e. returning zero) because it affects the + /// RectifiedLinearComponent in the neural net code. + void ApplyHeaviside(); + + /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D + /// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is + /// slightly complicated, due to the need for P to be real. In the symmetric + /// case D is diagonal and real, but in + /// the non-symmetric case there may be complex-conjugate pairs of eigenvalues. + /// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually + /// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a + /// pair is lambda +- i*mu, D will have a corresponding 2x2 block + /// [lambda, mu; -mu, lambda]. + /// Note that if the input matrix (*this) is non-invertible, P may not be invertible + /// so in this case instead of the equation (*this) = P D P^{-1} holding, we have + /// instead (*this) P = P D. + /// + /// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag. + void Eig(MatrixBase<Real> *P, + VectorBase<Real> *eigs_real, + VectorBase<Real> *eigs_imag) const; + + /// The Power method attempts to take the matrix to a power using a method that + /// works in general for fractional and negative powers. The input matrix must + /// be invertible and have reasonable condition (or we don't guarantee the + /// results. The method is based on the eigenvalue decomposition. It will + /// return false and leave the matrix unchanged, if at entry the matrix had + /// real negative eigenvalues (or if it had zero eigenvalues and the power was + /// negative). + bool Power(Real pow); + + /** Singular value decomposition + Major limitations: + For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return + the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the + one on the left is rectangular. + + In Svd, *this = U*diag(S)*Vt. + Null pointers for U and/or Vt at input mean we do not want that output. We + expect that S.Dim() == m, U is either NULL or m by n, + and v is either NULL or n by n. + The singular values are not sorted (use SortSvd for that). */ + void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U, + MatrixBase<Real> *Vt); // Destroys calling matrix. + + /// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already + /// transposed; the normal formulation is U diag(s) V^T. + /// Null pointers for U or V mean we don't want that output (this saves + /// compute). The singular values are not sorted (use SortSvd for that). + void Svd(VectorBase<Real> *s, MatrixBase<Real> *U, + MatrixBase<Real> *Vt) const; + /// Compute SVD but only retain the singular values. + void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); } + + + /// Returns smallest singular value. + Real MinSingularValue() const { + Vector<Real> tmp(std::min(NumRows(), NumCols())); + Svd(&tmp); + return tmp.Min(); + } + + void TestUninitialized() const; // This function is designed so that if any element + // if the matrix is uninitialized memory, valgrind will complain. + + /// Returns condition number by computing Svd. Works even if cols > rows. + /// Returns infinity if all singular values are zero. + Real Cond() const; + + /// Returns true if matrix is Symmetric. + bool IsSymmetric(Real cutoff = 1.0e-05) const; // replace magic number + + /// Returns true if matrix is Diagonal. + bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number + + /// Returns true if the matrix is all zeros, except for ones on diagonal. (it + /// does not have to be square). More specifically, this function returns + /// false if for any i, j, (*this)(i, j) differs by more than cutoff from the + /// expression (i == j ? 1 : 0). + bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number + + /// Returns true if matrix is all zeros. + bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number + + /// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm, + /// or just "2-norm". + Real FrobeniusNorm() const; + + /// Returns true if ((*this)-other).FrobeniusNorm() + /// <= tol * (*this).FrobeniusNorm(). + bool ApproxEqual(const MatrixBase<Real> &other, float tol = 0.01) const; + + /// Tests for exact equality. It's usually preferable to use ApproxEqual. + bool Equal(const MatrixBase<Real> &other) const; + + /// largest absolute value. + Real LargestAbsElem() const; // largest absolute value. + + /// Returns log(sum(exp())) without exp overflow + /// If prune > 0.0, it uses a pruning beam, discarding + /// terms less than (max - prune). Note: in future + /// we may change this so that if prune = 0.0, it takes + /// the max, so use -1 if you don't want to prune. + Real LogSumExp(Real prune = -1.0) const; + + /// Apply soft-max to the collection of all elements of the + /// matrix and return normalizer (log sum of exponentials). + Real ApplySoftMax(); + + /// Set each element to the sigmoid of the corresponding element of "src". + void Sigmoid(const MatrixBase<Real> &src); + + /// Set each element to y = log(1 + exp(x)) + void SoftHinge(const MatrixBase<Real> &src); + + /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / p). + /// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. + void GroupPnorm(const MatrixBase<Real> &src, Real power); + + + /// Calculate derivatives for the GroupPnorm function above... + /// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable), + /// and "output" is the result of the computation (i.e. the "this" of that function + /// call), and *this has the same dimension as "input", then it sets each element + /// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where + /// "output-elem" is whichever element of output depends on that input element. + void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output, + Real power); + + + /// Set each element to the tanh of the corresponding element of "src". + void Tanh(const MatrixBase<Real> &src); + + // Function used in backpropagating derivatives of the sigmoid function: + // element-by-element, set *this = diff * value * (1.0 - value). + void DiffSigmoid(const MatrixBase<Real> &value, + const MatrixBase<Real> &diff); + + // Function used in backpropagating derivatives of the tanh function: + // element-by-element, set *this = diff * (1.0 - value^2). + void DiffTanh(const MatrixBase<Real> &value, + const MatrixBase<Real> &diff); + + /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive + * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an + * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not + * positive semi-definite (check_thresh controls how stringent the check is; + * set it to 2 to ensure it won't ever complain, but it will zero out negative + * dimensions in your matrix. + */ + void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P, + Real check_thresh = 0.001); + + friend Real kaldi::TraceMatMat<Real>(const MatrixBase<Real> &A, + const MatrixBase<Real> &B, MatrixTransposeType trans); // tr (A B) + + // so it can get around const restrictions on the pointer to data_. + friend class SubMatrix<Real>; + + /// Add a scalar to each element + void Add(const Real alpha); + + /// Add a scalar to each diagonal element. + void AddToDiag(const Real alpha); + + /// *this += alpha * a * b^T + template<typename OtherReal> + void AddVecVec(const Real alpha, const VectorBase<OtherReal> &a, + const VectorBase<OtherReal> &b); + + /// [each row of *this] += alpha * v + template<typename OtherReal> + void AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v); + + /// [each col of *this] += alpha * v + template<typename OtherReal> + void AddVecToCols(const Real alpha, const VectorBase<OtherReal> &v); + + /// *this += alpha * M [or M^T] + void AddMat(const Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType transA = kNoTrans); + + /// *this = beta * *this + alpha * M M^T, for symmetric matrices. It only + /// updates the lower triangle of *this. It will leave the matrix asymmetric; + /// if you need it symmetric as a regular matrix, do CopyLowerToUpper(). + void SymAddMat2(const Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType transA, Real beta); + + /// *this = beta * *this + alpha * diag(v) * M [or M^T]. + /// The same as adding M but scaling each row M_i by v(i). + void AddDiagVecMat(const Real alpha, VectorBase<Real> &v, + const MatrixBase<Real> &M, MatrixTransposeType transM, + Real beta = 1.0); + + /// *this = beta * *this + alpha * M [or M^T] * diag(v) + /// The same as adding M but scaling each column M_j by v(j). + void AddMatDiagVec(const Real alpha, + const MatrixBase<Real> &M, MatrixTransposeType transM, + VectorBase<Real> &v, + Real beta = 1.0); + + /// *this = beta * *this + alpha * A .* B (.* element by element multiplication) + void AddMatMatElements(const Real alpha, + const MatrixBase<Real>& A, + const MatrixBase<Real>& B, + const Real beta); + + /// *this += alpha * S + template<typename OtherReal> + void AddSp(const Real alpha, const SpMatrix<OtherReal> &S); + + void AddMatMat(const Real alpha, + const MatrixBase<Real>& A, MatrixTransposeType transA, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const Real beta); + + /// *this = a * b / c (by element; when c = 0, *this = a) + void AddMatMatDivMat(const MatrixBase<Real>& A, + const MatrixBase<Real>& B, + const MatrixBase<Real>& C); + + /// A version of AddMatMat specialized for when the second argument + /// contains a lot of zeroes. + void AddMatSmat(const Real alpha, + const MatrixBase<Real>& A, MatrixTransposeType transA, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const Real beta); + + /// A version of AddMatMat specialized for when the first argument + /// contains a lot of zeroes. + void AddSmatMat(const Real alpha, + const MatrixBase<Real>& A, MatrixTransposeType transA, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const Real beta); + + /// this <-- beta*this + alpha*A*B*C. + void AddMatMatMat(const Real alpha, + const MatrixBase<Real>& A, MatrixTransposeType transA, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const MatrixBase<Real>& C, MatrixTransposeType transC, + const Real beta); + + /// this <-- beta*this + alpha*SpA*B. + // This and the routines below are really + // stubs that need to be made more efficient. + void AddSpMat(const Real alpha, + const SpMatrix<Real>& A, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const Real beta) { + Matrix<Real> M(A); + return AddMatMat(alpha, M, kNoTrans, B, transB, beta); + } + /// this <-- beta*this + alpha*A*B. + void AddTpMat(const Real alpha, + const TpMatrix<Real>& A, MatrixTransposeType transA, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const Real beta) { + Matrix<Real> M(A); + return AddMatMat(alpha, M, transA, B, transB, beta); + } + /// this <-- beta*this + alpha*A*B. + void AddMatSp(const Real alpha, + const MatrixBase<Real>& A, MatrixTransposeType transA, + const SpMatrix<Real>& B, + const Real beta) { + Matrix<Real> M(B); + return AddMatMat(alpha, A, transA, M, kNoTrans, beta); + } + /// this <-- beta*this + alpha*A*B*C. + void AddSpMatSp(const Real alpha, + const SpMatrix<Real> &A, + const MatrixBase<Real>& B, MatrixTransposeType transB, + const SpMatrix<Real>& C, + const Real beta) { + Matrix<Real> M(A), N(C); + return AddMatMatMat(alpha, M, kNoTrans, B, transB, N, kNoTrans, beta); + } + /// this <-- beta*this + alpha*A*B. + void AddMatTp(const Real alpha, + const MatrixBase<Real>& A, MatrixTransposeType transA, + const TpMatrix<Real>& B, MatrixTransposeType transB, + const Real beta) { + Matrix<Real> M(B); + return AddMatMat(alpha, A, transA, M, transB, beta); + } + + /// this <-- beta*this + alpha*A*B. + void AddTpTp(const Real alpha, + const TpMatrix<Real>& A, MatrixTransposeType transA, + const TpMatrix<Real>& B, MatrixTransposeType transB, + const Real beta) { + Matrix<Real> M(A), N(B); + return AddMatMat(alpha, M, transA, N, transB, beta); + } + + /// this <-- beta*this + alpha*A*B. + // This one is more efficient, not like the others above. + void AddSpSp(const Real alpha, + const SpMatrix<Real>& A, const SpMatrix<Real>& B, + const Real beta); + + /// Copy lower triangle to upper triangle (symmetrize) + void CopyLowerToUpper(); + + /// Copy upper triangle to lower triangle (symmetrize) + void CopyUpperToLower(); + + /// This function orthogonalizes the rows of a matrix using the Gram-Schmidt + /// process. It is only applicable if NumRows() <= NumCols(). It will use + /// random number generation to fill in rows with something nonzero, in cases + /// where the original matrix was of deficient row rank. + void OrthogonalizeRows(); + + /// stream read. + /// Use instead of stream<<*this, if you want to add to existing contents. + // Will throw exception on failure. + void Read(std::istream & in, bool binary, bool add = false); + /// write to stream. + void Write(std::ostream & out, bool binary) const; + + // Below is internal methods for Svd, user does not have to know about this. +#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD) + // protected: + // Should be protected but used directly in testing routine. + // destroys *this! + void LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U, + MatrixBase<Real> *Vt); +#else + protected: + // destroys *this! + bool JamaSvd(VectorBase<Real> *s, MatrixBase<Real> *U, + MatrixBase<Real> *V); + +#endif + protected: + + /// Initializer, callable only from child. + explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) : + data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) { + KALDI_ASSERT_IS_FLOATING_TYPE(Real); + } + + /// Initializer, callable only from child. + /// Empty initializer, for un-initialized matrix. + explicit MatrixBase(): data_(NULL) { + KALDI_ASSERT_IS_FLOATING_TYPE(Real); + } + + // Make sure pointers to MatrixBase cannot be deleted. + ~MatrixBase() { } + + /// A workaround that allows SubMatrix to get a pointer to non-const data + /// for const Matrix. Unfortunately C++ does not allow us to declare a + /// "public const" inheritance or anything like that, so it would require + /// a lot of work to make the SubMatrix class totally const-correct-- + /// we would have to override many of the Matrix functions. + inline Real* Data_workaround() const { + return data_; + } + + /// data memory area + Real* data_; + + /// these atributes store the real matrix size as it is stored in memory + /// including memalignment + MatrixIndexT num_cols_; /// < Number of columns + MatrixIndexT num_rows_; /// < Number of rows + /** True number of columns for the internal matrix. This number may differ + * from num_cols_ as memory alignment might be used. */ + MatrixIndexT stride_; + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase); +}; + +/// A class for storing matrices. +template<typename Real> +class Matrix : public MatrixBase<Real> { + public: + + /// Empty constructor. + Matrix(); + + /// Basic constructor. Sets to zero by default. + /// if set_zero == false, memory contents are undefined. + Matrix(const MatrixIndexT r, const MatrixIndexT c, + MatrixResizeType resize_type = kSetZero): + MatrixBase<Real>() { Resize(r, c, resize_type); } + + /// Copy constructor from CUDA matrix + /// This is defined in ../cudamatrix/cu-matrix.h + template<typename OtherReal> + explicit Matrix(const CuMatrixBase<OtherReal> &cu, + MatrixTransposeType trans = kNoTrans); + + + /// Swaps the contents of *this and *other. Shallow swap. + void Swap(Matrix<Real> *other); + + /// Defined in ../cudamatrix/cu-matrix.cc + void Swap(CuMatrix<Real> *mat); + + /// Constructor from any MatrixBase. Can also copy with transpose. + /// Allocates new memory. + explicit Matrix(const MatrixBase<Real> & M, + MatrixTransposeType trans = kNoTrans); + + /// Same as above, but need to avoid default copy constructor. + Matrix(const Matrix<Real> & M); // (cannot make explicit) + + /// Copy constructor: as above, but from another type. + template<typename OtherReal> + explicit Matrix(const MatrixBase<OtherReal> & M, + MatrixTransposeType trans = kNoTrans); + + /// Copy constructor taking SpMatrix... + /// It is symmetric, so no option for transpose, and NumRows == Cols + template<typename OtherReal> + explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() { + Resize(M.NumRows(), M.NumRows(), kUndefined); + this->CopyFromSp(M); + } + + /// Constructor from CompressedMatrix + explicit Matrix(const CompressedMatrix &C); + + /// Copy constructor taking TpMatrix... + template <typename OtherReal> + explicit Matrix(const TpMatrix<OtherReal> & M, + MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() { + if (trans == kNoTrans) { + Resize(M.NumRows(), M.NumCols(), kUndefined); + this->CopyFromTp(M); + } else { + Resize(M.NumCols(), M.NumRows(), kUndefined); + this->CopyFromTp(M, kTrans); + } + } + + /// read from stream. + // Unlike one in base, allows resizing. + void Read(std::istream & in, bool binary, bool add = false); + + /// Remove a specified row. + void RemoveRow(MatrixIndexT i); + + /// Transpose the matrix. Works for non-square + /// matrices as well as square ones. + void Transpose(); + + /// Distructor to free matrices. + ~Matrix() { Destroy(); } + + /// Sets matrix to a specified size (zero is OK as long as both r and c are + /// zero). The value of the new data depends on resize_type: + /// -if kSetZero, the new data will be zero + /// -if kUndefined, the new data will be undefined + /// -if kCopyData, the new data will be the same as the old data in any + /// shared positions, and zero elsewhere. + /// This function takes time proportional to the number of data elements. + void Resize(const MatrixIndexT r, + const MatrixIndexT c, + MatrixResizeType resize_type = kSetZero); + + /// Assignment operator that takes MatrixBase. + Matrix<Real> &operator = (const MatrixBase<Real> &other) { + if (MatrixBase<Real>::NumRows() != other.NumRows() || + MatrixBase<Real>::NumCols() != other.NumCols()) + Resize(other.NumRows(), other.NumCols(), kUndefined); + MatrixBase<Real>::CopyFromMat(other); + return *this; + } + + /// Assignment operator. Needed for inclusion in std::vector. + Matrix<Real> &operator = (const Matrix<Real> &other) { + if (MatrixBase<Real>::NumRows() != other.NumRows() || + MatrixBase<Real>::NumCols() != other.NumCols()) + Resize(other.NumRows(), other.NumCols(), kUndefined); + MatrixBase<Real>::CopyFromMat(other); + return *this; + } + + + private: + /// Deallocates memory and sets to empty matrix (dimension 0, 0). + void Destroy(); + + /// Init assumes the current class contents are invalid (i.e. junk or have + /// already been freed), and it sets the matrix to newly allocated memory with + /// the specified number of rows and columns. r == c == 0 is acceptable. The data + /// memory contents will be undefined. + void Init(const MatrixIndexT r, + const MatrixIndexT c); + +}; +/// @} end "addtogroup matrix_group" + +/// \addtogroup matrix_funcs_io +/// @{ + +/// A structure containing the HTK header. +/// [TODO: change the style of the variables to Kaldi-compliant] +struct HtkHeader { + /// Number of samples. + int32 mNSamples; + /// Sample period. + int32 mSamplePeriod; + /// Sample size + int16 mSampleSize; + /// Sample kind. + uint16 mSampleKind; +}; + +// Read HTK formatted features from file into matrix. +template<typename Real> +bool ReadHtk(std::istream &is, Matrix<Real> *M, HtkHeader *header_ptr); + +// Write (HTK format) features to file from matrix. +template<typename Real> +bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr); + +// Write (CMUSphinx format) features to file from matrix. +template<typename Real> +bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M); + +/// @} end of "addtogroup matrix_funcs_io" + +/** + Sub-matrix representation. + Can work with sub-parts of a matrix using this class. + Note that SubMatrix is not very const-correct-- it allows you to + change the contents of a const Matrix. Be careful! +*/ + +template<typename Real> +class SubMatrix : public MatrixBase<Real> { + public: + // Initialize a SubMatrix from part of a matrix; this is + // a bit like A(b:c, d:e) in Matlab. + // This initializer is against the proper semantics of "const", since + // SubMatrix can change its contents. It would be hard to implement + // a "const-safe" version of this class. + SubMatrix(const MatrixBase<Real>& T, + const MatrixIndexT ro, // row offset, 0 < ro < NumRows() + const MatrixIndexT r, // number of rows, r > 0 + const MatrixIndexT co, // column offset, 0 < co < NumCols() + const MatrixIndexT c); // number of columns, c > 0 + + // This initializer is mostly intended for use in CuMatrix and related + // classes. Be careful! + SubMatrix(Real *data, + MatrixIndexT num_rows, + MatrixIndexT num_cols, + MatrixIndexT stride); + + ~SubMatrix<Real>() {} + + /// This type of constructor is needed for Range() to work [in Matrix base + /// class]. Cannot make it explicit. + SubMatrix<Real> (const SubMatrix &other): + MatrixBase<Real> (other.data_, other.num_cols_, other.num_rows_, + other.stride_) {} + + private: + /// Disallow assignment. + SubMatrix<Real> &operator = (const SubMatrix<Real> &other); +}; +/// @} End of "addtogroup matrix_funcs_io". + +/// \addtogroup matrix_funcs_scalar +/// @{ + +// Some declarations. These are traces of products. + + +template<typename Real> +bool ApproxEqual(const MatrixBase<Real> &A, + const MatrixBase<Real> &B, Real tol = 0.01) { + return A.ApproxEqual(B, tol); +} + +template<typename Real> +inline void AssertEqual(const MatrixBase<Real> &A, const MatrixBase<Real> &B, + float tol = 0.01) { + KALDI_ASSERT(A.ApproxEqual(B, tol)); +} + +/// Returns trace of matrix. +template <typename Real> +double TraceMat(const MatrixBase<Real> &A) { return A.Trace(); } + + +/// Returns tr(A B C) +template <typename Real> +Real TraceMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA, + const MatrixBase<Real> &B, MatrixTransposeType transB, + const MatrixBase<Real> &C, MatrixTransposeType transC); + +/// Returns tr(A B C D) +template <typename Real> +Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA, + const MatrixBase<Real> &B, MatrixTransposeType transB, + const MatrixBase<Real> &C, MatrixTransposeType transC, + const MatrixBase<Real> &D, MatrixTransposeType transD); + +/// @} end "addtogroup matrix_funcs_scalar" + + +/// \addtogroup matrix_funcs_misc +/// @{ + + +/// Function to ensure that SVD is sorted. This function is made as generic as +/// possible, to be applicable to other types of problems. s->Dim() should be +/// the same as U->NumCols(), and we sort s from greatest to least absolute +/// value (if sort_on_absolute_value == true) or greatest to least value +/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it +/// exists, around in the same way. Note: the "absolute value" part won't matter +/// if this is an actual SVD, since singular values are non-negative. +template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U, + MatrixBase<Real>* Vt = NULL, + bool sort_on_absolute_value = true); + +/// Creates the eigenvalue matrix D that is part of the decomposition used Matrix::Eig. +/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2 +/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding +/// 2x2 block [lambda, mu; -mu, lambda]. +/// This function will throw if any complex eigenvalues are not in complex conjugate +/// pairs (or the members of such pairs are not consecutively numbered). +template<typename Real> +void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag, + MatrixBase<Real> *D); + +/// The following function is used in Matrix::Power, and separately tested, so we +/// declare it here mainly for the testing code to see. It takes a complex value to +/// a power using a method that will work for noninteger powers (but will fail if the +/// complex value is real and negative). +template<typename Real> +bool AttemptComplexPower(Real *x_re, Real *x_im, Real power); + + + +/// @} end of addtogroup matrix_funcs_misc + +/// \addtogroup matrix_funcs_io +/// @{ +template<typename Real> +std::ostream & operator << (std::ostream & Out, const MatrixBase<Real> & M); + +template<typename Real> +std::istream & operator >> (std::istream & In, MatrixBase<Real> & M); + +// The Matrix read allows resizing, so we override the MatrixBase one. +template<typename Real> +std::istream & operator >> (std::istream & In, Matrix<Real> & M); + + +template<typename Real> +bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) { + return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols()); +} + +/// @} end of \addtogroup matrix_funcs_io + + +} // namespace kaldi + + + +// we need to include the implementation and some +// template specializations. +#include "matrix/kaldi-matrix-inl.h" + + +#endif // KALDI_MATRIX_KALDI_MATRIX_H_ diff --git a/kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h b/kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h new file mode 100644 index 0000000..c3a4f52 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h @@ -0,0 +1,58 @@ +// matrix/kaldi-vector-inl.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; +// Haihua Xu + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +// This is an internal header file, included by other library headers. +// You should not attempt to use it directly. + +#ifndef KALDI_MATRIX_KALDI_VECTOR_INL_H_ +#define KALDI_MATRIX_KALDI_VECTOR_INL_H_ 1 + +namespace kaldi { + +template<typename Real> +std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) { + rv.Write(os, false); + return os; +} + +template<typename Real> +std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) { + rv.Read(is, false); + return is; +} + +template<typename Real> +std::istream &operator >> (std::istream &is, Vector<Real> &rv) { + rv.Read(is, false); + return is; +} + +template<> +template<> +void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv); + +template<> +template<> +void VectorBase<double>::AddVec<double>(const double alpha, + const VectorBase<double> &rv); + +} // namespace kaldi + +#endif // KALDI_MATRIX_KALDI_VECTOR_INL_H_ diff --git a/kaldi_io/src/kaldi/matrix/kaldi-vector.h b/kaldi_io/src/kaldi/matrix/kaldi-vector.h new file mode 100644 index 0000000..2b3395b --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/kaldi-vector.h @@ -0,0 +1,585 @@ +// matrix/kaldi-vector.h + +// Copyright 2009-2012 Ondrej Glembek; Microsoft Corporation; Lukas Burget; +// Saarland University (Author: Arnab Ghoshal); +// Ariya Rastrow; Petr Schwarz; Yanmin Qian; +// Karel Vesely; Go Vivace Inc.; Arnab Ghoshal +// Wei Shi; + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_KALDI_VECTOR_H_ +#define KALDI_MATRIX_KALDI_VECTOR_H_ 1 + +#include "matrix/matrix-common.h" + +namespace kaldi { + +/// \addtogroup matrix_group +/// @{ + +/// Provides a vector abstraction class. +/// This class provides a way to work with vectors in kaldi. +/// It encapsulates basic operations and memory optimizations. +template<typename Real> +class VectorBase { + public: + /// Set vector to all zeros. + void SetZero(); + + /// Returns true if matrix is all zeros. + bool IsZero(Real cutoff = 1.0e-06) const; // replace magic number + + /// Set all members of a vector to a specified value. + void Set(Real f); + + /// Set vector to random normally-distributed noise. + void SetRandn(); + + /// This function returns a random index into this vector, + /// chosen with probability proportional to the corresponding + /// element. Requires that this->Min() >= 0 and this->Sum() > 0. + MatrixIndexT RandCategorical() const; + + /// Returns the dimension of the vector. + inline MatrixIndexT Dim() const { return dim_; } + + /// Returns the size in memory of the vector, in bytes. + inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); } + + /// Returns a pointer to the start of the vector's data. + inline Real* Data() { return data_; } + + /// Returns a pointer to the start of the vector's data (const). + inline const Real* Data() const { return data_; } + + /// Indexing operator (const). + inline Real operator() (MatrixIndexT i) const { + KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < + static_cast<UnsignedMatrixIndexT>(dim_)); + return *(data_ + i); + } + + /// Indexing operator (non-const). + inline Real & operator() (MatrixIndexT i) { + KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < + static_cast<UnsignedMatrixIndexT>(dim_)); + return *(data_ + i); + } + + /** @brief Returns a sub-vector of a vector (a range of elements). + * @param o [in] Origin, 0 < o < Dim() + * @param l [in] Length 0 < l < Dim()-o + * @return A SubVector object that aliases the data of the Vector object. + * See @c SubVector class for details */ + SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) { + return SubVector<Real>(*this, o, l); + } + + /** @brief Returns a const sub-vector of a vector (a range of elements). + * @param o [in] Origin, 0 < o < Dim() + * @param l [in] Length 0 < l < Dim()-o + * @return A SubVector object that aliases the data of the Vector object. + * See @c SubVector class for details */ + const SubVector<Real> Range(const MatrixIndexT o, + const MatrixIndexT l) const { + return SubVector<Real>(*this, o, l); + } + + /// Copy data from another vector (must match own size). + void CopyFromVec(const VectorBase<Real> &v); + + /// Copy data from a SpMatrix or TpMatrix (must match own size). + template<typename OtherReal> + void CopyFromPacked(const PackedMatrix<OtherReal> &M); + + /// Copy data from another vector of different type (double vs. float) + template<typename OtherReal> + void CopyFromVec(const VectorBase<OtherReal> &v); + + /// Copy from CuVector. This is defined in ../cudamatrix/cu-vector.h + template<typename OtherReal> + void CopyFromVec(const CuVectorBase<OtherReal> &v); + + + /// Apply natural log to all elements. Throw if any element of + /// the vector is negative (but doesn't complain about zero; the + /// log will be -infinity + void ApplyLog(); + + /// Apply natural log to another vector and put result in *this. + void ApplyLogAndCopy(const VectorBase<Real> &v); + + /// Apply exponential to each value in vector. + void ApplyExp(); + + /// Take absolute value of each of the elements + void ApplyAbs(); + + /// Applies floor to all elements. Returns number of elements floored. + MatrixIndexT ApplyFloor(Real floor_val); + + /// Applies ceiling to all elements. Returns number of elements changed. + MatrixIndexT ApplyCeiling(Real ceil_val); + + /// Applies floor to all elements. Returns number of elements floored. + MatrixIndexT ApplyFloor(const VectorBase<Real> &floor_vec); + + /// Apply soft-max to vector and return normalizer (log sum of exponentials). + /// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$ + Real ApplySoftMax(); + + /// Sets each element of *this to the tanh of the corresponding element of "src". + void Tanh(const VectorBase<Real> &src); + + /// Sets each element of *this to the sigmoid function of the corresponding + /// element of "src". + void Sigmoid(const VectorBase<Real> &src); + + /// Take all elements of vector to a power. + void ApplyPow(Real power); + + /// Take the absolute value of all elements of a vector to a power. + /// Include the sign of the input element if include_sign == true. + /// If power is negative and the input value is zero, the output is set zero. + void ApplyPowAbs(Real power, bool include_sign=false); + + /// Compute the p-th norm of the vector. + Real Norm(Real p) const; + + /// Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0). + bool ApproxEqual(const VectorBase<Real> &other, float tol = 0.01) const; + + /// Invert all elements. + void InvertElements(); + + /// Add vector : *this = *this + alpha * rv (with casting between floats and + /// doubles) + template<typename OtherReal> + void AddVec(const Real alpha, const VectorBase<OtherReal> &v); + + /// Add vector : *this = *this + alpha * rv^2 [element-wise squaring]. + void AddVec2(const Real alpha, const VectorBase<Real> &v); + + /// Add vector : *this = *this + alpha * rv^2 [element-wise squaring], + /// with casting between floats and doubles. + template<typename OtherReal> + void AddVec2(const Real alpha, const VectorBase<OtherReal> &v); + + /// Add matrix times vector : this <-- beta*this + alpha*M*v. + /// Calls BLAS GEMV. + void AddMatVec(const Real alpha, const MatrixBase<Real> &M, + const MatrixTransposeType trans, const VectorBase<Real> &v, + const Real beta); // **beta previously defaulted to 0.0** + + /// This is as AddMatVec, except optimized for where v contains a lot + /// of zeros. + void AddMatSvec(const Real alpha, const MatrixBase<Real> &M, + const MatrixTransposeType trans, const VectorBase<Real> &v, + const Real beta); // **beta previously defaulted to 0.0** + + + /// Add symmetric positive definite matrix times vector: + /// this <-- beta*this + alpha*M*v. Calls BLAS SPMV. + void AddSpVec(const Real alpha, const SpMatrix<Real> &M, + const VectorBase<Real> &v, const Real beta); // **beta previously defaulted to 0.0** + + /// Add triangular matrix times vector: this <-- beta*this + alpha*M*v. + /// Works even if rv == *this. + void AddTpVec(const Real alpha, const TpMatrix<Real> &M, + const MatrixTransposeType trans, const VectorBase<Real> &v, + const Real beta); // **beta previously defaulted to 0.0** + + /// Set each element to y = (x == orig ? changed : x). + void ReplaceValue(Real orig, Real changed); + + /// Multipy element-by-element by another vector. + void MulElements(const VectorBase<Real> &v); + /// Multipy element-by-element by another vector of different type. + template<typename OtherReal> + void MulElements(const VectorBase<OtherReal> &v); + + /// Divide element-by-element by a vector. + void DivElements(const VectorBase<Real> &v); + /// Divide element-by-element by a vector of different type. + template<typename OtherReal> + void DivElements(const VectorBase<OtherReal> &v); + + /// Add a constant to each element of a vector. + void Add(Real c); + + /// Add element-by-element product of vectlrs: + // this <-- alpha * v .* r + beta*this . + void AddVecVec(Real alpha, const VectorBase<Real> &v, + const VectorBase<Real> &r, Real beta); + + /// Add element-by-element quotient of two vectors. + /// this <---- alpha*v/r + beta*this + void AddVecDivVec(Real alpha, const VectorBase<Real> &v, + const VectorBase<Real> &r, Real beta); + + /// Multiplies all elements by this constant. + void Scale(Real alpha); + + /// Multiplies this vector by lower-triangular marix: *this <-- *this *M + void MulTp(const TpMatrix<Real> &M, const MatrixTransposeType trans); + + /// If trans == kNoTrans, solves M x = b, where b is the value of *this at input + /// and x is the value of *this at output. + /// If trans == kTrans, solves M' x = b. + /// Does not test for M being singular or near-singular, so test it before + /// calling this routine. + void Solve(const TpMatrix<Real> &M, const MatrixTransposeType trans); + + /// Performs a row stack of the matrix M + void CopyRowsFromMat(const MatrixBase<Real> &M); + template<typename OtherReal> + void CopyRowsFromMat(const MatrixBase<OtherReal> &M); + + /// The following is implemented in ../cudamatrix/cu-matrix.cc + void CopyRowsFromMat(const CuMatrixBase<Real> &M); + + /// Performs a column stack of the matrix M + void CopyColsFromMat(const MatrixBase<Real> &M); + + /// Extracts a row of the matrix M. Could also do this with + /// this->Copy(M[row]). + void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row); + /// Extracts a row of the matrix M with type conversion. + template<typename OtherReal> + void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row); + + /// Extracts a row of the symmetric matrix S. + template<typename OtherReal> + void CopyRowFromSp(const SpMatrix<OtherReal> &S, MatrixIndexT row); + + /// Extracts a column of the matrix M. + template<typename OtherReal> + void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col); + + /// Extracts the diagonal of the matrix M. + void CopyDiagFromMat(const MatrixBase<Real> &M); + + /// Extracts the diagonal of a packed matrix M; works for Sp or Tp. + void CopyDiagFromPacked(const PackedMatrix<Real> &M); + + + /// Extracts the diagonal of a symmetric matrix. + inline void CopyDiagFromSp(const SpMatrix<Real> &M) { CopyDiagFromPacked(M); } + + /// Extracts the diagonal of a triangular matrix. + inline void CopyDiagFromTp(const TpMatrix<Real> &M) { CopyDiagFromPacked(M); } + + /// Returns the maximum value of any element, or -infinity for the empty vector. + Real Max() const; + + /// Returns the maximum value of any element, and the associated index. + /// Error if vector is empty. + Real Max(MatrixIndexT *index) const; + + /// Returns the minimum value of any element, or +infinity for the empty vector. + Real Min() const; + + /// Returns the minimum value of any element, and the associated index. + /// Error if vector is empty. + Real Min(MatrixIndexT *index) const; + + /// Returns sum of the elements + Real Sum() const; + + /// Returns sum of the logs of the elements. More efficient than + /// just taking log of each. Will return NaN if any elements are + /// negative. + Real SumLog() const; + + /// Does *this = alpha * (sum of rows of M) + beta * *this. + void AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0); + + /// Does *this = alpha * (sum of columns of M) + beta * *this. + void AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0); + + /// Add the diagonal of a matrix times itself: + /// *this = diag(M M^T) + beta * *this (if trans == kNoTrans), or + /// *this = diag(M^T M) + beta * *this (if trans == kTrans). + void AddDiagMat2(Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType trans = kNoTrans, Real beta = 1.0); + + /// Add the diagonal of a matrix product: *this = diag(M N), assuming the + /// "trans" arguments are both kNoTrans; for transpose arguments, it behaves + /// as you would expect. + void AddDiagMatMat(Real alpha, const MatrixBase<Real> &M, MatrixTransposeType transM, + const MatrixBase<Real> &N, MatrixTransposeType transN, + Real beta = 1.0); + + /// Returns log(sum(exp())) without exp overflow + /// If prune > 0.0, ignores terms less than the max - prune. + /// [Note: in future, if prune = 0.0, it will take the max. + /// For now, use -1 if you don't want it to prune.] + Real LogSumExp(Real prune = -1.0) const; + + /// Reads from C++ stream (option to add to existing contents). + /// Throws exception on failure + void Read(std::istream & in, bool binary, bool add = false); + + /// Writes to C++ stream (option to write in binary). + void Write(std::ostream &Out, bool binary) const; + + friend class VectorBase<double>; + friend class VectorBase<float>; + friend class CuVectorBase<Real>; + friend class CuVector<Real>; + protected: + /// Destructor; does not deallocate memory, this is handled by child classes. + /// This destructor is protected so this object so this object can only be + /// deleted via a child. + ~VectorBase() {} + + /// Empty initializer, corresponds to vector of zero size. + explicit VectorBase(): data_(NULL), dim_(0) { + KALDI_ASSERT_IS_FLOATING_TYPE(Real); + } + +// Took this out since it is not currently used, and it is possible to create +// objects where the allocated memory is not the same size as dim_ : Arnab +// /// Initializer from a pointer and a size; keeps the pointer internally +// /// (ownership or non-ownership depends on the child class). +// explicit VectorBase(Real* data, MatrixIndexT dim) +// : data_(data), dim_(dim) {} + + // Arnab : made this protected since it is unsafe too. + /// Load data into the vector: sz must match own size. + void CopyFromPtr(const Real* Data, MatrixIndexT sz); + + /// data memory area + Real* data_; + /// dimension of vector + MatrixIndexT dim_; + KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase); +}; // class VectorBase + +/** @brief A class representing a vector. + * + * This class provides a way to work with vectors in kaldi. + * It encapsulates basic operations and memory optimizations. */ +template<typename Real> +class Vector: public VectorBase<Real> { + public: + /// Constructor that takes no arguments. Initializes to empty. + Vector(): VectorBase<Real>() {} + + /// Constructor with specific size. Sets to all-zero by default + /// if set_zero == false, memory contents are undefined. + explicit Vector(const MatrixIndexT s, + MatrixResizeType resize_type = kSetZero) + : VectorBase<Real>() { Resize(s, resize_type); } + + /// Copy constructor from CUDA vector + /// This is defined in ../cudamatrix/cu-vector.h + template<typename OtherReal> + explicit Vector(const CuVectorBase<OtherReal> &cu); + + /// Copy constructor. The need for this is controversial. + Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit) + Resize(v.Dim(), kUndefined); + this->CopyFromVec(v); + } + + /// Copy-constructor from base-class, needed to copy from SubVector. + explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() { + Resize(v.Dim(), kUndefined); + this->CopyFromVec(v); + } + + /// Type conversion constructor. + template<typename OtherReal> + explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() { + Resize(v.Dim(), kUndefined); + this->CopyFromVec(v); + } + +// Took this out since it is unsafe : Arnab +// /// Constructor from a pointer and a size; copies the data to a location +// /// it owns. +// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() { +// Resize(s); + // CopyFromPtr(Data, s); +// } + + + /// Swaps the contents of *this and *other. Shallow swap. + void Swap(Vector<Real> *other); + + /// Destructor. Deallocates memory. + ~Vector() { Destroy(); } + + /// Read function using C++ streams. Can also add to existing contents + /// of matrix. + void Read(std::istream & in, bool binary, bool add = false); + + /// Set vector to a specified size (can be zero). + /// The value of the new data depends on resize_type: + /// -if kSetZero, the new data will be zero + /// -if kUndefined, the new data will be undefined + /// -if kCopyData, the new data will be the same as the old data in any + /// shared positions, and zero elsewhere. + /// This function takes time proportional to the number of data elements. + void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero); + + /// Remove one element and shifts later elements down. + void RemoveElement(MatrixIndexT i); + + /// Assignment operator, protected so it can only be used by std::vector + Vector<Real> &operator = (const Vector<Real> &other) { + Resize(other.Dim(), kUndefined); + this->CopyFromVec(other); + return *this; + } + + /// Assignment operator that takes VectorBase. + Vector<Real> &operator = (const VectorBase<Real> &other) { + Resize(other.Dim(), kUndefined); + this->CopyFromVec(other); + return *this; + } + private: + /// Init assumes the current contents of the class are invalid (i.e. junk or + /// has already been freed), and it sets the vector to newly allocated memory + /// with the specified dimension. dim == 0 is acceptable. The memory contents + /// pointed to by data_ will be undefined. + void Init(const MatrixIndexT dim); + + /// Destroy function, called internally. + void Destroy(); + +}; + + +/// Represents a non-allocating general vector which can be defined +/// as a sub-vector of higher-level vector [or as the row of a matrix]. +template<typename Real> +class SubVector : public VectorBase<Real> { + public: + /// Constructor from a Vector or SubVector. + /// SubVectors are not const-safe and it's very hard to make them + /// so for now we just give up. This function contains const_cast. + SubVector(const VectorBase<Real> &t, const MatrixIndexT origin, + const MatrixIndexT length) : VectorBase<Real>() { + // following assert equiv to origin>=0 && length>=0 && + // origin+length <= rt.dim_ + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+ + static_cast<UnsignedMatrixIndexT>(length) <= + static_cast<UnsignedMatrixIndexT>(t.Dim())); + VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin); + VectorBase<Real>::dim_ = length; + } + + /// This constructor initializes the vector to point at the contents + /// of this packed matrix (SpMatrix or TpMatrix). + SubVector(const PackedMatrix<Real> &M) { + VectorBase<Real>::data_ = const_cast<Real*> (M.Data()); + VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2; + } + + /// Copy constructor + SubVector(const SubVector &other) : VectorBase<Real> () { + // this copy constructor needed for Range() to work in base class. + VectorBase<Real>::data_ = other.data_; + VectorBase<Real>::dim_ = other.dim_; + } + + /// Constructor from a pointer to memory and a length. Keeps a pointer + /// to the data but does not take ownership (will never delete). + SubVector(Real *data, MatrixIndexT length) : VectorBase<Real> () { + VectorBase<Real>::data_ = data; + VectorBase<Real>::dim_ = length; + } + + + /// This operation does not preserve const-ness, so be careful. + SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) { + VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row)); + VectorBase<Real>::dim_ = matrix.NumCols(); + } + + ~SubVector() {} ///< Destructor (does nothing; no pointers are owned here). + + private: + /// Disallow assignment operator. + SubVector & operator = (const SubVector &other) {} +}; + +/// @} end of "addtogroup matrix_group" +/// \addtogroup matrix_funcs_io +/// @{ +/// Output to a C++ stream. Non-binary by default (use Write for +/// binary output). +template<typename Real> +std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v); + +/// Input from a C++ stream. Will automatically read text or +/// binary data from the stream. +template<typename Real> +std::istream & operator >> (std::istream & in, VectorBase<Real> & v); + +/// Input from a C++ stream. Will automatically read text or +/// binary data from the stream. +template<typename Real> +std::istream & operator >> (std::istream & in, Vector<Real> & v); +/// @} end of \addtogroup matrix_funcs_io + +/// \addtogroup matrix_funcs_scalar +/// @{ + + +template<typename Real> +bool ApproxEqual(const VectorBase<Real> &a, + const VectorBase<Real> &b, Real tol = 0.01) { + return a.ApproxEqual(b, tol); +} + +template<typename Real> +inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b, + float tol = 0.01) { + KALDI_ASSERT(a.ApproxEqual(b, tol)); +} + + +/// Returns dot product between v1 and v2. +template<typename Real> +Real VecVec(const VectorBase<Real> &v1, const VectorBase<Real> &v2); + +template<typename Real, typename OtherReal> +Real VecVec(const VectorBase<Real> &v1, const VectorBase<OtherReal> &v2); + + +/// Returns \f$ v_1^T M v_2 \f$ . +/// Not as efficient as it could be where v1 == v2. +template<typename Real> +Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M, + const VectorBase<Real> &v2); + +/// @} End of "addtogroup matrix_funcs_scalar" + + +} // namespace kaldi + +// we need to include the implementation +#include "matrix/kaldi-vector-inl.h" + + + +#endif // KALDI_MATRIX_KALDI_VECTOR_H_ + diff --git a/kaldi_io/src/kaldi/matrix/matrix-common.h b/kaldi_io/src/kaldi/matrix/matrix-common.h new file mode 100644 index 0000000..d202b2e --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/matrix-common.h @@ -0,0 +1,100 @@ +// matrix/matrix-common.h + +// Copyright 2009-2011 Microsoft Corporation + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_MATRIX_COMMON_H_ +#define KALDI_MATRIX_MATRIX_COMMON_H_ + +// This file contains some #includes, forward declarations +// and typedefs that are needed by all the main header +// files in this directory. + +#include "base/kaldi-common.h" +#include "matrix/kaldi-blas.h" + +namespace kaldi { +typedef enum { + kTrans = CblasTrans, + kNoTrans = CblasNoTrans +} MatrixTransposeType; + +typedef enum { + kSetZero, + kUndefined, + kCopyData +} MatrixResizeType; + +typedef enum { + kTakeLower, + kTakeUpper, + kTakeMean, + kTakeMeanAndCheck +} SpCopyType; + +template<typename Real> class VectorBase; +template<typename Real> class Vector; +template<typename Real> class SubVector; +template<typename Real> class MatrixBase; +template<typename Real> class SubMatrix; +template<typename Real> class Matrix; +template<typename Real> class SpMatrix; +template<typename Real> class TpMatrix; +template<typename Real> class PackedMatrix; + +// these are classes that won't be defined in this +// directory; they're mostly needed for friend declarations. +template<typename Real> class CuMatrixBase; +template<typename Real> class CuSubMatrix; +template<typename Real> class CuMatrix; +template<typename Real> class CuVectorBase; +template<typename Real> class CuSubVector; +template<typename Real> class CuVector; +template<typename Real> class CuPackedMatrix; +template<typename Real> class CuSpMatrix; +template<typename Real> class CuTpMatrix; + +class CompressedMatrix; + +/// This class provides a way for switching between double and float types. +template<typename T> class OtherReal { }; // useful in reading+writing routines + // to switch double and float. +/// A specialized class for switching from float to double. +template<> class OtherReal<float> { + public: + typedef double Real; +}; +/// A specialized class for switching from double to float. +template<> class OtherReal<double> { + public: + typedef float Real; +}; + + +typedef int32 MatrixIndexT; +typedef int32 SignedMatrixIndexT; +typedef uint32 UnsignedMatrixIndexT; + +// If you want to use size_t for the index type, do as follows instead: +//typedef size_t MatrixIndexT; +//typedef ssize_t SignedMatrixIndexT; +//typedef size_t UnsignedMatrixIndexT; + +} + + + +#endif // KALDI_MATRIX_MATRIX_COMMON_H_ diff --git a/kaldi_io/src/kaldi/matrix/matrix-functions-inl.h b/kaldi_io/src/kaldi/matrix/matrix-functions-inl.h new file mode 100644 index 0000000..9fac851 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/matrix-functions-inl.h @@ -0,0 +1,56 @@ +// matrix/matrix-functions-inl.h + +// Copyright 2009-2011 Microsoft Corporation +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +// +// (*) incorporates, with permission, FFT code from his book +// "Signal Processing with Lapped Transforms", Artech, 1992. + + + +#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_ +#define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_ + +namespace kaldi { + +//! ComplexMul implements, inline, the complex multiplication b *= a. +template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im, + Real *b_re, Real *b_im) { + Real tmp_re = (*b_re * a_re) - (*b_im * a_im); + *b_im = *b_re * a_im + *b_im * a_re; + *b_re = tmp_re; +} + +template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im, + const Real &b_re, const Real &b_im, + Real *c_re, Real *c_im) { + *c_re += b_re*a_re - b_im*a_im; + *c_im += b_re*a_im + b_im*a_re; +} + + +template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) { + *a_re = std::cos(x); + *a_im = std::sin(x); +} + + +} // end namespace kaldi + + +#endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_ + diff --git a/kaldi_io/src/kaldi/matrix/matrix-functions.h b/kaldi_io/src/kaldi/matrix/matrix-functions.h new file mode 100644 index 0000000..b70ca56 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/matrix-functions.h @@ -0,0 +1,235 @@ +// matrix/matrix-functions.h + +// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky; +// Yanmin Qian; 1991 Henrique (Rico) Malvar (*) +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +// +// (*) incorporates, with permission, FFT code from his book +// "Signal Processing with Lapped Transforms", Artech, 1992. + + + +#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_H_ +#define KALDI_MATRIX_MATRIX_FUNCTIONS_H_ + +#include "matrix/kaldi-vector.h" +#include "matrix/kaldi-matrix.h" + +namespace kaldi { + +/// @addtogroup matrix_funcs_misc +/// @{ + +/** The function ComplexFft does an Fft on the vector argument v. + v is a vector of even dimension, interpreted for both input + and output as a vector of complex numbers i.e. + \f[ v = ( re_0, im_0, re_1, im_1, ... ) \f] + The dimension of v must be a power of 2. + + If "forward == true" this routine does the Discrete Fourier Transform + (DFT), i.e.: + \f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[i] exp( -2pi m n / N ) \f] + + If "backward" it does the Inverse Discrete Fourier Transform (IDFT) + *WITHOUT THE FACTOR 1/N*, + i.e.: + \f[ vout[m] <-- \sum_{n = 0}^{N-1} vin[i] exp( 2pi m n / N ) \f] + [note the sign difference on the 2 pi for the backward one.] + + Note that this is the definition of the FT given in most texts, but + it differs from the Numerical Recipes version in which the forward + and backward algorithms are flipped. + + Note that you would have to multiply by 1/N after the IDFT to get + back to where you started from. We don't do this because + in some contexts, the transform is made symmetric by multiplying + by sqrt(N) in both passes. The user can do this by themselves. + + See also SplitRadixComplexFft, declared in srfft.h, which is more efficient + but only works if the length of the input is a power of 2. + */ +template<typename Real> void ComplexFft (VectorBase<Real> *v, bool forward, Vector<Real> *tmp_work = NULL); + +/// ComplexFt is the same as ComplexFft but it implements the Fourier +/// transform in an inefficient way. It is mainly included for testing purposes. +/// See comment for ComplexFft to describe the input and outputs and what it does. +template<typename Real> void ComplexFt (const VectorBase<Real> &in, + VectorBase<Real> *out, bool forward); + +/// RealFft is a fourier transform of real inputs. Internally it uses +/// ComplexFft. The input dimension N must be even. If forward == true, +/// it transforms from a sequence of N real points to its complex fourier +/// transform; otherwise it goes in the reverse direction. If you call it +/// in the forward and then reverse direction and multiply by 1.0/N, you +/// will get back the original data. +/// The interpretation of the complex-FFT data is as follows: the array +/// is a sequence of complex numbers C_n of length N/2 with (real, im) format, +/// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...]. +/// See also SplitRadixRealFft, declared in srfft.h, which is more efficient +/// but only works if the length of the input is a power of 2. + +template<typename Real> void RealFft (VectorBase<Real> *v, bool forward); + + +/// RealFt has the same input and output format as RealFft above, but it is +/// an inefficient implementation included for testing purposes. +template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward); + +/// ComputeDctMatrix computes a matrix corresponding to the DCT, such that +/// M * v equals the DCT of vector v. M must be square at input. +/// This is the type = III DCT with normalization, corresponding to the +/// following equations, where x is the signal and X is the DCT: +/// X_0 = 1/sqrt(2*N) \sum_{n = 0}^{N-1} x_n +/// X_k = 1/sqrt(N) \sum_{n = 0}^{N-1} x_n cos( \pi/N (n + 1/2) k ) +/// This matrix's transpose is its own inverse, so transposing this +/// matrix will give the inverse DCT. +/// Caution: the type III DCT is generally known as the "inverse DCT" (with the +/// type II being the actual DCT), so this function is somewhatd mis-named. It +/// was probably done this way for HTK compatibility. We don't change it +/// because it was this way from the start and changing it would affect the +/// feature generation. + +template<typename Real> void ComputeDctMatrix(Matrix<Real> *M); + + +/// ComplexMul implements, inline, the complex multiplication b *= a. +template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im, + Real *b_re, Real *b_im); + +/// ComplexMul implements, inline, the complex operation c += (a * b). +template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im, + const Real &b_re, const Real &b_im, + Real *c_re, Real *c_im); + + +/// ComplexImExp implements a <-- exp(i x), inline. +template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im); + + +// This class allows you to compute the matrix exponential function +// B = I + A + 1/2! A^2 + 1/3! A^3 + ... +// This method is most accurate where the result is of the same order of +// magnitude as the unit matrix (it will typically not work well when +// the answer has almost-zero eigenvalues or is close to zero). +// It also provides a function that allows you do back-propagate the +// derivative of a scalar function through this calculation. +// The +template<typename Real> +class MatrixExponential { + public: + MatrixExponential() { } + + void Compute(const MatrixBase<Real> &M, MatrixBase<Real> *X); // does *X = exp(M) + + // Version for symmetric matrices (it just copies to full matrix). + void Compute(const SpMatrix<Real> &M, SpMatrix<Real> *X); // does *X = exp(M) + + void Backprop(const MatrixBase<Real> &hX, MatrixBase<Real> *hM) const; // Propagates + // the gradient of a scalar function f backwards through this operation, i.e.: + // if the parameter dX represents df/dX (with no transpose, so element i, j of dX + // is the derivative of f w.r.t. E(i, j)), it sets dM to df/dM, again with no + // transpose (of course, only the part thereof that comes through the effect of + // A on B). This applies to the values of A and E that were called most recently + // with Compute(). + + // Version for symmetric matrices (it just copies to full matrix). + void Backprop(const SpMatrix<Real> &hX, SpMatrix<Real> *hM) const; + + private: + void Clear(); + + static MatrixIndexT ComputeN(const MatrixBase<Real> &M); + + // This is intended for matrices P with small norms: compute B_0 = exp(P) - I. + // Keeps adding terms in the Taylor series till there is no further + // change in the result. Stores some of the powers of A in powers_, + // and the number of terms K as K_. + void ComputeTaylor(const MatrixBase<Real> &P, MatrixBase<Real> *B0); + + // Backprop through the Taylor-series computation above. + // note: hX is \hat{X} in the math; hM is \hat{M} in the math. + void BackpropTaylor(const MatrixBase<Real> &hX, + MatrixBase<Real> *hM) const; + + Matrix<Real> P_; // Equals M * 2^(-N_) + std::vector<Matrix<Real> > B_; // B_[0] = exp(P_) - I, + // B_[k] = 2 B_[k-1] + B_[k-1]^2 [k > 0], + // ( = exp(P_)^k - I ) + // goes from 0..N_ [size N_+1]. + + std::vector<Matrix<Real> > powers_; // powers (>1) of P_ stored here, + // up to all but the last one used in the Taylor expansion (this is the + // last one we need in the backprop). The index is the power minus 2. + + MatrixIndexT N_; // Power N_ >=0 such that P_ = A * 2^(-N_), + // we choose it so that P_ has a sufficiently small norm + // that the Taylor series will converge fast. +}; + + +/** + ComputePCA does a PCA computation, using either outer products + or inner products, whichever is more efficient. Let D be + the dimension of the data points, N be the number of data + points, and G be the PCA dimension we want to retain. We assume + G <= N and G <= D. + + @param X [in] An N x D matrix. Each row of X is a point x_i. + @param U [out] A G x D matrix. Each row of U is a basis element u_i. + @param A [out] An N x D matrix, or NULL. Each row of A is a set of coefficients + in the basis for a point x_i, so A(i, g) is the coefficient of u_i + in x_i. + @param print_eigs [in] If true, prints out diagnostic information about the + eigenvalues. + @param exact [in] If true, does the exact computation; if false, does + a much faster (but almost exact) computation based on the Lanczos + method. +*/ + +template<typename Real> +void ComputePca(const MatrixBase<Real> &X, + MatrixBase<Real> *U, + MatrixBase<Real> *A, + bool print_eigs = false, + bool exact = true); + + + +// This function does: *plus += max(0, a b^T), +// *minus += max(0, -(a b^T)). +template<typename Real> +void AddOuterProductPlusMinus(Real alpha, + const VectorBase<Real> &a, + const VectorBase<Real> &b, + MatrixBase<Real> *plus, + MatrixBase<Real> *minus); + +template<typename Real1, typename Real2> +inline void AssertSameDim(const MatrixBase<Real1> &mat1, const MatrixBase<Real2> &mat2) { + KALDI_ASSERT(mat1.NumRows() == mat2.NumRows() + && mat1.NumCols() == mat2.NumCols()); +} + + +/// @} end of "addtogroup matrix_funcs_misc" + +} // end namespace kaldi + +#include "matrix/matrix-functions-inl.h" + + +#endif diff --git a/kaldi_io/src/kaldi/matrix/matrix-lib.h b/kaldi_io/src/kaldi/matrix/matrix-lib.h new file mode 100644 index 0000000..39acec5 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/matrix-lib.h @@ -0,0 +1,37 @@ +// matrix/matrix-lib.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +// Include everything from this directory. +// These files include other stuff that we need. +#ifndef KALDI_MATRIX_MATRIX_LIB_H_ +#define KALDI_MATRIX_MATRIX_LIB_H_ + +#include "matrix/cblas-wrappers.h" +#include "base/kaldi-common.h" +#include "matrix/kaldi-vector.h" +#include "matrix/kaldi-matrix.h" +#include "matrix/sp-matrix.h" +#include "matrix/tp-matrix.h" +#include "matrix/matrix-functions.h" +#include "matrix/srfft.h" +#include "matrix/compressed-matrix.h" +#include "matrix/optimization.h" + +#endif + diff --git a/kaldi_io/src/kaldi/matrix/optimization.h b/kaldi_io/src/kaldi/matrix/optimization.h new file mode 100644 index 0000000..66309ac --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/optimization.h @@ -0,0 +1,248 @@ +// matrix/optimization.h + +// Copyright 2012 Johns Hopkins University (author: Daniel Povey) +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +// +// (*) incorporates, with permission, FFT code from his book +// "Signal Processing with Lapped Transforms", Artech, 1992. + + + +#ifndef KALDI_MATRIX_OPTIMIZATION_H_ +#define KALDI_MATRIX_OPTIMIZATION_H_ + +#include "matrix/kaldi-vector.h" +#include "matrix/kaldi-matrix.h" + +namespace kaldi { + + +/// @addtogroup matrix_optimization +/// @{ + +struct LinearCgdOptions { + int32 max_iters; // Maximum number of iters (if >= 0). + BaseFloat max_error; // Maximum 2-norm of the residual A x - b (convergence + // test) + // Every time the residual 2-norm decreases by this recompute_residual_factor + // since the last time it was computed from scratch, recompute it from + // scratch. This helps to keep the computed residual accurate even in the + // presence of roundoff. + BaseFloat recompute_residual_factor; + + LinearCgdOptions(): max_iters(-1), + max_error(0.0), + recompute_residual_factor(0.01) { } +}; + +/* + This function uses linear conjugate gradient descent to approximately solve + the system A x = b. The value of x at entry corresponds to the initial guess + of x. The algorithm continues until the number of iterations equals b.Dim(), + or until the 2-norm of (A x - b) is <= max_error, or until the number of + iterations equals max_iter, whichever happens sooner. It is a requirement + that A be positive definite. + It returns the number of iterations that were actually executed (this is + useful for testing purposes). +*/ +template<typename Real> +int32 LinearCgd(const LinearCgdOptions &opts, + const SpMatrix<Real> &A, const VectorBase<Real> &b, + VectorBase<Real> *x); + + + + + + +/** + This is an implementation of L-BFGS. It pushes responsibility for + determining when to stop, onto the user. There is no call-back here: + everything is done via calls to the class itself (see the example in + matrix-lib-test.cc). This does not implement constrained L-BFGS, but it will + handle constrained problems correctly as long as the function approaches + +infinity (or -infinity for maximization problems) when it gets close to the + bound of the constraint. In these types of problems, you just let the + function value be +infinity for minimization problems, or -infinity for + maximization problems, outside these bounds). +*/ + +struct LbfgsOptions { + bool minimize; // if true, we're minimizing, else maximizing. + int m; // m is the number of stored vectors L-BFGS keeps. + float first_step_learning_rate; // The very first step of L-BFGS is + // like gradient descent. If you want to configure the size of that step, + // you can do it using this variable. + float first_step_length; // If this variable is >0.0, it overrides + // first_step_learning_rate; on the first step we choose an approximate + // Hessian that is the multiple of the identity that would generate this + // step-length, or 1.0 if the gradient is zero. + float first_step_impr; // If this variable is >0.0, it overrides + // first_step_learning_rate; on the first step we choose an approximate + // Hessian that is the multiple of the identity that would generate this + // amount of objective function improvement (assuming the "real" objf + // was linear). + float c1; // A constant in Armijo rule = Wolfe condition i) + float c2; // A constant in Wolfe condition ii) + float d; // An amount > 1.0 (default 2.0) that we initially multiply or + // divide the step length by, in the line search. + int max_line_search_iters; // after this many iters we restart L-BFGS. + int avg_step_length; // number of iters to avg step length over, in + // RecentStepLength(). + + LbfgsOptions (bool minimize = true): + minimize(minimize), + m(10), + first_step_learning_rate(1.0), + first_step_length(0.0), + first_step_impr(0.0), + c1(1.0e-04), + c2(0.9), + d(2.0), + max_line_search_iters(50), + avg_step_length(4) { } +}; + +template<typename Real> +class OptimizeLbfgs { + public: + /// Initializer takes the starting value of x. + OptimizeLbfgs(const VectorBase<Real> &x, + const LbfgsOptions &opts); + + /// This returns the value of the variable x that has the best objective + /// function so far, and the corresponding objective function value if + /// requested. This would typically be called only at the end. + const VectorBase<Real>& GetValue(Real *objf_value = NULL) const; + + /// This returns the value at which the function wants us + /// to compute the objective function and gradient. + const VectorBase<Real>& GetProposedValue() const { return new_x_; } + + /// Returns the average magnitude of the last n steps (but not + /// more than the number we have stored). Before we have taken + /// any steps, returns +infinity. Note: if the most recent + /// step length was 0, it returns 0, regardless of the other + /// step lengths. This makes it suitable as a convergence test + /// (else we'd generate NaN's). + Real RecentStepLength() const; + + /// The user calls this function to provide the class with the + /// function and gradient info at the point GetProposedValue(). + /// If this point is outside the constraints you can set function_value + /// to {+infinity,-infinity} for {minimization,maximization} problems. + /// In this case the gradient, and also the second derivative (if you call + /// the second overloaded version of this function) will be ignored. + void DoStep(Real function_value, + const VectorBase<Real> &gradient); + + /// The user can call this version of DoStep() if it is desired to set some + /// kind of approximate Hessian on this iteration. Note: it is a prerequisite + /// that diag_approx_2nd_deriv must be strictly positive (minimizing), or + /// negative (maximizing). + void DoStep(Real function_value, + const VectorBase<Real> &gradient, + const VectorBase<Real> &diag_approx_2nd_deriv); + + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(OptimizeLbfgs); + + + // The following variable says what stage of the computation we're at. + // Refer to Algorithm 7.5 (L-BFGS) of Nodecdal & Wright, "Numerical + // Optimization", 2nd edition. + // kBeforeStep means we're about to do + /// "compute p_k <-- - H_k \delta f_k" (i.e. Algorithm 7.4). + // kWithinStep means we're at some point within line search; note + // that line search is iterative so we can stay in this state more + // than one time on each iteration. + enum ComputationState { + kBeforeStep, + kWithinStep, // This means we're within the step-size computation, and + // have not yet done the 1st function evaluation. + }; + + inline MatrixIndexT Dim() { return x_.Dim(); } + inline MatrixIndexT M() { return opts_.m; } + SubVector<Real> Y(MatrixIndexT i) { + return SubVector<Real>(data_, (i % M()) * 2); // vector y_i + } + SubVector<Real> S(MatrixIndexT i) { + return SubVector<Real>(data_, (i % M()) * 2 + 1); // vector s_i + } + // The following are subroutines within DoStep(): + bool AcceptStep(Real function_value, + const VectorBase<Real> &gradient); + void Restart(const VectorBase<Real> &x, + Real function_value, + const VectorBase<Real> &gradient); + void ComputeNewDirection(Real function_value, + const VectorBase<Real> &gradient); + void ComputeHifNeeded(const VectorBase<Real> &gradient); + void StepSizeIteration(Real function_value, + const VectorBase<Real> &gradient); + void RecordStepLength(Real s); + + + LbfgsOptions opts_; + SignedMatrixIndexT k_; // Iteration number, starts from zero. Gets set back to zero + // when we restart. + + ComputationState computation_state_; + bool H_was_set_; // True if the user specified H_; if false, + // we'll use a heuristic to estimate it. + + + Vector<Real> x_; // current x. + Vector<Real> new_x_; // the x proposed in the line search. + Vector<Real> best_x_; // the x with the best objective function so far + // (either the same as x_ or something in the current line search.) + Vector<Real> deriv_; // The most recently evaluated derivative-- at x_k. + Vector<Real> temp_; + Real f_; // The function evaluated at x_k. + Real best_f_; // the best objective function so far. + Real d_; // a number d > 1.0, but during an iteration we may decrease this, when + // we switch between armijo and wolfe failures. + + int num_wolfe_i_failures_; // the num times we decreased step size. + int num_wolfe_ii_failures_; // the num times we increased step size. + enum { kWolfeI, kWolfeII, kNone } last_failure_type_; // last type of step-search + // failure on this iter. + + Vector<Real> H_; // Current inverse-Hessian estimate. May be computed by this class itself, + // or provided by user using 2nd form of SetGradientInfo(). + Matrix<Real> data_; // dimension (m*2) x dim. Even rows store + // gradients y_i, odd rows store steps s_i. + Vector<Real> rho_; // dimension m; rho_(m) = 1/(y_m^T s_m), Eq. 7.17. + + std::vector<Real> step_lengths_; // The step sizes we took on the last + // (up to m) iterations; these are not stored in a rotating buffer but + // are shifted by one each time (this is more convenient when we + // restart, as we keep this info past restarting). + + +}; + +/// @} + + +} // end namespace kaldi + + + +#endif + diff --git a/kaldi_io/src/kaldi/matrix/packed-matrix.h b/kaldi_io/src/kaldi/matrix/packed-matrix.h new file mode 100644 index 0000000..722d932 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/packed-matrix.h @@ -0,0 +1,197 @@ +// matrix/packed-matrix.h + +// Copyright 2009-2013 Ondrej Glembek; Lukas Burget; Microsoft Corporation; +// Saarland University; Yanmin Qian; +// Johns Hopkins University (Author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_PACKED_MATRIX_H_ +#define KALDI_MATRIX_PACKED_MATRIX_H_ + +#include "matrix/matrix-common.h" +#include <algorithm> + +namespace kaldi { + +/// \addtogroup matrix_funcs_io +// we need to declare the friend << operator here +template<typename Real> +std::ostream & operator <<(std::ostream & out, const PackedMatrix<Real>& M); + + +/// \addtogroup matrix_group +/// @{ + +/// @brief Packed matrix: base class for triangular and symmetric matrices. +template<typename Real> class PackedMatrix { + friend class CuPackedMatrix<Real>; + public: + //friend class CuPackedMatrix<Real>; + + PackedMatrix() : data_(NULL), num_rows_(0) {} + + explicit PackedMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero): + data_(NULL) { Resize(r, resize_type); } + + explicit PackedMatrix(const PackedMatrix<Real> &orig) : data_(NULL) { + Resize(orig.num_rows_, kUndefined); + CopyFromPacked(orig); + } + + template<typename OtherReal> + explicit PackedMatrix(const PackedMatrix<OtherReal> &orig) : data_(NULL) { + Resize(orig.NumRows(), kUndefined); + CopyFromPacked(orig); + } + + void SetZero(); /// < Set to zero + void SetUnit(); /// < Set to unit matrix. + void SetRandn(); /// < Set to random values of a normal distribution + + Real Trace() const; + + // Needed for inclusion in std::vector + PackedMatrix<Real> & operator =(const PackedMatrix<Real> &other) { + Resize(other.NumRows()); + CopyFromPacked(other); + return *this; + } + + ~PackedMatrix() { + Destroy(); + } + + /// Set packed matrix to a specified size (can be zero). + /// The value of the new data depends on resize_type: + /// -if kSetZero, the new data will be zero + /// -if kUndefined, the new data will be undefined + /// -if kCopyData, the new data will be the same as the old data in any + /// shared positions, and zero elsewhere. + /// This function takes time proportional to the number of data elements. + void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero); + + void AddToDiag(const Real r); // Adds r to diaginal + + void ScaleDiag(const Real alpha); // Scales diagonal by alpha. + + void SetDiag(const Real alpha); // Sets diagonal to this value. + + template<typename OtherReal> + void CopyFromPacked(const PackedMatrix<OtherReal> &orig); + + /// CopyFromVec just interprets the vector as having the same layout + /// as the packed matrix. Must have the same dimension, i.e. + /// orig.Dim() == (NumRows()*(NumRows()+1)) / 2; + template<typename OtherReal> + void CopyFromVec(const SubVector<OtherReal> &orig); + + Real* Data() { return data_; } + const Real* Data() const { return data_; } + inline MatrixIndexT NumRows() const { return num_rows_; } + inline MatrixIndexT NumCols() const { return num_rows_; } + size_t SizeInBytes() const { + size_t nr = static_cast<size_t>(num_rows_); + return ((nr * (nr+1)) / 2) * sizeof(Real); + } + + //MatrixIndexT Stride() const { return stride_; } + + // This code is duplicated in child classes to avoid extra levels of calls. + Real operator() (MatrixIndexT r, MatrixIndexT c) const { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(num_rows_) && + static_cast<UnsignedMatrixIndexT>(c) < + static_cast<UnsignedMatrixIndexT>(num_rows_) + && c <= r); + return *(data_ + (r * (r + 1)) / 2 + c); + } + + // This code is duplicated in child classes to avoid extra levels of calls. + Real &operator() (MatrixIndexT r, MatrixIndexT c) { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(num_rows_) && + static_cast<UnsignedMatrixIndexT>(c) < + static_cast<UnsignedMatrixIndexT>(num_rows_) + && c <= r); + return *(data_ + (r * (r + 1)) / 2 + c); + } + + Real Max() const { + KALDI_ASSERT(num_rows_ > 0); + return * (std::max_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) )); + } + + Real Min() const { + KALDI_ASSERT(num_rows_ > 0); + return * (std::min_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) )); + } + + void Scale(Real c); + + friend std::ostream & operator << <> (std::ostream & out, + const PackedMatrix<Real> &m); + // Use instead of stream<<*this, if you want to add to existing contents. + // Will throw exception on failure. + void Read(std::istream &in, bool binary, bool add = false); + + void Write(std::ostream &out, bool binary) const; + + void Destroy(); + + /// Swaps the contents of *this and *other. Shallow swap. + void Swap(PackedMatrix<Real> *other); + void Swap(Matrix<Real> *other); + + + protected: + // Will only be called from this class or derived classes. + void AddPacked(const Real alpha, const PackedMatrix<Real>& M); + Real *data_; + MatrixIndexT num_rows_; + //MatrixIndexT stride_; + private: + /// Init assumes the current contents of the class are is invalid (i.e. junk or + /// has already been freed), and it sets the matrixd to newly allocated memory + /// with the specified dimension. dim == 0 is acceptable. The memory contents + /// pointed to by data_ will be undefined. + void Init(MatrixIndexT dim); + +}; +/// @} end "addtogroup matrix_group" + + +/// \addtogroup matrix_funcs_io +/// @{ + +template<typename Real> +std::ostream & operator << (std::ostream & os, const PackedMatrix<Real>& M) { + M.Write(os, false); + return os; +} + +template<typename Real> +std::istream & operator >> (std::istream &is, PackedMatrix<Real> &M) { + M.Read(is, false); + return is; +} + +/// @} + +} // namespace kaldi + +#endif + diff --git a/kaldi_io/src/kaldi/matrix/sp-matrix-inl.h b/kaldi_io/src/kaldi/matrix/sp-matrix-inl.h new file mode 100644 index 0000000..1579592 --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/sp-matrix-inl.h @@ -0,0 +1,42 @@ +// matrix/sp-matrix-inl.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_MATRIX_SP_MATRIX_INL_H_ +#define KALDI_MATRIX_SP_MATRIX_INL_H_ + +#include "matrix/tp-matrix.h" + +namespace kaldi { + +// All the lines in this file seem to be declaring template specializations. +// These tell the compiler that we'll implement the templated function +// separately for the different template arguments (float, double). + +template<> +double SolveQuadraticProblem(const SpMatrix<double> &H, const VectorBase<double> &g, + const SolverOptions &opts, VectorBase<double> *x); + +template<> +float SolveQuadraticProblem(const SpMatrix<float> &H, const VectorBase<float> &g, + const SolverOptions &opts, VectorBase<float> *x); + +} // namespace kaldi + + +#endif // KALDI_MATRIX_SP_MATRIX_INL_H_ diff --git a/kaldi_io/src/kaldi/matrix/sp-matrix.h b/kaldi_io/src/kaldi/matrix/sp-matrix.h new file mode 100644 index 0000000..209d24a --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/sp-matrix.h @@ -0,0 +1,524 @@ +// matrix/sp-matrix.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Lukas Burget; +// Saarland University; Ariya Rastrow; Yanmin Qian; +// Jan Silovsky + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_SP_MATRIX_H_ +#define KALDI_MATRIX_SP_MATRIX_H_ + +#include <algorithm> +#include <vector> + +#include "matrix/packed-matrix.h" + +namespace kaldi { + + +/// \addtogroup matrix_group +/// @{ +template<typename Real> class SpMatrix; + + +/** + * @brief Packed symetric matrix class +*/ +template<typename Real> +class SpMatrix : public PackedMatrix<Real> { + friend class CuSpMatrix<Real>; + public: + // so it can use our assignment operator. + friend class std::vector<Matrix<Real> >; + + SpMatrix(): PackedMatrix<Real>() {} + + /// Copy constructor from CUDA version of SpMatrix + /// This is defined in ../cudamatrix/cu-sp-matrix.h + + explicit SpMatrix(const CuSpMatrix<Real> &cu); + + explicit SpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero) + : PackedMatrix<Real>(r, resize_type) {} + + SpMatrix(const SpMatrix<Real> &orig) + : PackedMatrix<Real>(orig) {} + + template<typename OtherReal> + explicit SpMatrix(const SpMatrix<OtherReal> &orig) + : PackedMatrix<Real>(orig) {} + +#ifdef KALDI_PARANOID + explicit SpMatrix(const MatrixBase<Real> & orig, + SpCopyType copy_type = kTakeMeanAndCheck) + : PackedMatrix<Real>(orig.NumRows(), kUndefined) { + CopyFromMat(orig, copy_type); + } +#else + explicit SpMatrix(const MatrixBase<Real> & orig, + SpCopyType copy_type = kTakeMean) + : PackedMatrix<Real>(orig.NumRows(), kUndefined) { + CopyFromMat(orig, copy_type); + } +#endif + + /// Shallow swap. + void Swap(SpMatrix *other); + + inline void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) { + PackedMatrix<Real>::Resize(nRows, resize_type); + } + + void CopyFromSp(const SpMatrix<Real> &other) { + PackedMatrix<Real>::CopyFromPacked(other); + } + + template<typename OtherReal> + void CopyFromSp(const SpMatrix<OtherReal> &other) { + PackedMatrix<Real>::CopyFromPacked(other); + } + +#ifdef KALDI_PARANOID + void CopyFromMat(const MatrixBase<Real> &orig, + SpCopyType copy_type = kTakeMeanAndCheck); +#else // different default arg if non-paranoid mode. + void CopyFromMat(const MatrixBase<Real> &orig, + SpCopyType copy_type = kTakeMean); +#endif + + inline Real operator() (MatrixIndexT r, MatrixIndexT c) const { + // if column is less than row, then swap these as matrix is stored + // as upper-triangular... only allowed for const matrix object. + if (static_cast<UnsignedMatrixIndexT>(c) > + static_cast<UnsignedMatrixIndexT>(r)) + std::swap(c, r); + // c<=r now so don't have to check c. + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(this->num_rows_)); + return *(this->data_ + (r*(r+1)) / 2 + c); + // Duplicating code from PackedMatrix.h + } + + inline Real &operator() (MatrixIndexT r, MatrixIndexT c) { + if (static_cast<UnsignedMatrixIndexT>(c) > + static_cast<UnsignedMatrixIndexT>(r)) + std::swap(c, r); + // c<=r now so don't have to check c. + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(this->num_rows_)); + return *(this->data_ + (r * (r + 1)) / 2 + c); + // Duplicating code from PackedMatrix.h + } + + using PackedMatrix<Real>::operator =; + using PackedMatrix<Real>::Scale; + + /// matrix inverse. + /// if inverse_needed = false, will fill matrix with garbage. + /// (only useful if logdet wanted). + void Invert(Real *logdet = NULL, Real *det_sign= NULL, + bool inverse_needed = true); + + // Below routine does inversion in double precision, + // even for single-precision object. + void InvertDouble(Real *logdet = NULL, Real *det_sign = NULL, + bool inverse_needed = true); + + /// Returns maximum ratio of singular values. + inline Real Cond() const { + Matrix<Real> tmp(*this); + return tmp.Cond(); + } + + /// Takes matrix to a fraction power via Svd. + /// Will throw exception if matrix is not positive semidefinite + /// (to within a tolerance) + void ApplyPow(Real exponent); + + /// This is the version of SVD that we implement for symmetric positive + /// definite matrices. This exists for historical reasons; right now its + /// internal implementation is the same as Eig(). It computes the eigenvalue + /// decomposition (*this) = P * diag(s) * P^T with P orthogonal. Will throw + /// exception if input is not positive semidefinite to within a tolerance. + void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P, + Real tolerance = 0.001) const; + + /// Solves the symmetric eigenvalue problem: at end we should have (*this) = P + /// * diag(s) * P^T. We solve the problem using the symmetric QR method. + /// P may be NULL. + /// Implemented in qr.cc. + /// If you need the eigenvalues sorted, the function SortSvd declared in + /// kaldi-matrix is suitable. + void Eig(VectorBase<Real> *s, MatrixBase<Real> *P = NULL) const; + + /// This function gives you, approximately, the largest eigenvalues of the + /// symmetric matrix and the corresponding eigenvectors. (largest meaning, + /// further from zero). It does this by doing a SVD within the Krylov + /// subspace generated by this matrix and a random vector. This is + /// a form of the Lanczos method with complete reorthogonalization, followed + /// by SVD within a smaller dimension ("lanczos_dim"). + /// + /// If *this is m by m, s should be of dimension n and P should be of + /// dimension m by n, with n <= m. The *columns* of P are the approximate + /// eigenvectors; P * diag(s) * P^T would be a low-rank reconstruction of + /// *this. The columns of P will be orthogonal, and the elements of s will be + /// the eigenvalues of *this projected into that subspace, but beyond that + /// there are no exact guarantees. (This is because the convergence of this + /// method is statistical). Note: it only makes sense to use this + /// method if you are in very high dimension and n is substantially smaller + /// than m: for example, if you want the 100 top eigenvalues of a 10k by 10k + /// matrix. This function calls Rand() to initialize the lanczos + /// iterations and also for restarting. + /// If lanczos_dim is zero, it will default to the greater of: + /// s->Dim() + 50 or s->Dim() + s->Dim()/2, but not more than this->Dim(). + /// If lanczos_dim == this->Dim(), you might as well just call the function + /// Eig() since the result will be the same, and Eig() would be faster; the + /// whole point of this function is to reduce the dimension of the SVD + /// computation. + void TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P, + MatrixIndexT lanczos_dim = 0) const; + + + + /// Takes log of the matrix (does eigenvalue decomposition then takes + /// log of eigenvalues and reconstructs). Will throw of not +ve definite. + void Log(); + + + // Takes exponential of the matrix (equivalent to doing eigenvalue + // decomposition then taking exp of eigenvalues and reconstructing). + void Exp(); + + /// Returns the maximum of the absolute values of any of the + /// eigenvalues. + Real MaxAbsEig() const; + + void PrintEigs(const char *name) { + Vector<Real> s((*this).NumRows()); + Matrix<Real> P((*this).NumRows(), (*this).NumCols()); + SymPosSemiDefEig(&s, &P); + KALDI_LOG << "PrintEigs: " << name << ": " << s; + } + + bool IsPosDef() const; // returns true if Cholesky succeeds. + void AddSp(const Real alpha, const SpMatrix<Real> &Ma) { + this->AddPacked(alpha, Ma); + } + + /// Computes log determinant but only for +ve-def matrices + /// (it uses Cholesky). + /// If matrix is not +ve-def, it will throw an exception + /// was LogPDDeterminant() + Real LogPosDefDet() const; + + Real LogDet(Real *det_sign = NULL) const; + + /// rank-one update, this <-- this + alpha v v' + template<typename OtherReal> + void AddVec2(const Real alpha, const VectorBase<OtherReal> &v); + + /// rank-two update, this <-- this + alpha (v w' + w v'). + void AddVecVec(const Real alpha, const VectorBase<Real> &v, + const VectorBase<Real> &w); + + /// Does *this = beta * *thi + alpha * diag(v) * S * diag(v) + void AddVec2Sp(const Real alpha, const VectorBase<Real> &v, + const SpMatrix<Real> &S, const Real beta); + + /// diagonal update, this <-- this + diag(v) + template<typename OtherReal> + void AddDiagVec(const Real alpha, const VectorBase<OtherReal> &v); + + /// rank-N update: + /// if (transM == kNoTrans) + /// (*this) = beta*(*this) + alpha * M * M^T, + /// or (if transM == kTrans) + /// (*this) = beta*(*this) + alpha * M^T * M + /// Note: beta used to default to 0.0. + void AddMat2(const Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType transM, const Real beta); + + /// Extension of rank-N update: + /// this <-- beta*this + alpha * M * A * M^T. + /// (*this) and A are allowed to be the same. + /// If transM == kTrans, then we do it as M^T * A * M. + void AddMat2Sp(const Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType transM, const SpMatrix<Real> &A, + const Real beta = 0.0); + + /// This is a version of AddMat2Sp specialized for when M is fairly sparse. + /// This was required for making the raw-fMLLR code efficient. + void AddSmat2Sp(const Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType transM, const SpMatrix<Real> &A, + const Real beta = 0.0); + + /// The following function does: + /// this <-- beta*this + alpha * T * A * T^T. + /// (*this) and A are allowed to be the same. + /// If transM == kTrans, then we do it as alpha * T^T * A * T. + /// Currently it just calls AddMat2Sp, but if needed we + /// can implement it more efficiently. + void AddTp2Sp(const Real alpha, const TpMatrix<Real> &T, + MatrixTransposeType transM, const SpMatrix<Real> &A, + const Real beta = 0.0); + + /// The following function does: + /// this <-- beta*this + alpha * T * T^T. + /// (*this) and A are allowed to be the same. + /// If transM == kTrans, then we do it as alpha * T^T * T + /// Currently it just calls AddMat2, but if needed we + /// can implement it more efficiently. + void AddTp2(const Real alpha, const TpMatrix<Real> &T, + MatrixTransposeType transM, const Real beta = 0.0); + + /// Extension of rank-N update: + /// this <-- beta*this + alpha * M * diag(v) * M^T. + /// if transM == kTrans, then + /// this <-- beta*this + alpha * M^T * diag(v) * M. + void AddMat2Vec(const Real alpha, const MatrixBase<Real> &M, + MatrixTransposeType transM, const VectorBase<Real> &v, + const Real beta = 0.0); + + + /// Floors this symmetric matrix to the matrix + /// alpha * Floor, where the matrix Floor is positive + /// definite. + /// It is floored in the sense that after flooring, + /// x^T (*this) x >= x^T (alpha*Floor) x. + /// This is accomplished using an Svd. It will crash + /// if Floor is not positive definite. Returns the number of + /// elements that were floored. + int ApplyFloor(const SpMatrix<Real> &Floor, Real alpha = 1.0, + bool verbose = false); + + /// Floor: Given a positive semidefinite matrix, floors the eigenvalues + /// to the specified quantity. A previous version of this function had + /// a tolerance which is now no longer needed since we have code to + /// do the symmetric eigenvalue decomposition and no longer use the SVD + /// code for that purose. + int ApplyFloor(Real floor); + + bool IsDiagonal(Real cutoff = 1.0e-05) const; + bool IsUnit(Real cutoff = 1.0e-05) const; + bool IsZero(Real cutoff = 1.0e-05) const; + bool IsTridiagonal(Real cutoff = 1.0e-05) const; + + /// sqrt of sum of square elements. + Real FrobeniusNorm() const; + + /// Returns true if ((*this)-other).FrobeniusNorm() <= + /// tol*(*this).FrobeniusNorma() + bool ApproxEqual(const SpMatrix<Real> &other, float tol = 0.01) const; + + // LimitCond: + // Limits the condition of symmetric positive semidefinite matrix to + // a specified value + // by flooring all eigenvalues to a positive number which is some multiple + // of the largest one (or zero if there are no positive eigenvalues). + // Takes the condition number we are willing to accept, and floors + // eigenvalues to the largest eigenvalue divided by this. + // Returns #eigs floored or already equal to the floor. + // Throws exception if input is not positive definite. + // returns #floored. + MatrixIndexT LimitCond(Real maxCond = 1.0e+5, bool invert = false); + + // as LimitCond but all done in double precision. // returns #floored. + MatrixIndexT LimitCondDouble(Real maxCond = 1.0e+5, bool invert = false) { + SpMatrix<double> dmat(*this); + MatrixIndexT ans = dmat.LimitCond(maxCond, invert); + (*this).CopyFromSp(dmat); + return ans; + } + Real Trace() const; + + /// Tridiagonalize the matrix with an orthogonal transformation. If + /// *this starts as S, produce T (and Q, if non-NULL) such that + /// T = Q A Q^T, i.e. S = Q^T T Q. Caution: this is the other way + /// round from most authors (it's more efficient in row-major indexing). + void Tridiagonalize(MatrixBase<Real> *Q); + + /// The symmetric QR algorithm. This will mostly be useful in internal code. + /// Typically, you will call this after Tridiagonalize(), on the same object. + /// When called, *this (call it A at this point) must be tridiagonal; at exit, + /// *this will be a diagonal matrix D that is similar to A via orthogonal + /// transformations. This algorithm right-multiplies Q by orthogonal + /// transformations. It turns *this from a tridiagonal into a diagonal matrix + /// while maintaining that (Q *this Q^T) has the same value at entry and exit. + /// At entry Q should probably be either NULL or orthogonal, but we don't check + /// this. + void Qr(MatrixBase<Real> *Q); + + private: + void EigInternal(VectorBase<Real> *s, MatrixBase<Real> *P, + Real tolerance, int recurse) const; +}; + +/// @} end of "addtogroup matrix_group" + +/// \addtogroup matrix_funcs_scalar +/// @{ + + +/// Returns tr(A B). +float TraceSpSp(const SpMatrix<float> &A, const SpMatrix<float> &B); +double TraceSpSp(const SpMatrix<double> &A, const SpMatrix<double> &B); + + +template<typename Real> +inline bool ApproxEqual(const SpMatrix<Real> &A, + const SpMatrix<Real> &B, Real tol = 0.01) { + return A.ApproxEqual(B, tol); +} + +template<typename Real> +inline void AssertEqual(const SpMatrix<Real> &A, + const SpMatrix<Real> &B, Real tol = 0.01) { + KALDI_ASSERT(ApproxEqual(A, B, tol)); +} + + + +/// Returns tr(A B). +template<typename Real, typename OtherReal> +Real TraceSpSp(const SpMatrix<Real> &A, const SpMatrix<OtherReal> &B); + + + +// TraceSpSpLower is the same as Trace(A B) except the lower-diagonal elements +// are counted only once not twice as they should be. It is useful in certain +// optimizations. +template<typename Real> +Real TraceSpSpLower(const SpMatrix<Real> &A, const SpMatrix<Real> &B); + + +/// Returns tr(A B). +/// No option to transpose B because would make no difference. +template<typename Real> +Real TraceSpMat(const SpMatrix<Real> &A, const MatrixBase<Real> &B); + +/// Returns tr(A B C) +/// (A and C may be transposed as specified by transA and transC). +template<typename Real> +Real TraceMatSpMat(const MatrixBase<Real> &A, MatrixTransposeType transA, + const SpMatrix<Real> &B, const MatrixBase<Real> &C, + MatrixTransposeType transC); + +/// Returns tr (A B C D) +/// (A and C may be transposed as specified by transA and transB). +template<typename Real> +Real TraceMatSpMatSp(const MatrixBase<Real> &A, MatrixTransposeType transA, + const SpMatrix<Real> &B, const MatrixBase<Real> &C, + MatrixTransposeType transC, const SpMatrix<Real> &D); + +/** Computes v1^T * M * v2. Not as efficient as it could be where v1 == v2 + * (but no suitable blas routines available). + */ + +/// Returns \f$ v_1^T M v_2 \f$ +/// Not as efficient as it could be where v1 == v2. +template<typename Real> +Real VecSpVec(const VectorBase<Real> &v1, const SpMatrix<Real> &M, + const VectorBase<Real> &v2); + + +/// @} \addtogroup matrix_funcs_scalar + +/// \addtogroup matrix_funcs_misc +/// @{ + + +/// This class describes the options for maximizing various quadratic objective +/// functions. It's mostly as described in the SGMM paper "the subspace +/// Gaussian mixture model -- a structured model for speech recognition", but +/// the diagonal_precondition option is newly added, to handle problems where +/// different dimensions have very different scaling (we recommend to use the +/// option but it's set false for back compatibility). +struct SolverOptions { + BaseFloat K; // maximum condition number + BaseFloat eps; + std::string name; + bool optimize_delta; + bool diagonal_precondition; + bool print_debug_output; + explicit SolverOptions(const std::string &name): + K(1.0e+4), eps(1.0e-40), name(name), + optimize_delta(true), diagonal_precondition(false), + print_debug_output(true) { } + SolverOptions(): K(1.0e+4), eps(1.0e-40), name("[unknown]"), + optimize_delta(true), diagonal_precondition(false), + print_debug_output(true) { } + void Check() const; +}; + + +/// Maximizes the auxiliary function +/// \f[ Q(x) = x.g - 0.5 x^T H x \f] +/// using a numerically stable method. Like a numerically stable version of +/// \f$ x := Q^{-1} g. \f$ +/// Assumes H positive semidefinite. +/// Returns the objective-function change. + +template<typename Real> +Real SolveQuadraticProblem(const SpMatrix<Real> &H, + const VectorBase<Real> &g, + const SolverOptions &opts, + VectorBase<Real> *x); + + + +/// Maximizes the auxiliary function : +/// \f[ Q(x) = tr(M^T P Y) - 0.5 tr(P M Q M^T) \f] +/// Like a numerically stable version of \f$ M := Y Q^{-1} \f$. +/// Assumes Q and P positive semidefinite, and matrix dimensions match +/// enough to make expressions meaningful. +/// This is mostly as described in the SGMM paper "the subspace Gaussian mixture +/// model -- a structured model for speech recognition", but the +/// diagonal_precondition option is newly added, to handle problems +/// where different dimensions have very different scaling (we recommend to use +/// the option but it's set false for back compatibility). +template<typename Real> +Real SolveQuadraticMatrixProblem(const SpMatrix<Real> &Q, + const MatrixBase<Real> &Y, + const SpMatrix<Real> &P, + const SolverOptions &opts, + MatrixBase<Real> *M); + +/// Maximizes the auxiliary function : +/// \f[ Q(M) = tr(M^T G) -0.5 tr(P_1 M Q_1 M^T) -0.5 tr(P_2 M Q_2 M^T). \f] +/// Encountered in matrix update with a prior. We also apply a limit on the +/// condition but it should be less frequently necessary, and can be set larger. +template<typename Real> +Real SolveDoubleQuadraticMatrixProblem(const MatrixBase<Real> &G, + const SpMatrix<Real> &P1, + const SpMatrix<Real> &P2, + const SpMatrix<Real> &Q1, + const SpMatrix<Real> &Q2, + const SolverOptions &opts, + MatrixBase<Real> *M); + + +/// @} End of "addtogroup matrix_funcs_misc" + +} // namespace kaldi + + +// Including the implementation (now actually just includes some +// template specializations). +#include "matrix/sp-matrix-inl.h" + + +#endif // KALDI_MATRIX_SP_MATRIX_H_ + diff --git a/kaldi_io/src/kaldi/matrix/srfft.h b/kaldi_io/src/kaldi/matrix/srfft.h new file mode 100644 index 0000000..c0d36af --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/srfft.h @@ -0,0 +1,132 @@ +// matrix/srfft.h + +// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc. +// 2014 Daniel Povey +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +// +// This file includes a modified version of code originally published in Malvar, +// H., "Signal processing with lapped transforms, " Artech House, Inc., 1992. The +// current copyright holder of the original code, Henrique S. Malvar, has given +// his permission for the release of this modified version under the Apache +// License v2.0. + +#ifndef KALDI_MATRIX_SRFFT_H_ +#define KALDI_MATRIX_SRFFT_H_ + +#include "matrix/kaldi-vector.h" +#include "matrix/kaldi-matrix.h" + +namespace kaldi { + +/// @addtogroup matrix_funcs_misc +/// @{ + + +// This class is based on code by Henrique (Rico) Malvar, from his book +// "Signal Processing with Lapped Transforms" (1992). Copied with +// permission, optimized by Go Vivace Inc., and converted into C++ by +// Microsoft Corporation +// This is a more efficient way of doing the complex FFT than ComplexFft +// (declared in matrix-functios.h), but it only works for powers of 2. +// Note: in multi-threaded code, you would need to have one of these objects per +// thread, because multiple calls to Compute in parallel would not work. +template<typename Real> +class SplitRadixComplexFft { + public: + typedef MatrixIndexT Integer; + + // N is the number of complex points (must be a power of two, or this + // will crash). Note that the constructor does some work so it's best to + // initialize the object once and do the computation many times. + SplitRadixComplexFft(Integer N); + + // Does the FFT computation, given pointers to the real and + // imaginary parts. If "forward", do the forward FFT; else + // do the inverse FFT (without the 1/N factor). + // xr and xi are pointers to zero-based arrays of size N, + // containing the real and imaginary parts + // respectively. + void Compute(Real *xr, Real *xi, bool forward) const; + + // This version of Compute takes a single array of size N*2, + // containing [ r0 im0 r1 im1 ... ]. Otherwise its behavior is the + // same as the version above. + void Compute(Real *x, bool forward); + + + // This version of Compute is const; it operates on an array of size N*2 + // containing [ r0 im0 r1 im1 ... ], but it uses the argument "temp_buffer" as + // temporary storage instead of a class-member variable. It will allocate it if + // needed. + void Compute(Real *x, bool forward, std::vector<Real> *temp_buffer) const; + + ~SplitRadixComplexFft(); + + protected: + // temp_buffer_ is allocated only if someone calls Compute with only one Real* + // argument and we need a temporary buffer while creating interleaved data. + std::vector<Real> temp_buffer_; + private: + void ComputeTables(); + void ComputeRecursive(Real *xr, Real *xi, Integer logn) const; + void BitReversePermute(Real *x, Integer logn) const; + + Integer N_; + Integer logn_; // log(N) + + Integer *brseed_; + // brseed is Evans' seed table, ref: (Ref: D. M. W. + // Evans, "An improved digit-reversal permutation algorithm ...", + // IEEE Trans. ASSP, Aug. 1987, pp. 1120-1125). + Real **tab_; // Tables of butterfly coefficients. + + KALDI_DISALLOW_COPY_AND_ASSIGN(SplitRadixComplexFft); +}; + +template<typename Real> +class SplitRadixRealFft: private SplitRadixComplexFft<Real> { + public: + SplitRadixRealFft(MatrixIndexT N): // will fail unless N>=4 and N is a power of 2. + SplitRadixComplexFft<Real> (N/2), N_(N) { } + + /// If forward == true, this function transforms from a sequence of N real points to its complex fourier + /// transform; otherwise it goes in the reverse direction. If you call it + /// in the forward and then reverse direction and multiply by 1.0/N, you + /// will get back the original data. + /// The interpretation of the complex-FFT data is as follows: the array + /// is a sequence of complex numbers C_n of length N/2 with (real, im) format, + /// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...]. + void Compute(Real *x, bool forward); + + + /// This is as the other Compute() function, but it is a const version that + /// uses a user-supplied buffer. + void Compute(Real *x, bool forward, std::vector<Real> *temp_buffer) const; + + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(SplitRadixRealFft); + int N_; +}; + + +/// @} end of "addtogroup matrix_funcs_misc" + +} // end namespace kaldi + + +#endif + diff --git a/kaldi_io/src/kaldi/matrix/tp-matrix.h b/kaldi_io/src/kaldi/matrix/tp-matrix.h new file mode 100644 index 0000000..f43e86c --- /dev/null +++ b/kaldi_io/src/kaldi/matrix/tp-matrix.h @@ -0,0 +1,131 @@ +// matrix/tp-matrix.h + +// Copyright 2009-2011 Ondrej Glembek; Lukas Burget; Microsoft Corporation; +// Saarland University; Yanmin Qian; Haihua Xu +// 2013 Johns Hopkins Universith (author: Daniel Povey) + + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_TP_MATRIX_H_ +#define KALDI_MATRIX_TP_MATRIX_H_ + + +#include "matrix/packed-matrix.h" + +namespace kaldi { +/// \addtogroup matrix_group +/// @{ + +template<typename Real> class TpMatrix; + +/// @brief Packed symetric matrix class +template<typename Real> +class TpMatrix : public PackedMatrix<Real> { + friend class CuTpMatrix<float>; + friend class CuTpMatrix<double>; + public: + TpMatrix() : PackedMatrix<Real>() {} + explicit TpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero) + : PackedMatrix<Real>(r, resize_type) {} + TpMatrix(const TpMatrix<Real>& orig) : PackedMatrix<Real>(orig) {} + + /// Copy constructor from CUDA TpMatrix + /// This is defined in ../cudamatrix/cu-tp-matrix.cc + explicit TpMatrix(const CuTpMatrix<Real> &cu); + + + template<typename OtherReal> explicit TpMatrix(const TpMatrix<OtherReal>& orig) + : PackedMatrix<Real>(orig) {} + + Real operator() (MatrixIndexT r, MatrixIndexT c) const { + if (static_cast<UnsignedMatrixIndexT>(c) > + static_cast<UnsignedMatrixIndexT>(r)) { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(c) < + static_cast<UnsignedMatrixIndexT>(this->num_rows_)); + return 0; + } + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(this->num_rows_)); + // c<=r now so don't have to check c. + return *(this->data_ + (r*(r+1)) / 2 + c); + // Duplicating code from PackedMatrix.h + } + + Real &operator() (MatrixIndexT r, MatrixIndexT c) { + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < + static_cast<UnsignedMatrixIndexT>(this->num_rows_)); + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(c) <= + static_cast<UnsignedMatrixIndexT>(r) && + "you cannot access the upper triangle of TpMatrix using " + "a non-const matrix object."); + return *(this->data_ + (r*(r+1)) / 2 + c); + // Duplicating code from PackedMatrix.h + } + // Note: Cholesky may throw std::runtime_error + void Cholesky(const SpMatrix<Real>& orig); + + void Invert(); + + // Inverts in double precision. + void InvertDouble() { + TpMatrix<double> dmat(*this); + dmat.Invert(); + (*this).CopyFromTp(dmat); + } + + /// Shallow swap + void Swap(TpMatrix<Real> *other); + + /// Returns the determinant of the matrix (product of diagonals) + Real Determinant(); + + /// CopyFromMat copies the lower triangle of M into *this + /// (or the upper triangle, if Trans == kTrans). + void CopyFromMat(const MatrixBase<Real> &M, + MatrixTransposeType Trans = kNoTrans); + + /// This is implemented in ../cudamatrix/cu-tp-matrix.cc + void CopyFromMat(const CuTpMatrix<Real> &other); + + /// CopyFromTp copies another triangular matrix into this one. + void CopyFromTp(const TpMatrix<Real> &other) { + PackedMatrix<Real>::CopyFromPacked(other); + } + + template<typename OtherReal> void CopyFromTp(const TpMatrix<OtherReal> &other) { + PackedMatrix<Real>::CopyFromPacked(other); + } + + /// AddTp does *this += alpha * M. + void AddTp(const Real alpha, const TpMatrix<Real> &M) { + this->AddPacked(alpha, M); + } + + using PackedMatrix<Real>::operator =; + using PackedMatrix<Real>::Scale; + + void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) { + PackedMatrix<Real>::Resize(nRows, resize_type); + } +}; + +/// @} end of "addtogroup matrix_group". + +} // namespace kaldi + + +#endif + |