From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/kaldi/itf/clusterable-itf.h | 97 ++++++++++++++++++++++ kaldi_io/src/kaldi/itf/context-dep-itf.h | 80 ++++++++++++++++++ kaldi_io/src/kaldi/itf/decodable-itf.h | 123 ++++++++++++++++++++++++++++ kaldi_io/src/kaldi/itf/online-feature-itf.h | 105 ++++++++++++++++++++++++ kaldi_io/src/kaldi/itf/optimizable-itf.h | 51 ++++++++++++ kaldi_io/src/kaldi/itf/options-itf.h | 49 +++++++++++ 6 files changed, 505 insertions(+) create mode 100644 kaldi_io/src/kaldi/itf/clusterable-itf.h create mode 100644 kaldi_io/src/kaldi/itf/context-dep-itf.h create mode 100644 kaldi_io/src/kaldi/itf/decodable-itf.h create mode 100644 kaldi_io/src/kaldi/itf/online-feature-itf.h create mode 100644 kaldi_io/src/kaldi/itf/optimizable-itf.h create mode 100644 kaldi_io/src/kaldi/itf/options-itf.h (limited to 'kaldi_io/src/kaldi/itf') diff --git a/kaldi_io/src/kaldi/itf/clusterable-itf.h b/kaldi_io/src/kaldi/itf/clusterable-itf.h new file mode 100644 index 0000000..7ef9ae0 --- /dev/null +++ b/kaldi_io/src/kaldi/itf/clusterable-itf.h @@ -0,0 +1,97 @@ +// itf/clusterable-itf.h + +// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc. + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. 
+// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#ifndef KALDI_ITF_CLUSTERABLE_ITF_H_ +#define KALDI_ITF_CLUSTERABLE_ITF_H_ 1 + +#include <string> +#include "base/kaldi-common.h" + +namespace kaldi { + + +/** \addtogroup clustering_group + @{ + A virtual class for clusterable objects; see \ref clustering for an + explanation of its function. +*/ + + + +class Clusterable { + public: + /// \name Functions that must be overridden + /// @{ + + /// Return a copy of this object. + virtual Clusterable *Copy() const = 0; + /// Return the objective function associated with the stats + /// [assuming ML estimation] + virtual BaseFloat Objf() const = 0; + /// Return the normalizer (typically, count) associated with the stats + virtual BaseFloat Normalizer() const = 0; + /// Set stats to empty. + virtual void SetZero() = 0; + /// Add other stats. + virtual void Add(const Clusterable &other) = 0; + /// Subtract other stats. + virtual void Sub(const Clusterable &other) = 0; + /// Scale the stats by a positive number f [not mandatory to supply this]. + virtual void Scale(BaseFloat f) { + KALDI_ERR << "This Clusterable object does not implement Scale()."; + } + + /// Return a string that describes the inherited type. + virtual std::string Type() const = 0; + + /// Write data to stream. + virtual void Write(std::ostream &os, bool binary) const = 0; + + /// Read data from a stream and return the corresponding object (const + /// function; it's a class member because we need access to the vtable + /// so generic code can read derived types). + virtual Clusterable* ReadNew(std::istream &os, bool binary) const = 0; + + virtual ~Clusterable() {} + + /// @} + + /// \name Functions that have default implementations + /// @{ + + // These functions have default implementations (but may be overridden for + // speed). 
Implementations in tree/clusterable-classes.cc + + /// Return the objective function of the combined object this + other. + virtual BaseFloat ObjfPlus(const Clusterable &other) const; + /// Return the objective function of the subtracted object this - other. + virtual BaseFloat ObjfMinus(const Clusterable &other) const; + /// Return the objective function decrease from merging the two + /// clusters, negated to be a positive number (or zero). + virtual BaseFloat Distance(const Clusterable &other) const; + /// @} + +}; +/// @} end of "ingroup clustering_group" + +} // end namespace kaldi + +#endif // KALDI_ITF_CLUSTERABLE_ITF_H_ + diff --git a/kaldi_io/src/kaldi/itf/context-dep-itf.h b/kaldi_io/src/kaldi/itf/context-dep-itf.h new file mode 100644 index 0000000..6a0bd0f --- /dev/null +++ b/kaldi_io/src/kaldi/itf/context-dep-itf.h @@ -0,0 +1,80 @@ +// itf/context-dep-itf.h + +// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc. + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#ifndef KALDI_ITF_CONTEXT_DEP_ITF_H_ +#define KALDI_ITF_CONTEXT_DEP_ITF_H_ +#include "base/kaldi-common.h" + +namespace kaldi { +/// @ingroup tree_group +/// @{ + +/// context-dep-itf.h provides a link between +/// the tree-building code in ../tree/, and the FST code in ../fstext/ +/// (particularly, ../fstext/context-dep.h). 
It is an abstract +/// interface that describes an object that can map from a +/// phone-in-context to a sequence of integer leaf-ids. +class ContextDependencyInterface { + public: + /// ContextWidth() returns the value N (e.g. 3 for triphone models) that says how many phones + /// are considered for computing context. + virtual int ContextWidth() const = 0; + + /// Central position P of the phone context, in 0-based numbering, e.g. P = 1 for typical + /// triphone system. We have to see if we can do without this function. + virtual int CentralPosition() const = 0; + + /// The "new" Compute interface. For typical topologies, + /// pdf_class would be 0, 1, 2. + /// Returns success or failure; outputs the pdf-id. + /// + /// "Compute" is the main function of this interface, that takes a + /// sequence of N phones (and it must be N phones), possibly + /// including epsilons (symbol id zero) but only at positions other + /// than P [these represent unknown phone context due to end or + /// begin of sequence]. We do not insist that Compute must always + /// output (into stateseq) a nonempty sequence of states, but we + /// anticipate that stateseq will always be nonempty at output in + /// typical use cases. "Compute" returns false if expansion somehow + /// failed. Normally the calling code should raise an exception if + /// this happens. We can define a different interface later in + /// order to handle other kinds of information-- the underlying + /// data-structures from event-map.h are very flexible. + virtual bool Compute(const std::vector<int32> &phoneseq, int32 pdf_class, + int32 *pdf_id) const = 0; + + + + /// NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1). + virtual int32 NumPdfs() const = 0; + + virtual ~ContextDependencyInterface() {} + ContextDependencyInterface() {} + + /// Returns pointer to new object which is copy of current one. 
+ virtual ContextDependencyInterface *Copy() const = 0; + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependencyInterface); +}; +/// @} +} // namespace kaldi + + +#endif diff --git a/kaldi_io/src/kaldi/itf/decodable-itf.h b/kaldi_io/src/kaldi/itf/decodable-itf.h new file mode 100644 index 0000000..ba4d765 --- /dev/null +++ b/kaldi_io/src/kaldi/itf/decodable-itf.h @@ -0,0 +1,123 @@ +// itf/decodable-itf.h + +// Copyright 2009-2011 Microsoft Corporation; Saarland University; +// Mirko Hannemann; Go Vivace Inc.; +// 2013 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_ITF_DECODABLE_ITF_H_ +#define KALDI_ITF_DECODABLE_ITF_H_ 1 +#include "base/kaldi-common.h" + +namespace kaldi { +/// @ingroup Interfaces +/// @{ + + +/** + DecodableInterface provides a link between the (acoustic-modeling and + feature-processing) code and the decoder. The idea is to make this + interface as small as possible, and to make it as agnostic as possible about + the form of the acoustic model (e.g. don't assume the probabilities are a + function of just a vector of floats), and about the decoder (e.g. don't + assume it accesses frames in strict left-to-right order). 
For normal + models, without on-line operation, the "decodable" sub-class will just be a + wrapper around a matrix of features and an acoustic model, and it will + answer the question 'what is the acoustic likelihood for this index and this + frame?'. + + For online decoding, where the features are coming in in real time, it is + important to understand the IsLastFrame() and NumFramesReady() functions. + There are two ways these are used: the old online-decoding code, in ../online/, + and the new online-decoding code, in ../online2/. In the old online-decoding + code, the decoder would do: + \code{.cc} + for (int frame = 0; !decodable.IsLastFrame(frame); frame++) { + // Process this frame + } + \endcode + and the call to IsLastFrame would block if the features had not arrived yet. + The decodable object would have to know when to terminate the decoding. This + online-decoding mode is still supported; it is what happens when you call, for + example, LatticeFasterDecoder::Decode(). + + We realized that this "blocking" mode of decoding is not very convenient + because it forces the program to be multi-threaded and makes it complex to + control endpointing. In the "new" decoding code, you don't call (for example) + LatticeFasterDecoder::Decode(), you call LatticeFasterDecoder::InitDecoding(), + and then each time you get more features, you provide them to the decodable + object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does + something like this: + \code{.cc} + while (num_frames_decoded_ < decodable.NumFramesReady()) { + // Decode one more frame [increments num_frames_decoded_] + } + \endcode + So the decodable object never has IsLastFrame() called. For decoding where + you are starting with a matrix of features, the NumFramesReady() function will + always just return the number of frames in the file, and IsLastFrame() will + return true for the last frame. 
+ + For truly online decoding, the "old" online decodable objects in ../online/ have a + "blocking" IsLastFrame() and will crash if you call NumFramesReady(). + The "new" online decodable objects in ../online2/ return the number of frames + currently accessible if you call NumFramesReady(). You will likely not need + to call IsLastFrame(), but we implement it to only return true for the last + frame of the file once we've decided to terminate decoding. +*/ + +class DecodableInterface { + public: + /// Returns the log likelihood, which will be negated in the decoder. + /// The "frame" starts from zero. You should verify that IsLastFrame(frame-1) + /// returns false before calling this. + virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0; + + /// Returns true if this is the last frame. Frames are zero-based, so the + /// first frame is zero. IsLastFrame(-1) will return false, unless the file + /// is empty (which is a case that I'm not sure all the code will handle, so + /// be careful). Caution: the behavior of this function in an online setting + /// is being changed somewhat. In future it may return false in cases where + /// we haven't yet decided to terminate decoding, but later true if we decide + /// to terminate decoding. The plan in future is to rely more on + /// NumFramesReady(), and in future, IsLastFrame() would always return false + /// in an online-decoding setting, and would only return true in a + /// decoding-from-matrix setting where we want to allow the last delta or LDA + /// features to be flushed out for compatibility with the baseline setup. + virtual bool IsLastFrame(int32 frame) const = 0; + + /// The call NumFramesReady() will return the number of frames currently available + /// for this decodable object. This is for use in setups where you don't want the + /// decoder to block while waiting for input. 
This is newly added as of Jan 2014, + /// and I hope, going forward, to rely on this mechanism more than IsLastFrame to + /// know when to stop decoding. The default implementation signals an error via KALDI_ERR; override it in decodable types that support this mode. + virtual int32 NumFramesReady() const { + KALDI_ERR << "NumFramesReady() not implemented for this decodable type."; + return -1; + } + + /// Returns the number of states in the acoustic model + /// (they will be indexed one-based, i.e. from 1 to NumIndices(); + /// this is for compatibility with OpenFst). + virtual int32 NumIndices() const = 0; + + virtual ~DecodableInterface() {} +}; +/// @} +} // namespace kaldi + +#endif // KALDI_ITF_DECODABLE_ITF_H_ diff --git a/kaldi_io/src/kaldi/itf/online-feature-itf.h b/kaldi_io/src/kaldi/itf/online-feature-itf.h new file mode 100644 index 0000000..dafcd8a --- /dev/null +++ b/kaldi_io/src/kaldi/itf/online-feature-itf.h @@ -0,0 +1,105 @@ +// itf/online-feature-itf.h + +// Copyright 2013 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KALDI_ITF_ONLINE_FEATURE_ITF_H_ +#define KALDI_ITF_ONLINE_FEATURE_ITF_H_ 1 +#include "base/kaldi-common.h" +#include "matrix/matrix-lib.h" + +namespace kaldi { +/// @ingroup Interfaces +/// @{ + +/** + OnlineFeatureInterface is an interface for online feature processing (it is + also usable in the offline setting, but currently we're not using it for + that). This is for use in the online2/ directory, and it supersedes the + interface in ../online/online-feat-input.h. We have a slightly different + model that puts more control in the hands of the calling thread, and won't + involve waiting on semaphores in the decoding thread. + + This interface only specifies how the object *outputs* the features. + How it obtains the features, e.g. from a previous object or objects of type + OnlineFeatureInterface, is not specified in the interface and you will + likely define new constructors or methods in the derived type to do that. + + You should appreciate that this interface is designed to allow random + access to features, as long as they are ready. That is, the user + can call GetFrame for any frame less than NumFramesReady(), and when + implementing a child class you must not make assumptions about the + order in which the user makes these calls. +*/ + +class OnlineFeatureInterface { + public: + virtual int32 Dim() const = 0; ///< returns the feature dimension. + + /// Returns the total number of frames, since the start of the utterance, that + /// are now available. In an online-decoding context, this will likely + /// increase with time as more data becomes available. + virtual int32 NumFramesReady() const = 0; + + /// Returns true if this is the last frame. Frame indices are zero-based, so the + /// first frame is zero. IsLastFrame(-1) will return false, unless the file + /// is empty (which is a case that I'm not sure all the code will handle, so + /// be careful). 
This function may return false for some frame if + /// we haven't yet decided to terminate decoding, but later true if we decide + /// to terminate decoding. This function exists mainly to correctly handle + /// end effects in feature extraction, and is not a mechanism to determine how + /// many frames are in the decodable object (as it used to be, and for backward + /// compatibility, still is, in the Decodable interface). + virtual bool IsLastFrame(int32 frame) const = 0; + + /// Gets the feature vector for this frame. Before calling this for a given + /// frame, it is assumed that you called NumFramesReady() and it returned a + /// number greater than "frame". Otherwise this call will likely crash with + /// an assert failure. This function is not declared const, in case there is + /// some kind of caching going on, but most of the time it shouldn't modify + /// the class. + virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0; + + /// Virtual destructor. Note: constructors that take another member of + /// type OnlineFeatureInterface are not expected to take ownership of + /// that pointer; the caller needs to keep track of that manually. + virtual ~OnlineFeatureInterface() { } +}; + + +/// Add a virtual class for "source" features such as MFCC or PLP or pitch +/// features. +class OnlineBaseFeature: public OnlineFeatureInterface { + public: + /// This would be called from the application, when you get more wave data. + /// Note: the sampling_rate is typically only provided so the code can assert + /// that it matches the sampling rate expected in the options. + virtual void AcceptWaveform(BaseFloat sampling_rate, + const VectorBase<BaseFloat> &waveform) = 0; + + /// InputFinished() tells the class you won't be providing any + /// more waveform. This will help flush out the last few frames + /// of delta or LDA features (it will typically affect the return value + /// of IsLastFrame). 
+ virtual void InputFinished() = 0; +}; + + +/// @} +} // namespace kaldi + +#endif // KALDI_ITF_ONLINE_FEATURE_ITF_H_ diff --git a/kaldi_io/src/kaldi/itf/optimizable-itf.h b/kaldi_io/src/kaldi/itf/optimizable-itf.h new file mode 100644 index 0000000..1b8f54b --- /dev/null +++ b/kaldi_io/src/kaldi/itf/optimizable-itf.h @@ -0,0 +1,51 @@ +// itf/optimizable-itf.h + +// Copyright 2009-2011 Go Vivace Inc.; Microsoft Corporation; Georg Stemmer + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_ITF_OPTIMIZABLE_ITF_H_ +#define KALDI_ITF_OPTIMIZABLE_ITF_H_ + +#include "base/kaldi-common.h" +#include "matrix/matrix-lib.h" + +namespace kaldi { +/// @ingroup Interfaces +/// @{ + +/// OptimizableInterface provides +/// a virtual class for optimizable objects. +/// E.g. a class that computes a likelihood function and +/// its gradient using some parameter +/// that has to be optimized on data +/// could inherit from it. 
+template<class Real> +class OptimizableInterface { + public: + /// computes gradient for a parameter params and returns it + /// in gradient_out + virtual void ComputeGradient(const Vector<Real> &params, + Vector<Real> *gradient_out) = 0; + /// computes the function value for a parameter params + /// and returns it + virtual Real ComputeValue(const Vector<Real> &params) = 0; + + virtual ~OptimizableInterface() {} +}; +/// @} end of "Interfaces" +} // end namespace kaldi + +#endif diff --git a/kaldi_io/src/kaldi/itf/options-itf.h b/kaldi_io/src/kaldi/itf/options-itf.h new file mode 100644 index 0000000..204f46d --- /dev/null +++ b/kaldi_io/src/kaldi/itf/options-itf.h @@ -0,0 +1,49 @@ +// itf/options-itf.h + +// Copyright 2013 Tanel Alumae, Tallinn University of Technology + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KALDI_ITF_OPTIONS_ITF_H_ +#define KALDI_ITF_OPTIONS_ITF_H_ 1 +#include "base/kaldi-common.h" + +namespace kaldi { +/** Abstract interface declaring one pure-virtual Register() overload per option value type (bool, int32, uint32, float, double, std::string); each overload takes the option name, a pointer of that type (presumably where the option's value is stored -- confirm against implementers) and a documentation string. */ +class OptionsItf { + public: + + virtual void Register(const std::string &name, + bool *ptr, const std::string &doc) = 0; + virtual void Register(const std::string &name, + int32 *ptr, const std::string &doc) = 0; + virtual void Register(const std::string &name, + uint32 *ptr, const std::string &doc) = 0; + virtual void Register(const std::string &name, + float *ptr, const std::string &doc) = 0; + virtual void Register(const std::string &name, + double *ptr, const std::string &doc) = 0; + virtual void Register(const std::string &name, + std::string *ptr, const std::string &doc) = 0; + + virtual ~OptionsItf() {} +}; + +} // namespace kaldi + +#endif // KALDI_ITF_OPTIONS_ITF_H_ + + -- cgit v1.2.3-70-g09d2