summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi/itf
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/kaldi/itf')
-rw-r--r--kaldi_io/src/kaldi/itf/clusterable-itf.h97
-rw-r--r--kaldi_io/src/kaldi/itf/context-dep-itf.h80
-rw-r--r--kaldi_io/src/kaldi/itf/decodable-itf.h123
-rw-r--r--kaldi_io/src/kaldi/itf/online-feature-itf.h105
-rw-r--r--kaldi_io/src/kaldi/itf/optimizable-itf.h51
-rw-r--r--kaldi_io/src/kaldi/itf/options-itf.h49
6 files changed, 505 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/itf/clusterable-itf.h b/kaldi_io/src/kaldi/itf/clusterable-itf.h
new file mode 100644
index 0000000..7ef9ae0
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/clusterable-itf.h
@@ -0,0 +1,97 @@
+// itf/clusterable-itf.h
+
+// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_ITF_CLUSTERABLE_ITF_H_
+#define KALDI_ITF_CLUSTERABLE_ITF_H_ 1
+
+#include <string>
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+
+/** \addtogroup clustering_group
+ @{
+ A virtual class for clusterable objects; see \ref clustering for an
+ explanation of its function.
+*/
+
+
+
+class Clusterable {
+ public:
+ /// \name Functions that must be overridden
+ /// @{
+
+ /// Return a copy of this object (returned by raw pointer).
+ virtual Clusterable *Copy() const = 0;
+ /// Return the objective function associated with the stats
+ /// [assuming ML estimation].
+ virtual BaseFloat Objf() const = 0;
+ /// Return the normalizer (typically, count) associated with the stats.
+ virtual BaseFloat Normalizer() const = 0;
+ /// Set stats to empty.
+ virtual void SetZero() = 0;
+ /// Add other stats.
+ virtual void Add(const Clusterable &other) = 0;
+ /// Subtract other stats.
+ virtual void Sub(const Clusterable &other) = 0;
+ /// Scale the stats by a positive number f [optional to override; the default reports an error via KALDI_ERR].
+ virtual void Scale(BaseFloat f) {
+ KALDI_ERR << "This Clusterable object does not implement Scale().";
+ }
+
+ /// Return a string that describes the inherited type.
+ virtual std::string Type() const = 0;
+
+ /// Write data to the output stream os; "binary" is the binary-mode flag.
+ virtual void Write(std::ostream &os, bool binary) const = 0;
+
+ /// Read data from the input stream and return the corresponding object (const
+ /// function; it's a class member because we need access to the vtable
+ /// so generic code can read derived types).
+ virtual Clusterable* ReadNew(std::istream &is, bool binary) const = 0;
+
+ virtual ~Clusterable() {}
+
+ /// @}
+
+ /// \name Functions that have default implementations
+ /// @{
+
+ // These functions have default implementations (but may be overridden for
+ // speed). Implementations in tree/clusterable-classes.cc
+
+ /// Return the objective function of the combined object this + other.
+ virtual BaseFloat ObjfPlus(const Clusterable &other) const;
+ /// Return the objective function of the subtracted object this - other.
+ virtual BaseFloat ObjfMinus(const Clusterable &other) const;
+ /// Return the objective function decrease from merging the two
+ /// clusters, negated to be a positive number (or zero).
+ virtual BaseFloat Distance(const Clusterable &other) const;
+ /// @}
+
+};
+/// @} end of "ingroup clustering_group"
+
+} // end namespace kaldi
+
+#endif // KALDI_ITF_CLUSTERABLE_ITF_H_
+
diff --git a/kaldi_io/src/kaldi/itf/context-dep-itf.h b/kaldi_io/src/kaldi/itf/context-dep-itf.h
new file mode 100644
index 0000000..6a0bd0f
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/context-dep-itf.h
@@ -0,0 +1,80 @@
+// itf/context-dep-itf.h
+
+// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_ITF_CONTEXT_DEP_ITF_H_
+#define KALDI_ITF_CONTEXT_DEP_ITF_H_
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+/// @ingroup tree_group
+/// @{
+
+/// context-dep-itf.h provides a link between
+/// the tree-building code in ../tree/, and the FST code in ../fstext/
+/// (particularly, ../fstext/context-dep.h). It is an abstract
+/// interface that describes an object that can map from a
+/// phone-in-context to a sequence of integer leaf-ids.
+class ContextDependencyInterface {
+ public:
+ /// ContextWidth() returns the value N (e.g. 3 for triphone models) that says how many phones
+ /// are considered for computing context.
+ virtual int ContextWidth() const = 0;
+
+ /// Central position P of the phone context, in 0-based numbering, e.g. P = 1 for a typical
+ /// triphone system. We have to see if we can do without this function.
+ virtual int CentralPosition() const = 0;
+
+ /// The "new" Compute interface. For typical topologies, pdf_class would
+ /// be 0, 1, 2. Returns success or failure; outputs the pdf-id in *pdf_id.
+ ///
+ /// "Compute" is the main function of this interface, that takes a sequence
+ /// of N phones (and it must be N phones), possibly including epsilons
+ /// (symbol id zero) but only at positions other than P [these represent
+ /// unknown phone context due to end or begin of sequence]. We do not
+ /// insist that Compute must always output (into stateseq) a nonempty
+ /// sequence of states, but we anticipate that stateseq will always be
+ /// nonempty at output in typical use cases.
+ /// NOTE(review): "stateseq" is not a parameter of the signature below,
+ /// which has a single pdf_id output; that wording may be stale -- confirm.
+ /// "Compute" returns false if expansion somehow failed. Normally the
+ /// calling code should raise an exception if this happens. We can define a
+ /// different interface later in order to handle other kinds of information--
+ /// the underlying data-structures from event-map.h are very flexible.
+ virtual bool Compute(const std::vector<int32> &phoneseq, int32 pdf_class,
+ int32 *pdf_id) const = 0;
+
+
+
+ /// NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
+ virtual int32 NumPdfs() const = 0;
+
+ virtual ~ContextDependencyInterface() {}; // virtual destructor with an empty body
+ ContextDependencyInterface() {} // default constructor with an empty body
+
+ /// Returns pointer to new object which is copy of current one.
+ virtual ContextDependencyInterface *Copy() const = 0;
+ private:
+ KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependencyInterface); // macro defined elsewhere; by its name it disables copy/assign -- confirm
+};
+/// @}
+} // namespace Kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/itf/decodable-itf.h b/kaldi_io/src/kaldi/itf/decodable-itf.h
new file mode 100644
index 0000000..ba4d765
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/decodable-itf.h
@@ -0,0 +1,123 @@
+// itf/decodable-itf.h
+
+// Copyright 2009-2011 Microsoft Corporation; Saarland University;
+// Mirko Hannemann; Go Vivace Inc.;
+// 2013 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_ITF_DECODABLE_ITF_H_
+#define KALDI_ITF_DECODABLE_ITF_H_ 1
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+/// @ingroup Interfaces
+/// @{
+
+
+/**
+ DecodableInterface provides a link between the (acoustic-modeling and
+ feature-processing) code and the decoder. The idea is to make this
+ interface as small as possible, and to make it as agnostic as possible about
+ the form of the acoustic model (e.g. don't assume the probabilities are a
+ function of just a vector of floats), and about the decoder (e.g. don't
+ assume it accesses frames in strict left-to-right order). For normal
+ models, without on-line operation, the "decodable" sub-class will just be a
+ wrapper around a matrix of features and an acoustic model, and it will
+ answer the question 'what is the acoustic likelihood for this index and this
+ frame?'.
+
+ For online decoding, where the features are coming in in real time, it is
+ important to understand the IsLastFrame() and NumFramesReady() functions.
+ There are two ways these are used: the old online-decoding code, in ../online/,
+ and the new online-decoding code, in ../online2/. In the old online-decoding
+ code, the decoder would do:
+ \code{.cc}
+ for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
+ // Process this frame
+ }
+ \endcode
+ and the call to IsLastFrame would block if the features had not arrived yet.
+ The decodable object would have to know when to terminate the decoding. This
+ online-decoding mode is still supported, it is what happens when you call, for
+ example, LatticeFasterDecoder::Decode().
+
+ We realized that this "blocking" mode of decoding is not very convenient
+ because it forces the program to be multi-threaded and makes it complex to
+ control endpointing. In the "new" decoding code, you don't call (for example)
+ LatticeFasterDecoder::Decode(), you call LatticeFasterDecoder::InitDecoding(),
+ and then each time you get more features, you provide them to the decodable
+ object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
+ something like this:
+ \code{.cc}
+ while (num_frames_decoded_ < decodable.NumFramesReady()) {
+ // Decode one more frame [increments num_frames_decoded_]
+ }
+ \endcode
+ So the decodable object never has IsLastFrame() called. For decoding where
+ you are starting with a matrix of features, the NumFramesReady() function will
+ always just return the number of frames in the file, and IsLastFrame() will
+ return true for the last frame.
+
+ For truly online decoding, the "old" online decodable objects in ../online/ have a
+ "blocking" IsLastFrame() and will crash if you call NumFramesReady().
+ The "new" online decodable objects in ../online2/ return the number of frames
+ currently accessible if you call NumFramesReady(). You will likely not need
+ to call IsLastFrame(), but we implement it to only return true for the last
+ frame of the file once we've decided to terminate decoding.
+*/
+
+class DecodableInterface {
+ public:
+ /// Returns the log likelihood, which will be negated in the decoder.
+ /// The "frame" starts from zero. You should verify that IsLastFrame(frame-1)
+ /// returns false before calling this.
+ virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
+
+ /// Returns true if this is the last frame. Frames are zero-based, so the
+ /// first frame is zero. IsLastFrame(-1) will return false, unless the file
+ /// is empty (which is a case that I'm not sure all the code will handle, so
+ /// be careful). Caution: the behavior of this function in an online setting
+ /// is being changed somewhat. In future it may return false in cases where
+ /// we haven't yet decided to terminate decoding, but later true if we decide
+ /// to terminate decoding. The plan in future is to rely more on
+ /// NumFramesReady(), and in future, IsLastFrame() would always return false
+ /// in an online-decoding setting, and would only return true in a
+ /// decoding-from-matrix setting where we want to allow the last delta or LDA
+ /// features to be flushed out for compatibility with the baseline setup.
+ virtual bool IsLastFrame(int32 frame) const = 0;
+
+ /// The call NumFramesReady() will return the number of frames currently available
+ /// for this decodable object. This is for use in setups where you don't want the
+ /// decoder to block while waiting for input. This is newly added as of Jan 2014,
+ /// and I hope, going forward, to rely on this mechanism more than IsLastFrame to
+ /// know when to stop decoding. The default below reports an error via KALDI_ERR.
+ virtual int32 NumFramesReady() const {
+ KALDI_ERR << "NumFramesReady() not implemented for this decodable type.";
+ return -1; // NOTE(review): presumably reached only if KALDI_ERR returns -- confirm
+ }
+
+ /// Returns the number of states in the acoustic model
+ /// (they will be indexed one-based, i.e. from 1 to NumIndices();
+ /// this is for compatibility with OpenFst).
+ virtual int32 NumIndices() const = 0;
+
+ virtual ~DecodableInterface() {}
+};
+/// @}
+} // namespace Kaldi
+
+#endif // KALDI_ITF_DECODABLE_ITF_H_
diff --git a/kaldi_io/src/kaldi/itf/online-feature-itf.h b/kaldi_io/src/kaldi/itf/online-feature-itf.h
new file mode 100644
index 0000000..dafcd8a
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/online-feature-itf.h
@@ -0,0 +1,105 @@
+// itf/online-feature-itf.h
+
+// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_ITF_ONLINE_FEATURE_ITF_H_
+#define KALDI_ITF_ONLINE_FEATURE_ITF_H_ 1
+#include "base/kaldi-common.h"
+#include "matrix/matrix-lib.h"
+
+namespace kaldi {
+/// @ingroup Interfaces
+/// @{
+
+/**
+ OnlineFeatureInterface is an interface for online feature processing (it is
+ also usable in the offline setting, but currently we're not using it for
+ that). This is for use in the online2/ directory, and it supersedes the
+ interface in ../online/online-feat-input.h. We have a slightly different
+ model that puts more control in the hands of the calling thread, and won't
+ involve waiting on semaphores in the decoding thread.
+
+ This interface only specifies how the object *outputs* the features.
+ How it obtains the features, e.g. from a previous object or objects of type
+ OnlineFeatureInterface, is not specified in the interface and you will
+ likely define new constructors or methods in the derived type to do that.
+
+ You should appreciate that this interface is designed to allow random
+ access to features, as long as they are ready. That is, the user
+ can call GetFrame for any frame less than NumFramesReady(), and when
+ implementing a child class you must not make assumptions about the
+ order in which the user makes these calls.
+*/
+
+class OnlineFeatureInterface {
+ public:
+ virtual int32 Dim() const = 0; ///< returns the feature dimension.
+
+ /// Returns the total number of frames, since the start of the utterance, that
+ /// are now available. In an online-decoding context, this will likely
+ /// increase with time as more data becomes available.
+ virtual int32 NumFramesReady() const = 0;
+
+ /// Returns true if this is the last frame. Frame indices are zero-based, so the
+ /// first frame is zero. IsLastFrame(-1) will return false, unless the file
+ /// is empty (which is a case that I'm not sure all the code will handle, so
+ /// be careful). This function may return false for some frame if
+ /// we haven't yet decided to terminate decoding, but later true if we decide
+ /// to terminate decoding. This function exists mainly to correctly handle
+ /// end effects in feature extraction, and is not a mechanism to determine how
+ /// many frames are in the decodable object (as it used to be, and for backward
+ /// compatibility, still is, in the Decodable interface).
+ virtual bool IsLastFrame(int32 frame) const = 0;
+
+ /// Gets the feature vector for this frame, writing it to *feat. Before calling
+ /// this for a given frame, it is assumed that you called NumFramesReady() and it
+ /// returned a number greater than "frame". Otherwise this call will likely crash
+ /// with an assert failure. This function is not declared const, in case there is
+ /// some kind of caching going on, but most of the time it shouldn't modify
+ /// the class.
+ virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0;
+
+ /// Virtual destructor. Note: constructors that take another member of
+ /// type OnlineFeatureInterface are not expected to take ownership of
+ /// that pointer; the caller needs to keep track of that manually.
+ virtual ~OnlineFeatureInterface() { }
+};
+
+
+/// Add a virtual class for "source" features such as MFCC or PLP or pitch
+/// features.
+class OnlineBaseFeature: public OnlineFeatureInterface {
+ public:
+ /// This would be called from the application, when you get more wave data.
+ /// Note: the sampling_rate is typically only provided so the code can assert
+ /// that it matches the sampling rate expected in the options.
+ virtual void AcceptWaveform(BaseFloat sampling_rate,
+ const VectorBase<BaseFloat> &waveform) = 0;
+
+ /// InputFinished() tells the class you won't be providing any
+ /// more waveform. This will help flush out the last few frames
+ /// of delta or LDA features (it will typically affect the return value
+ /// of IsLastFrame).
+ virtual void InputFinished() = 0;
+};
+
+
+/// @}
+} // namespace Kaldi
+
+#endif // KALDI_ITF_ONLINE_FEATURE_ITF_H_
diff --git a/kaldi_io/src/kaldi/itf/optimizable-itf.h b/kaldi_io/src/kaldi/itf/optimizable-itf.h
new file mode 100644
index 0000000..1b8f54b
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/optimizable-itf.h
@@ -0,0 +1,51 @@
+// itf/optimizable-itf.h
+
+// Copyright 2009-2011 Go Vivace Inc.; Microsoft Corporation; Georg Stemmer
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_ITF_OPTIMIZABLE_ITF_H_
+#define KALDI_ITF_OPTIMIZABLE_ITF_H_
+
+#include "base/kaldi-common.h"
+#include "matrix/matrix-lib.h"
+
+namespace kaldi {
+/// @ingroup Interfaces
+/// @{
+
+/// OptimizableInterface provides
+/// a virtual class for optimizable objects.
+/// E.g. a class that computed a likelihood function and
+/// its gradient using some parameter
+/// that has to be optimized on data
+/// could inherit from it.
+template<class Real>
+class OptimizableInterface {
+ public:
+ /// Computes the gradient for the parameter vector params and returns it
+ /// in *gradient_out.
+ virtual void ComputeGradient(const Vector<Real> &params,
+ Vector<Real> *gradient_out) = 0;
+ /// Computes the function value for the parameter vector params
+ /// and returns it.
+ virtual Real ComputeValue(const Vector<Real> &params) = 0;
+
+ virtual ~OptimizableInterface() {}
+};
+/// @} end of "Interfaces"
+} // end namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/itf/options-itf.h b/kaldi_io/src/kaldi/itf/options-itf.h
new file mode 100644
index 0000000..204f46d
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/options-itf.h
@@ -0,0 +1,49 @@
+// itf/options-itf.h
+
+// Copyright 2013 Tanel Alumae, Tallinn University of Technology
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_ITF_OPTIONS_ITF_H_
+#define KALDI_ITF_OPTIONS_ITF_H_ 1
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+class OptionsItf {
+ public:
+ /// Register(name, ptr, doc): one pure-virtual overload per option type (bool, int32, uint32, float, double, std::string).
+ virtual void Register(const std::string &name,
+ bool *ptr, const std::string &doc) = 0;
+ virtual void Register(const std::string &name,
+ int32 *ptr, const std::string &doc) = 0;
+ virtual void Register(const std::string &name,
+ uint32 *ptr, const std::string &doc) = 0;
+ virtual void Register(const std::string &name,
+ float *ptr, const std::string &doc) = 0;
+ virtual void Register(const std::string &name,
+ double *ptr, const std::string &doc) = 0;
+ virtual void Register(const std::string &name,
+ std::string *ptr, const std::string &doc) = 0;
+
+ virtual ~OptionsItf() {}
+};
+
+} // namespace Kaldi
+
+#endif // KALDI_ITF_OPTIONS_ITF_H_
+
+