diff options
Diffstat (limited to 'kaldi_io/src/kaldi/util/kaldi-table-inl.h')
-rw-r--r-- | kaldi_io/src/kaldi/util/kaldi-table-inl.h | 2246 |
1 files changed, 0 insertions, 2246 deletions
diff --git a/kaldi_io/src/kaldi/util/kaldi-table-inl.h b/kaldi_io/src/kaldi/util/kaldi-table-inl.h deleted file mode 100644 index 6b73c88..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-table-inl.h +++ /dev/null @@ -1,2246 +0,0 @@ -// util/kaldi-table-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_ -#define KALDI_UTIL_KALDI_TABLE_INL_H_ - -#include <algorithm> -#include "util/kaldi-io.h" -#include "util/text-utils.h" -#include "util/stl-utils.h" // for StringHasher. - - -namespace kaldi { - -/// \addtogroup table_impl_types -/// @{ - -template<class Holder> class SequentialTableReaderImplBase { - public: - typedef typename Holder::T T; - // note that Open takes rxfilename not rspecifier. - virtual bool Open(const std::string &rxfilename) = 0; - virtual bool Done() const = 0; - virtual bool IsOpen() const = 0; - virtual std::string Key() = 0; - virtual const T &Value() = 0; - virtual void FreeCurrent() = 0; - virtual void Next() = 0; - virtual bool Close() = 0; - SequentialTableReaderImplBase() { } - virtual ~SequentialTableReaderImplBase() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase); -}; - - -// This is the implementation for SequentialTableReader -// when it's actually a script file. -template<class Holder> class SequentialTableReaderScriptImpl: - public SequentialTableReaderImplBase<Holder> { - public: - typedef typename Holder::T T; - - SequentialTableReaderScriptImpl(): state_(kUninitialized) { } - - virtual bool Open(const std::string &rspecifier) { - if (state_ != kUninitialized) - if (! Close()) // call Close() yourself to suppress this exception. - KALDI_ERR << "TableReader::Open, error closing previous input: " - << "rspecifier was " << rspecifier_; - bool binary; - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kScriptRspecifier); - if (!script_input_.Open(script_rxfilename_, &binary)) { // Failure on Open - KALDI_WARN << "Failed to open script file " - << PrintableRxfilename(script_rxfilename_); - state_ = kUninitialized; - return false; - } else { // Open succeeded. - if (binary) { // script file should not be binary file.. - state_ = kError; // bad script file. - script_input_.Close(); - return false; - } else { - state_ = kFileStart; - Next(); - if (state_ == kError) { - script_input_.Close(); - return false; - } - if (opts_.permissive) { // Next() will have preloaded. - KALDI_ASSERT(state_ == kLoadSucceeded || state_ == kEof); - } else { - KALDI_ASSERT(state_ == kHaveScpLine || state_ == kEof); - } - return true; // Success. - } - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kEof: case kError: case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: return true; - case kUninitialized: return false; - default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid - // state for user to call something on. - return false; - } - } - - virtual bool Done() const { - switch (state_) { - case kHaveScpLine: return false; - case kLoadSucceeded: case kLoadFailed: return false; - // These cases are because we want LoadCurrent() - // to be callable after Next() and to not change the Done() status [only Next() should change - // the Done() status]. - case kEof: case kError: return true; // Error condition, like Eof, counts as Done(); the destructor - // or Close() will inform the user of the error. - default: KALDI_ERR << "Done() called on TableReader object at the wrong time."; - return false; - } - } - - virtual std::string Key() { - // Valid to call this whenever Done() returns false. - switch (state_) { - case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: break; - default: - // coding error. - KALDI_ERR << "Key() called on TableReader object at the wrong time."; - } - return key_; - } - const T &Value() { - StateType orig_state = state_; - if (state_ == kHaveScpLine) LoadCurrent(); // Takes - // state_ to kLoadSucceeded or kLoadFailed. - if (state_ == kLoadFailed) { // this can happen due to - // a file listed in an scp file not existing, or - // read failure, failure of a command, etc. - if (orig_state == kHaveScpLine) - KALDI_ERR << "TableReader: failed to load object from " - << PrintableRxfilename(data_rxfilename_) - << " (to suppress this error, add the permissive " - << "(p, ) option to the rspecifier."; - - else // orig_state_ was kLoadFailed, which only could have happened - // if the user called FreeCurrent(). - KALDI_ERR << "TableReader: you called Value() after FreeCurrent()."; - } else if (state_ != kLoadSucceeded) { - // This would be a coding error. - KALDI_ERR << "TableReader: Value() called at the wrong time."; - } - return holder_.Value(); - } - void FreeCurrent() { - if (state_ == kLoadSucceeded) { - holder_.Clear(); - state_ = kLoadFailed; - } else { - KALDI_WARN << "TableReader: FreeCurrent called at the wrong time."; - } - } - void Next() { - while (1) { - NextScpLine(); - if (Done()) return; - if (opts_.permissive) { - // Permissive mode means, when reading scp files, we treat keys whose scp entry - // cannot be read as nonexistent. This means trying to read. - if (LoadCurrent()) return; // Success. - // else try the next scp line. - } else { - return; // We go the next key; Value() will crash if we can't - // read the scp line. - } - } - } - - virtual bool Close() { - // Close() will succeed if the stream was not in an error - // state. To clean up, it also closes the Input objects if - // they're open. - if (script_input_.IsOpen()) - script_input_.Close(); - if (data_input_.IsOpen()) - data_input_.Close(); - if (state_ == kLoadSucceeded) - holder_.Clear(); - if (!this->IsOpen()) - KALDI_ERR << "Close() called on input that was not open."; - StateType old_state = state_; - state_ = kUninitialized; - if (old_state == kError) { - if (opts_.permissive) { - KALDI_WARN << "Close() called on scp file with read error, ignoring the " - "error because permissive mode specified."; - return true; - } else return false; // User will do something with the error status. - } else return true; - } - - virtual ~SequentialTableReaderScriptImpl() { - if (state_ == kError) - KALDI_ERR << "TableReader: reading script file failed: from scp " - << PrintableRxfilename(script_rxfilename_); - // If you don't want this exception to be thrown you can - // call Close() and check the status. - if (state_ == kLoadSucceeded) - holder_.Clear(); - } - private: - bool LoadCurrent() { - // Attempts to load object whose rxfilename is on the current scp line. - if (state_ != kHaveScpLine) - KALDI_ERR << "TableReader: LoadCurrent() called at the wrong time."; - bool ans; - // note, NULL means it doesn't read the binary-mode header - if (Holder::IsReadInBinary()) ans = data_input_.Open(data_rxfilename_, NULL); - else ans = data_input_.OpenTextMode(data_rxfilename_); - if (!ans) { - // May want to make this warning a VLOG at some point - KALDI_WARN << "TableReader: failed to open file " - << PrintableRxfilename(data_rxfilename_); - state_ = kLoadFailed; - return false; - } else { - if (holder_.Read(data_input_.Stream())) { - state_ = kLoadSucceeded; - return true; - } else { // holder_ will not contain data. - KALDI_WARN << "TableReader: failed to load object from " - << PrintableRxfilename(data_rxfilename_); - state_ = kLoadFailed; - return false; - } - } - } - - // Reads the next line in the script file. - void NextScpLine() { - switch (state_) { - case kLoadSucceeded: holder_.Clear(); break; - case kHaveScpLine: case kLoadFailed: case kFileStart: break; - default: - // No other states are valid to call Next() from. - KALDI_ERR << "Reading script file: Next called wrongly."; - } - std::string line; - if (getline(script_input_.Stream(), line)) { - SplitStringOnFirstSpace(line, &key_, &data_rxfilename_); - if (!key_.empty() && !data_rxfilename_.empty()) { - // Got a valid line. - state_ = kHaveScpLine; - } else { - // Got an invalid line. - state_ = kError; // we can't make sense of this - // scp file and will now die. - } - } else { - state_ = kEof; // nothing more in the scp file. - // Might as well close the input streams as don't need them. - script_input_.Close(); - if (data_input_.IsOpen()) - data_input_.Close(); - } - } - - - Input script_input_; // Input object for the .scp file - Input data_input_; // Input object for the entries in - // the script file. - Holder holder_; // Holds the object. - bool binary_; // Binary-mode archive. - std::string key_; - std::string rspecifier_; - std::string script_rxfilename_; // of the script file. - RspecifierOptions opts_; // options. - std::string data_rxfilename_; // of the file we're reading. - enum StateType { - // [The state of the reading process] [does holder_ [is script_inp_ - // have object] open] - kUninitialized, // Uninitialized or closed. no no - kEof, // We did Next() and found eof in script file. no no - kError, // Some other error no yes - kHaveScpLine, // Just called Open() or Next() and have a no yes - // line of the script file but no data. - kLoadSucceeded, // Called LoadCurrent() and it succeeded. yes yes - kLoadFailed, // Called LoadCurrent() and it failed, no yes - // or the user called FreeCurrent().. note, - // if when called by user we are in this state, - // it means the user called FreeCurrent(). - kFileStart, // [state we only use internally] no yes - } state_; - private: -}; - - -// This is the implementation for SequentialTableReader -// when it's an archive. Note that the archive format is: -// key1 [space] object1 key2 [space] -// object2 ... eof. -// "object1" is the output of the Holder::Write function and will -// typically contain a binary header (in binary mode) and then -// the output of object.Write(os, binary). -// The archive itself does not care whether it is in binary -// or text mode, for reading purposes. - -template<class Holder> class SequentialTableReaderArchiveImpl: - public SequentialTableReaderImplBase<Holder> { - public: - typedef typename Holder::T T; - - SequentialTableReaderArchiveImpl(): state_(kUninitialized) { } - - virtual bool Open(const std::string &rspecifier) { - if (state_ != kUninitialized) { - if (! Close()) { // call Close() yourself to suppress this exception. - if (opts_.permissive) - KALDI_WARN << "TableReader::Open, error closing previous input " - "(only warning, since permissive mode)."; - else - KALDI_ERR << "TableReader::Open, error closing previous input."; - } - } - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, - &archive_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kArchiveRspecifier); - - bool ans; - // NULL means don't expect binary-mode header - if (Holder::IsReadInBinary()) - ans = input_.Open(archive_rxfilename_, NULL); - else - ans = input_.OpenTextMode(archive_rxfilename_); - if (!ans) { // header. - KALDI_WARN << "TableReader: failed to open stream " - << PrintableRxfilename(archive_rxfilename_); - state_ = kUninitialized; // Failure on Open - return false; // User should print the error message. - } - state_ = kFileStart; - Next(); - if (state_ == kError) { - KALDI_WARN << "Error beginning to read archive file (wrong filename?): " - << PrintableRxfilename(archive_rxfilename_); - input_.Close(); - state_ = kUninitialized; - return false; - } - KALDI_ASSERT(state_ == kHaveObject || state_ == kEof); - return true; - } - - virtual void Next() { - switch (state_) { - case kHaveObject: - holder_.Clear(); break; - case kFileStart: case kFreedObject: - break; - default: - KALDI_ERR << "TableReader: Next() called wrongly."; - } - std::istream &is = input_.Stream(); - is.clear(); // Clear any fail bits that may have been set... just in case - // this happened in the Read function. - is >> key_; // This eats up any leading whitespace and gets the string. - if (is.eof()) { - state_ = kEof; - return; - } - if (is.fail()) { // This shouldn't really happen, barring file-system errors. - KALDI_WARN << "Error reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - int c; - if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key. - // We also allow tab [which is consumed] and newline [which is not], just - // so we can read archives generated by scripts that may not be fully - // aware of how this format works. - KALDI_WARN << "Invalid archive file format: expected space after key " - << key_ << ", got character " - << CharToString(static_cast<char>(is.peek())) << ", reading " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - if (c != '\n') is.get(); // Consume the space or tab. - if (holder_.Read(is)) { - state_ = kHaveObject; - return; - } else { - KALDI_WARN << "Object read failed, reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kEof: case kError: case kHaveObject: case kFreedObject: return true; - case kUninitialized: return false; - default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid - // state for user to call something on. - return false; - } - } - - virtual bool Done() const { - switch (state_) { - case kHaveObject: - return false; - case kEof: case kError: - return true; // Error-state counts as Done(), but destructor - // will fail (unless you check the status with Close()). - default: - KALDI_ERR << "Done() called on TableReader object at the wrong time."; - return false; - } - } - - virtual std::string Key() { - // Valid to call this whenever Done() returns false - switch (state_) { - case kHaveObject: break; // only valid case. - default: - // coding error. - KALDI_ERR << "Key() called on TableReader object at the wrong time."; - } - return key_; - } - const T &Value() { - switch (state_) { - case kHaveObject: - break; // only valid case. - default: - // coding error. - KALDI_ERR << "Value() called on TableReader object at the wrong time."; - } - return holder_.Value(); - } - virtual void FreeCurrent() { - if (state_ == kHaveObject) { - holder_.Clear(); - state_ = kFreedObject; - } else - KALDI_WARN << "TableReader: FreeCurernt called at the wrong time."; - } - - virtual bool Close() { - if (! this->IsOpen()) - KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly."; - if (input_.IsOpen()) - input_.Close(); - if (state_ == kHaveObject) - holder_.Clear(); - bool ans; - if (opts_.permissive) { - ans = true; // always return success. - if (state_ == kError) - KALDI_WARN << "Error detected closing TableReader for archive " - << PrintableRxfilename(archive_rxfilename_) << " but ignoring " - << "it as permissive mode specified."; - } else - ans = (state_ != kError); // If error state, user should detect it. - state_ = kUninitialized; - return ans; - } - - virtual ~SequentialTableReaderArchiveImpl() { - if (state_ == kError) { - if (opts_.permissive) - KALDI_WARN << "Error detected closing TableReader for archive " - << PrintableRxfilename(archive_rxfilename_) << " but ignoring " - << "it as permissive mode specified."; - else - KALDI_ERR << "TableReader: error detected closing archive " - << PrintableRxfilename(archive_rxfilename_); - } - // If you don't want this exception to be thrown you can - // call Close() and check the status. - if (state_ == kHaveObject) - holder_.Clear(); - } - private: - Input input_; // Input object for the archive - Holder holder_; // Holds the object. - std::string key_; - std::string rspecifier_; - std::string archive_rxfilename_; - RspecifierOptions opts_; - enum { // [The state of the reading process] [does holder_ [is input_ - // have object] open] - kUninitialized, // Uninitialized or closed. no no - kFileStart, // [state we use internally: just opened.] no yes - kEof, // We did Next() and found eof in archive no no - kError, // Some other error no no - kHaveObject, // We read the key and the object after it. yes yes - kFreedObject, // The user called FreeCurrent(). no yes - } state_; -}; - - -template<class Holder> -SequentialTableReader<Holder>::SequentialTableReader(const std::string &rspecifier): impl_(NULL) { - if (rspecifier != "" && !Open(rspecifier)) - KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier; -} - -template<class Holder> -bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) { - if (IsOpen()) - if (!Close()) - KALDI_ERR << "Could not close previously open object."; - // now impl_ will be NULL. - - RspecifierType wt = ClassifyRspecifier(rspecifier, NULL, NULL); - switch (wt) { - case kArchiveRspecifier: - impl_ = new SequentialTableReaderArchiveImpl<Holder>(); - break; - case kScriptRspecifier: - impl_ = new SequentialTableReaderScriptImpl<Holder>(); - break; - case kNoRspecifier: default: - KALDI_WARN << "Invalid rspecifier " << rspecifier; - return false; - } - if (!impl_->Open(rspecifier)) { - delete impl_; - impl_ = NULL; - return false; // sub-object will have printed warnings. - } - else return true; -} - -template<class Holder> -bool SequentialTableReader<Holder>::Close() { - CheckImpl(); - bool ans = impl_->Close(); - delete impl_; // We don't keep around empty impl_ objects. - impl_ = NULL; - return ans; -} - - -template<class Holder> -bool SequentialTableReader<Holder>::IsOpen() const { - return (impl_ != NULL); // Because we delete the object whenever - // that object is not open. Thus, the IsOpen functions of the - // Impl objects are not really needed. -} - -template<class Holder> -std::string SequentialTableReader<Holder>::Key() { - CheckImpl(); - return impl_->Key(); // this call may throw if called wrongly in other ways, - // e.g. eof. -} - - -template<class Holder> -void SequentialTableReader<Holder>::FreeCurrent() { - CheckImpl(); - impl_->FreeCurrent(); -} - - -template<class Holder> -const typename SequentialTableReader<Holder>::T & -SequentialTableReader<Holder>::Value() { - CheckImpl(); - return impl_->Value(); // This may throw (if LoadCurrent() returned false you are safe.). -} - - -template<class Holder> -void SequentialTableReader<Holder>::Next() { - CheckImpl(); - impl_->Next(); -} - -template<class Holder> -bool SequentialTableReader<Holder>::Done() { - CheckImpl(); - return impl_->Done(); -} - - -template<class Holder> -SequentialTableReader<Holder>::~SequentialTableReader() { - if (impl_) delete impl_; - // Destructor of impl_ may throw. -} - - - -template<class Holder> class TableWriterImplBase { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &wspecifier) = 0; - - // Write returns true on success, false on failure, but - // some errors may not be detected until we call Close(). - // It throws (via KALDI_ERR) if called wrongly. We could - // have just thrown on all errors, since this is what - // TableWriter does; it was designed this way because originally - // TableWriter::Write returned an exit status. - virtual bool Write(const std::string &key, const T &value) = 0; - - // Flush will flush any archive; it does not return error status, - // any errors will be reported on the next Write or Close. - virtual void Flush() = 0; - - virtual bool Close() = 0; - - virtual bool IsOpen() const = 0; - - // May throw on write error if Close was not called. - virtual ~TableWriterImplBase() { } - - TableWriterImplBase() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase); -}; - - -// The implementation of TableWriter we use when writing directly -// to an archive with no associated scp. -template<class Holder> -class TableWriterArchiveImpl: public TableWriterImplBase<Holder> { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &wspecifier) { - switch (state_) { - case kUninitialized: - break; - case kWriteError: - KALDI_ERR << "TableWriter: opening stream, already open with write error."; - case kOpen: default: - if (!Close()) // throw because this error may not have been previously - // detected by the user. - KALDI_ERR << "TableWriter: opening stream, error closing previously open stream."; - } - wspecifier_ = wspecifier; - WspecifierType ws = ClassifyWspecifier(wspecifier, - &archive_wxfilename_, - NULL, - &opts_); - KALDI_ASSERT(ws == kArchiveWspecifier); // or wrongly called. - - if (output_.Open(archive_wxfilename_, opts_.binary, false)) { // false means no binary header. - state_ = kOpen; - return true; - } else { - // stream will not be open. User will report this error - // (we return bool), so don't bother printing anything. - state_ = kUninitialized; - return false; - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kUninitialized: return false; - case kOpen: case kWriteError: return true; - default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state."; - } - return false; - } - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual bool Write(const std::string &key, const T &value) { - switch (state_) { - case kOpen: break; - case kWriteError: - // user should have known from the last - // call to Write that there was a problem. - KALDI_WARN << "TableWriter: attempting to write to invalid stream."; - return false; - case kUninitialized: default: - KALDI_ERR << "TableWriter: Write called on invalid stream"; - - } - // state is now kOpen or kWriteError. - if (!IsToken(key)) // e.g. empty string or has spaces... - KALDI_ERR << "TableWriter: using invalid key " << key; - output_.Stream() << key << ' '; - if (!Holder::Write(output_.Stream(), opts_.binary, value)) { - KALDI_WARN << "TableWriter: write failure to " - << PrintableWxfilename(archive_wxfilename_); - state_ = kWriteError; - return false; - } - if (state_ == kWriteError) return false; // Even if this Write seems to have - // succeeded, we fail because a previous Write failed and the archive may be - // corrupted and unreadable. - - if (opts_.flush) - Flush(); - return true; - } - - // Flush will flush any archive; it does not return error status, - // any errors will be reported on the next Write or Close. - virtual void Flush() { - switch (state_) { - case kWriteError: case kOpen: - output_.Stream().flush(); // Don't check error status. - return; - default: - KALDI_WARN << "TableWriter: Flush called on not-open writer."; - } - } - - virtual bool Close() { - if (!this->IsOpen() || !output_.IsOpen()) - KALDI_ERR << "TableWriter: Close called on a stream that was not open." << this->IsOpen() << ", " << output_.IsOpen(); - bool close_success = output_.Close(); - if (!close_success) { - KALDI_WARN << "TableWriter: error closing stream: wspecifier is " - << wspecifier_; - state_ = kUninitialized; - return false; - } - if (state_ == kWriteError) { - KALDI_WARN << "TableWriter: closing writer in error state: wspecifier is " - << wspecifier_; - state_ = kUninitialized; - return false; - } - state_ = kUninitialized; - return true; - } - - TableWriterArchiveImpl(): state_(kUninitialized) {} - - // May throw on write error if Close was not called. - virtual ~TableWriterArchiveImpl() { - if (!IsOpen()) return; - else if (!Close()) - KALDI_ERR << "At TableWriter destructor: Write failed or stream close " - << "failed: wspecifier is "<< wspecifier_; - } - - private: - Output output_; - WspecifierOptions opts_; - std::string wspecifier_; - std::string archive_wxfilename_; - enum { // is stream open? - kUninitialized, // no - kOpen, // yes - kWriteError, // yes - } state_; -}; - - - - -// The implementation of TableWriter we use when writing to -// individual files (more generally, wxfilenames) specified -// in an scp file that we read. - -// Note: the code for this class is similar to RandomAccessTableReaderScriptImpl; -// try to keep them in sync. - -template<class Holder> -class TableWriterScriptImpl: public TableWriterImplBase<Holder> { - public: - typedef typename Holder::T T; - - TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {} - - virtual bool Open(const std::string &wspecifier) { - switch (state_) { - case kReadScript: - KALDI_ERR << " Opening already open TableWriter: call Close first."; - case kUninitialized: case kNotReadScript: - break; - } - wspecifier_ = wspecifier; - WspecifierType ws = ClassifyWspecifier(wspecifier, - NULL, - &script_rxfilename_, - &opts_); - KALDI_ASSERT(ws == kScriptWspecifier); // or wrongly called. - KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point. - - if (! ReadScriptFile(script_rxfilename_, - true, // print any warnings - &script_)) { // error reading script file or invalid format - state_ = kNotReadScript; - return false; // no need to print further warnings. user gets the error. - } - std::sort(script_.begin(), script_.end()); - for (size_t i = 0; i+1 < script_.size(); i++) { - if (script_[i].first.compare(script_[i+1].first) >= 0) { - // script[i] not < script[i+1] in lexical order... - KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_) - << " contains duplicate key " << script_[i].first; - state_ = kNotReadScript; - return false; - } - } - state_ = kReadScript; - return true; - } - - virtual bool IsOpen() const { return (state_ == kReadScript); } - - virtual bool Close() { - if (!IsOpen()) - KALDI_ERR << "Close() called on TableWriter that was not open."; - state_ = kUninitialized; - last_found_ = 0; - script_.clear(); - return true; - } - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual bool Write(const std::string &key, const T &value) { - if (!IsOpen()) - KALDI_ERR << "TableWriter: Write called on invalid stream"; - - if (!IsToken(key)) // e.g. empty string or has spaces... - KALDI_ERR << "TableWriter: using invalid key " << key; - - std::string wxfilename; - if (!LookupFilename(key, &wxfilename)) { - if (opts_.permissive) { - return true; // In permissive mode, it's as if we're writing to /dev/null - // for missing keys. - } else { - KALDI_WARN << "TableWriter: script file " - << PrintableRxfilename(script_rxfilename_) - << " has no entry for key "<<key; - return false; - } - } - Output output; - if (!output.Open(wxfilename, opts_.binary, false)) { - // Open in the text/binary mode (on Windows) given by member var. "binary" - // (obtained from wspecifier), but do not put the binary-mode header (it - // will be written, if needed, by the Holder::Write function.) - KALDI_WARN << "TableWriter: failed to open stream: " - << PrintableWxfilename(wxfilename); - return false; - } - if (!Holder::Write(output.Stream(), opts_.binary, value) - || !output.Close()) { - KALDI_WARN << "TableWriter: failed to write data to " - << PrintableWxfilename(wxfilename); - return false; - } - return true; - } - - // Flush does nothing in this implementation, there is nothing to flush. - virtual void Flush() { } - - - virtual ~TableWriterScriptImpl() { - // Nothing to do in destructor. - } - - private: - // Note: this function is almost the same as in RandomAccessTableReaderScriptImpl. - bool LookupFilename(const std::string &key, std::string *wxfilename) { - // First, an optimization: if we're going consecutively, this will - // make the lookup very fast. - last_found_++; - if (last_found_ < script_.size() && script_[last_found_].first == key) { - *wxfilename = script_[last_found_].second; - return true; - } - std::pair<std::string, std::string> pr(key, ""); // Important that "" - // compares less than or equal to any string, so lower_bound points to the - // element that has the same key. - typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator - IterType; - IterType iter = std::lower_bound(script_.begin(), script_.end(), pr); - if (iter != script_.end() && iter->first == key) { - last_found_ = iter - script_.begin(); - *wxfilename = iter->second; - return true; - } else { - return false; - } - } - - - WspecifierOptions opts_; - std::string wspecifier_; - std::string script_rxfilename_; - - // the script_ variable contains pairs of (key, filename), sorted using - // std::sort. This can be used with binary_search to look up filenames for - // writing. If this becomes inefficient we can use std::unordered_map (but I - // suspect this wouldn't be significantly faster & would use more memory). - // If memory becomes a problem here, the user should probably be passing - // only the relevant part of the scp file rather than expecting us to get too - // clever in the code. - std::vector<std::pair<std::string, std::string> > script_; - size_t last_found_; // This is for an optimization used in LookupFilename. - - enum { - kUninitialized, - kReadScript, - kNotReadScript, // read of script failed. - } state_; -}; - - -// The implementation of TableWriter we use when writing directly -// to an archive plus an associated scp. -template<class Holder> -class TableWriterBothImpl: public TableWriterImplBase<Holder> { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &wspecifier) { - switch (state_) { - case kUninitialized: - break; - case kWriteError: - KALDI_ERR << "TableWriter: opening stream, already open with write error."; - case kOpen: default: - if (!Close()) // throw because this error may not have been previously detected by user. - KALDI_ERR << "TableWriter: opening stream, error closing previously open stream."; - } - wspecifier_ = wspecifier; - WspecifierType ws = ClassifyWspecifier(wspecifier, - &archive_wxfilename_, - &script_wxfilename_, - &opts_); - KALDI_ASSERT(ws == kBothWspecifier); // or wrongly called. - if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput) - KALDI_WARN << "When writing to both archive and script, the script file " - "will generally not be interpreted correctly unless the archive is " - "an actual file: wspecifier = " << wspecifier; - - if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) { // false means no binary header. - state_ = kUninitialized; - return false; - } - if (!script_output_.Open(script_wxfilename_, false, false)) { // first false means text mode: - // script files always text-mode. second false means don't write header (doesn't matter - // for text mode). - archive_output_.Close(); // Don't care about status: error anyway. - state_ = kUninitialized; - return false; - } - state_ = kOpen; - return true; - } - - virtual bool IsOpen() const { - switch (state_) { - case kUninitialized: return false; - case kOpen: case kWriteError: return true; - default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state."; - } - return false; - } - - void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const { - std::ostringstream ss; - ss << ':' << streampos; - KALDI_ASSERT(ss.str() != ":-1"); - *output = archive_wxfilename_ + ss.str(); - - // e.g. /some/file:12302. - // Note that we warned if archive_wxfilename_ is not an actual filename; - // the philosophy is we give the user rope and if they want to hang - // themselves, with it, fine. - } - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual bool Write(const std::string &key, const T &value) { - switch (state_) { - case kOpen: break; - case kWriteError: - // user should have known from the last - // call to Write that there was a problem. Warn about it. - KALDI_WARN << "TableWriter: writing to non-open TableWriter object."; - return false; - case kUninitialized: default: - KALDI_ERR << "TableWriter: Write called on invalid stream"; - } - // state is now kOpen or kWriteError. - if (!IsToken(key)) // e.g. empty string or has spaces... - KALDI_ERR << "TableWriter: using invalid key " << key; - std::ostream &archive_os = archive_output_.Stream(); - archive_os << key << ' '; - typename std::ostream::pos_type archive_os_pos = archive_os.tellp(); - // position at start of Write() to archive. We will record this in the script file. - std::string offset_rxfilename; // rxfilename with offset into the archive, - // e.g. some_archive_name.ark:431541423 - MakeFilename(archive_os_pos, &offset_rxfilename); - - // Write to the script file first. - // The idea is that we want to get all the information possible into the - // script file, to make it easier to unwind errors later. - std::ostream &script_os = script_output_.Stream(); - script_output_.Stream() << key << ' ' << offset_rxfilename << '\n'; - - if (!Holder::Write(archive_output_.Stream(), opts_.binary, value)) { - KALDI_WARN << "TableWriter: write failure to" - << PrintableWxfilename(archive_wxfilename_); - state_ = kWriteError; - return false; - } - - if (script_os.fail()) { - KALDI_WARN << "TableWriter: write failure to script file detected: " - << PrintableWxfilename(script_wxfilename_); - state_ = kWriteError; - return false; - } - - if (archive_os.fail()) { - KALDI_WARN << "TableWriter: write failure to archive file detected: " - << PrintableWxfilename(archive_wxfilename_); - state_ = kWriteError; - return false; - } - - if (state_ == kWriteError) return false; // Even if this Write seems to have - // succeeded, we fail because a previous Write failed and the archive may be - // corrupted and unreadable. - - if (opts_.flush) - Flush(); - return true; - } - - // Flush will flush any archive; it does not return error status, - // any errors will be reported on the next Write or Close. - virtual void Flush() { - switch (state_) { - case kWriteError: case kOpen: - archive_output_.Stream().flush(); // Don't check error status. - script_output_.Stream().flush(); // Don't check error status. - return; - default: - KALDI_WARN << "TableWriter: Flush called on not-open writer."; - } - } - - virtual bool Close() { - if (!this->IsOpen()) - KALDI_ERR << "TableWriter: Close called on a stream that was not open."; - bool close_success = true; - if (archive_output_.IsOpen()) - if (!archive_output_.Close()) close_success = false; - if (script_output_.IsOpen()) - if (!script_output_.Close()) close_success = false; - bool ans = close_success && (state_ != kWriteError); - state_ = kUninitialized; - return ans; - } - - TableWriterBothImpl(): state_(kUninitialized) {} - - // May throw on write error if Close() was not called. - // User can get the error status by calling Close(). - virtual ~TableWriterBothImpl() { - if (!IsOpen()) return; - else if (!Close()) - KALDI_ERR << "At TableWriter destructor: Write failed or stream close failed: " - << wspecifier_; - } - - private: - Output archive_output_; - Output script_output_; - WspecifierOptions opts_; - std::string archive_wxfilename_; - std::string script_wxfilename_; - std::string wspecifier_; - enum { // is stream open? - kUninitialized, // no - kOpen, // yes - kWriteError, // yes - } state_; -}; - - -template<class Holder> -TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) { - if (wspecifier != "" && !Open(wspecifier)) { - KALDI_ERR << "TableWriter: failed to write to " - << wspecifier; - } -} - -template<class Holder> -bool TableWriter<Holder>::IsOpen() const { - return (impl_ != NULL); -} - - -template<class Holder> -bool TableWriter<Holder>::Open(const std::string &wspecifier) { - - if (IsOpen()) { - if (!Close()) // call Close() yourself to suppress this exception. - KALDI_ERR << "TableWriter::Open, failed to close previously open writer."; - } - KALDI_ASSERT(impl_ == NULL); - WspecifierType wtype = ClassifyWspecifier(wspecifier, NULL, NULL, NULL); - switch (wtype) { - case kBothWspecifier: - impl_ = new TableWriterBothImpl<Holder>(); - break; - case kArchiveWspecifier: - impl_ = new TableWriterArchiveImpl<Holder>(); - break; - case kScriptWspecifier: - impl_ = new TableWriterScriptImpl<Holder>(); - break; - case kNoWspecifier: default: - KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier; - return false; - } - if (impl_->Open(wspecifier)) return true; - else { // The class will have printed a more specific warning. - delete impl_; - impl_ = NULL; - return false; - } -} - -template<class Holder> -void TableWriter<Holder>::Write(const std::string &key, - const T &value) const { - CheckImpl(); - if (!impl_->Write(key, value)) - KALDI_ERR << "Error in TableWriter::Write"; - // More specific warning will have - // been printed in the Write function. -} - -template<class Holder> -void TableWriter<Holder>::Flush() { - CheckImpl(); - impl_->Flush(); -} - -template<class Holder> -bool TableWriter<Holder>::Close() { - CheckImpl(); - bool ans = impl_->Close(); - delete impl_; // We don't keep around non-open impl_ objects [c.f. definition of IsOpen()] - impl_ = NULL; - return ans; -} - -template<class Holder> -TableWriter<Holder>::~TableWriter() { - if (IsOpen() && !Close()) { - KALDI_ERR << "Error closing TableWriter [in destructor]."; - } -} - - -// Types of RandomAccessTableReader: -// In principle, we would like to have four types of RandomAccessTableReader: -// the 4 combinations [scp, archive], [seekable, not-seekable], -// where if something is seekable we only store a file offset. However, -// it seems sufficient for now to only implement two of these, in both -// cases assuming it's not seekable so we never store file offsets and always -// store either the scp line or the data in the archive. The reasons are: -// (1) -// For scp files, storing the actual entry is not that much more expensive -// than storing the file offsets (since the entries are just filenames), and -// avoids a lot of fseek operations that might be expensive. -// (2) -// For archive files, there is no real reason, if you have the archive file -// on disk somewhere, why you wouldn't access it via its associated scp. -// [i.e. write it as ark, scp]. The main reason to read archives directly -// is if they are part of a pipe, and in this case it's not seekable, so -// we implement only this case. -// -// Note that we will rarely in practice have to keep in memory everything in -// the archive, as long as things are only read once from the archive (the -// "o, " or "once" option) and as long as we keep our keys in sorted order; to take -// advantage of this we need the "s, " (sorted) option, so we would read archives -// as e.g. "s, o, ark:-" (this is the rspecifier we would use if it was the -// standard input and these conditions held). - -template<class Holder> class RandomAccessTableReaderImplBase { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &rspecifier) = 0; - - virtual bool HasKey(const std::string &key) = 0; - - virtual const T &Value(const std::string &key) = 0; - - virtual bool Close() = 0; - - virtual ~RandomAccessTableReaderImplBase() {} -}; - - -// Implementation of RandomAccessTableReader for a script file; for simplicity we -// just read it in all in one go, as it's unlikely someone would generate this -// from a pipe. In principle we could read it on-demand as for the archives, but -// this would probably be overkill. - -// Note: the code for this this class is similar to TableWriterScriptImpl: -// try to keep them in sync. -template<class Holder> -class RandomAccessTableReaderScriptImpl: - public RandomAccessTableReaderImplBase<Holder> { - - public: - typedef typename Holder::T T; - - RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {} - - virtual bool Open(const std::string &rspecifier) { - switch (state_) { - case kNotHaveObject: case kHaveObject: case kGaveObject: - KALDI_ERR << " Opening already open RandomAccessTableReader: call Close first."; - case kUninitialized: case kNotReadScript: - break; - } - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, - &script_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kScriptRspecifier); // or wrongly called. - KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point. - - if (! ReadScriptFile(script_rxfilename_, - true, // print any warnings - &script_)) { // error reading script file or invalid format - state_ = kNotReadScript; - return false; // no need to print further warnings. user gets the error. - } - - rspecifier_ = rspecifier; - // If opts_.sorted, the user has asserted that the keys are already sorted. - // Although we could easily sort them, we want to let the user know of this - // mistake. This same mistake could have serious effects if used with an - // archive rather than a script. - if (!opts_.sorted) - std::sort(script_.begin(), script_.end()); - for (size_t i = 0; i+1 < script_.size(); i++) { - if (script_[i].first.compare(script_[i+1].first) >= 0) { - // script[i] not < script[i+1] in lexical order... - bool same = (script_[i].first == script_[i+1].first); - KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_) - << (same ? " contains duplicate key: " : - " is not sorted (remove s, option or add ns, option): key is ") - << script_[i].first; - state_ = kNotReadScript; - return false; - } - } - state_ = kNotHaveObject; - return true; - } - - virtual bool IsOpen() const { - return (state_ == kNotHaveObject || state_ == kHaveObject || - state_ == kGaveObject); - } - - virtual bool Close() { - if (!IsOpen()) - KALDI_ERR << "Close() called on RandomAccessTableReader that was not open."; - holder_.Clear(); - state_ = kUninitialized; - last_found_ = 0; - script_.clear(); - current_key_ = ""; - // This one cannot fail because any errors of a "global" - // nature would have been detected when we did Open(). - // With archives it's different. - return true; - } - - virtual bool HasKey(const std::string &key) { - bool preload = opts_.permissive; - // In permissive mode, we have to check that we can read - // the scp entry before we assert that the key is there. - return HasKeyInternal(key, preload); - } - - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual const T& Value(const std::string &key) { - - if (!IsOpen()) - KALDI_ERR << "Value() called on non-open object."; - - if (!((state_ == kHaveObject || state_ == kGaveObject) - && key == current_key_)) { // Not already stored... - bool has_key = HasKeyInternal(key, true); // preload. - if (!has_key) - KALDI_ERR << "Could not get item for key " << key - << ", rspecifier is " << rspecifier_ << "[to ignore this, " - << "add the p, (permissive) option to the rspecifier."; - KALDI_ASSERT(state_ == kHaveObject && key == current_key_); - } - - if (state_ == kHaveObject) { - state_ = kGaveObject; - if (opts_.once) MakeTombstone(key); // make sure that future lookups fail. - return holder_.Value(); - } else { // state_ == kGaveObject - if (opts_.once) - KALDI_ERR << "Value called twice for the same key and ,o (once) option " - << "is used: rspecifier is " << rspecifier_; - return holder_.Value(); - } - } - - virtual ~RandomAccessTableReaderScriptImpl() { - if (state_ == kHaveObject || state_ == kGaveObject) - holder_.Clear(); - } - - private: - // HasKeyInternal when called with preload == false just tells us whether the - // key is in the scp. With preload == true, which happens when the ,p - // (permissive) option is given in the rspecifier, it will also check that we - // can preload the object from disk (loading from the rxfilename in the scp), - // and only return true if we can. This function is called both from HasKey - // and from Value(). - virtual bool HasKeyInternal(const std::string &key, bool preload) { - switch (state_) { - case kUninitialized: case kNotReadScript: - KALDI_ERR << "HasKey called on RandomAccessTableReader object that is not open."; - case kHaveObject: case kGaveObject: - if (key == current_key_) - return true; - break; - default: break; - } - KALDI_ASSERT(IsToken(key)); - size_t key_pos = 0; // set to zero to suppress warning - bool ans = LookupKey(key, &key_pos); - if (!ans) return false; - else { - // First do a check regarding the "once" option. - if (opts_.once && script_[key_pos].second == "") { // A "tombstone"; user is asking about - // already-read key. - KALDI_ERR << "HasKey called on key whose value was already read, and " - " you specified the \"once\" option (o, ): try removing o, or adding no, :" - " rspecifier is " << rspecifier_; - } - if (!preload) - return true; // we have the key. - else { // preload specified, so we have to pre-load the object before returning true. - if (!input_.Open(script_[key_pos].second)) { - KALDI_WARN << "Error opening stream " - << PrintableRxfilename(script_[key_pos].second); - return false; - } else { - // Make sure holder empty. - if (state_ == kHaveObject || state_ == kGaveObject) - holder_.Clear(); - if (holder_.Read(input_.Stream())) { - state_ = kHaveObject; - current_key_ = key; - return true; - } else { - KALDI_WARN << "Error reading object from " - "stream " << PrintableRxfilename(script_[key_pos].second); - state_ = kNotHaveObject; - return false; - } - } - } - } - } - void MakeTombstone(const std::string &key) { - size_t offset; - if (!LookupKey(key, &offset)) - KALDI_ERR << "RandomAccessTableReader object in inconsistent state."; - else - script_[offset].second = ""; - } - bool LookupKey(const std::string &key, size_t *script_offset) { - // First, an optimization: if we're going consecutively, this will - // make the lookup very fast. Since we may call HasKey and then - // Value(), which both may look up the key, we test if either the - // current or next position are correct. - if (last_found_ < script_.size() && script_[last_found_].first == key) { - *script_offset = last_found_; - return true; - } - last_found_++; - if (last_found_ < script_.size() && script_[last_found_].first == key) { - *script_offset = last_found_; - return true; - } - std::pair<std::string, std::string> pr(key, ""); // Important that "" - // compares less than or equal to any string, so lower_bound points to the - // element that has the same key. - typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator - IterType; - IterType iter = std::lower_bound(script_.begin(), script_.end(), pr); - if (iter != script_.end() && iter->first == key) { - last_found_ = *script_offset = iter - script_.begin(); - return true; - } else { - return false; - } - } - - - Input input_; // Use the same input_ object for reading each file, in case - // the scp specifies offsets in an archive (so we can keep the same file open). - RspecifierOptions opts_; - std::string rspecifier_; // rspecifier used to open it; used in debug messages - std::string script_rxfilename_; // filename of script. - - std::string current_key_; // Key of object in holder_ - Holder holder_; - - // the script_ variable contains pairs of (key, filename), sorted using - // std::sort. This can be used with binary_search to look up filenames for - // writing. If this becomes inefficient we can use std::unordered_map (but I - // suspect this wouldn't be significantly faster & would use more memory). - // If memory becomes a problem here, the user should probably be passing - // only the relevant part of the scp file rather than expecting us to get too - // clever in the code. - std::vector<std::pair<std::string, std::string> > script_; - size_t last_found_; // This is for an optimization used in FindFilename. - - enum { // [Do we have [Does holder_ - // script_ set up?] contain object?] - kUninitialized, // no no - kNotReadScript, // no no - kNotHaveObject, // yes no - kHaveObject, // yes yes - kGaveObject, // yes yes - // [kGaveObject is as kHaveObject but we note that the - // user has already read it; this is for checking that - // if "once" is specified, the user actually only reads - // it once. - } state_; - -}; - - - - -// This is the base-class (with some implemented functions) for the -// implementations of RandomAccessTableReader when it's an archive. This -// base-class handles opening the files, storing the state of the reading -// process, and loading objects. This is the only case in which we have -// an intermediate class in the hierarchy between the virtual ImplBase -// class and the actual Impl classes. -// The child classes vary in the assumptions regarding sorting, etc. - -template<class Holder> class RandomAccessTableReaderArchiveImplBase: - public RandomAccessTableReaderImplBase<Holder> { - public: - typedef typename Holder::T T; - - RandomAccessTableReaderArchiveImplBase(): holder_(NULL), state_(kUninitialized) { } - - virtual bool Open(const std::string &rspecifier) { - if (state_ != kUninitialized) { - if (! this->Close()) // call Close() yourself to suppress this exception. - KALDI_ERR << "TableReader::Open, error closing previous input."; - } - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kArchiveRspecifier); - - // NULL means don't expect binary-mode header - bool ans; - if (Holder::IsReadInBinary()) - ans = input_.Open(archive_rxfilename_, NULL); - else - ans = input_.OpenTextMode(archive_rxfilename_); - if (!ans) { // header. - KALDI_WARN << "TableReader: failed to open stream " - << PrintableRxfilename(archive_rxfilename_); - state_ = kUninitialized; // Failure on Open - return false; // User should print the error message. - } else { - state_ = kNoObject; - } - return true; - } - - // ReadNextObject() requires that the state be kNoObject, - // and it will try read the next object. If it succeeds, - // it sets the state to kHaveObject, and - // cur_key_ and holder_ have the key and value. If it fails, - // it sets the state to kError or kEof. - void ReadNextObject() { - if (state_ != kNoObject) - KALDI_ERR << "TableReader: ReadNextObject() called from wrong state."; // Code error - // somewhere in this class or a child class. - std::istream &is = input_.Stream(); - is.clear(); // Clear any fail bits that may have been set... just in case - // this happened in the Read function. - is >> cur_key_; // This eats up any leading whitespace and gets the string. - if (is.eof()) { - state_ = kEof; - return; - } - if (is.fail()) { // This shouldn't really happen, barring file-system errors. - KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_; - state_ = kError; - return; - } - int c; - if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key. - // We also allow tab, just so we can read archives generated by scripts that may - // not be fully aware of how this format works. - KALDI_WARN << "Invalid archive file format: expected space after key " <<cur_key_ - <<", got character " - << CharToString(static_cast<char>(is.peek())) << ", reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - if (c != '\n') is.get(); // Consume the space or tab. - holder_ = new Holder; - if (holder_->Read(is)) { - state_ = kHaveObject; - return; - } else { - KALDI_WARN << "Object read failed, reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - delete holder_; - holder_ = NULL; - return; - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kEof: case kError: case kHaveObject: case kNoObject: return true; - case kUninitialized: return false; - default: KALDI_ERR << "IsOpen() called on invalid object."; - return false; - } - } - - // Called by the child-class virutal Close() functions; does the - // shared parts of the cleanup. - bool CloseInternal() { - if (! this->IsOpen()) - KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly."; - if (input_.IsOpen()) - input_.Close(); - if (state_ == kHaveObject) { - KALDI_ASSERT(holder_ != NULL); - delete holder_; - holder_ = NULL; - } else KALDI_ASSERT(holder_ == NULL); - bool ans = (state_ != kError); - state_ = kUninitialized; - if (!ans && opts_.permissive) { - KALDI_WARN << "Error state detected closing reader. " - << "Ignoring it because you specified permissive mode."; - return true; - } - return ans; - } - - ~RandomAccessTableReaderArchiveImplBase() { - // The child class has the responsibility to call CloseInternal(). - KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL); - } - private: - Input input_; // Input object for the archive - protected: - // The variables below are accessed by child classes. - - std::string cur_key_; // current key (if state == kHaveObject). - Holder *holder_; // Holds the object we just read (if state == kHaveObject) - - std::string rspecifier_; - std::string archive_rxfilename_; - RspecifierOptions opts_; - - enum { // [The state of the reading process] [does holder_ [is input_ - // have object] open] - kUninitialized, // Uninitialized or closed no no - kNoObject, // Do not have object in holder_ no yes - kHaveObject, // Have object in holder_ yes yes - kEof, // End of file no yes - kError, // Some kind of error-state in the reading. no yes - } state_; - -}; - - -// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is the -// implementation for random-access reading of archives when both the archive, -// and the calling code, are in sorted order (i.e. we ask for the keys in sorted -// order). This is when the s and cs options are both given. It only ever has -// to keep one object in memory. It inherits from -// RandomAccessTableReaderArchiveImplBase which implements the common parts of -// RandomAccessTableReader that are used when it's an archive we're reading from. - -template<class Holder> class RandomAccessTableReaderDSortedArchiveImpl: - public RandomAccessTableReaderArchiveImplBase<Holder> { - using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; - using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; - using RandomAccessTableReaderArchiveImplBase<Holder>::kError; - using RandomAccessTableReaderArchiveImplBase<Holder>::state_; - using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; - using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; - using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; - using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; - using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; - using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; - public: - typedef typename Holder::T T; - - RandomAccessTableReaderDSortedArchiveImpl() { } - - virtual bool Close() { - // We don't have anything additional to clean up, so just - // call generic base-class one. - return this->CloseInternal(); - } - - virtual bool HasKey(const std::string &key) { - return FindKeyInternal(key); - } - virtual const T & Value(const std::string &key) { - if (FindKeyInternal(key)) { - KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_ - && holder_ != NULL); - return this->holder_->Value(); - } else { - KALDI_ERR << "Value() called but no such key " << key - << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. - } - } - - virtual ~RandomAccessTableReaderDSortedArchiveImpl() { - if (this->IsOpen()) - if (!Close()) // more specific warning will already have been printed. - // we are in some kind of error state & user did not find out by - // calling Close(). - KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " - << rspecifier_; - } - private: - // FindKeyInternal tries to find the key by calling "ReadNextObject()" - // as many times as necessary till we get to it. It is called from - // both FindKey and Value(). - bool FindKeyInternal(const std::string &key) { - // First check that the user is calling us right: should be - // in sorted order. If not, error. - if (!last_requested_key_.empty()) { - if (key.compare(last_requested_key_) < 0) { // key < last_requested_key_ - KALDI_ERR << "You provided the \"cs\" option " - << "but are not calling with keys in sorted order: " - << key << " < " << last_requested_key_ << ": rspecifier is " - << rspecifier_; - } - } - // last_requested_key_ is just for debugging of order of calling. - last_requested_key_ = key; - - if (state_ == kNoObject) - ReadNextObject(); // This can only happen - // once, the first time someone calls HasKey() or Value(). We don't - // do it in the initializer to stop the program hanging too soon, - // if reading from a pipe. - - if (state_ == kEof || state_ == kError) return false; - - if (state_ == kUninitialized) - KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open."; - - std::string last_key_; // To check that - // the archive we're reading is in sorted order. - while (1) { - KALDI_ASSERT(state_ == kHaveObject); - int compare = key.compare(cur_key_); - if (compare == 0) { // key == key_ - return true; // we got it.. - } else if (compare < 0) { // key < cur_key_, so we already read past the - // place where we want to be. This implies that we will never find it - // [due to the sorting etc., this means it just isn't in the archive]. - return false; - } else { // compare > 0, key > cur_key_. We need to read further ahead. - last_key_ = cur_key_; - // read next object.. we have to set state to kNoObject first. - KALDI_ASSERT(holder_ != NULL); - delete holder_; - holder_ = NULL; - state_ = kNoObject; - ReadNextObject(); - if (state_ != kHaveObject) - return false; // eof or read error. - if (cur_key_.compare(last_key_) <= 0) { - KALDI_ERR << "You provided the \"s\" option " - << " (sorted order), but keys are out of order or duplicated: " - << last_key_ << " is followed by " << cur_key_ - << ": rspecifier is " << rspecifier_; - } - } - } - } - - /// Last string provided to HasKey() or Value(); - std::string last_requested_key_; - - -}; - -// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of -// archives when the user specified the sorted (s) option but not the -// called-sorted (cs) options. -template<class Holder> class RandomAccessTableReaderSortedArchiveImpl: - public RandomAccessTableReaderArchiveImplBase<Holder> { - using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; - using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; - using RandomAccessTableReaderArchiveImplBase<Holder>::kError; - using RandomAccessTableReaderArchiveImplBase<Holder>::state_; - using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; - using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; - using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; - using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; - using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; - using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; - - public: - typedef typename Holder::T T; - - RandomAccessTableReaderSortedArchiveImpl(): - last_found_index_(static_cast<size_t>(-1)), - pending_delete_(static_cast<size_t>(-1)) { } - - virtual bool Close() { - for (size_t i = 0; i < seen_pairs_.size(); i++) - if (seen_pairs_[i].second) - delete seen_pairs_[i].second; - seen_pairs_.clear(); - - pending_delete_ = static_cast<size_t>(-1); - last_found_index_ = static_cast<size_t>(-1); - - return this->CloseInternal(); - } - virtual bool HasKey(const std::string &key) { - HandlePendingDelete(); - size_t index; - bool ans = FindKeyInternal(key, &index); - if (ans && opts_.once && seen_pairs_[index].second == NULL) { - // Just do a check RE the once option. "&&opts_.once" is for - // efficiency since this can only happen in that case. - KALDI_ERR << "Error: HasKey called after Value() already called for " - << " that key, and once (o) option specified: rspecifier is " - << rspecifier_; - } - return ans; - } - virtual const T & Value(const std::string &key) { - HandlePendingDelete(); - size_t index; - if (FindKeyInternal(key, &index)) { - if (seen_pairs_[index].second == NULL) { // can happen if opts.once_ - KALDI_ERR << "Error: Value() called more than once for key " - << key << " and once (o) option specified: rspecifier is " - << rspecifier_; - } - if (opts_.once) - pending_delete_ = index; // mark this index to be deleted on next call. - return seen_pairs_[index].second->Value(); - } else { - KALDI_ERR << "Value() called but no such key " << key - << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. - } - } - virtual ~RandomAccessTableReaderSortedArchiveImpl() { - if (this->IsOpen()) - if (!Close()) // more specific warning will already have been printed. - // we are in some kind of error state & user did not find out by - // calling Close(). - KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " - << rspecifier_; - } - private: - void HandlePendingDelete() { - const size_t npos = static_cast<size_t>(-1); - if (pending_delete_ != npos) { - KALDI_ASSERT(pending_delete_ < seen_pairs_.size()); - KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL); - delete seen_pairs_[pending_delete_].second; - seen_pairs_[pending_delete_].second = NULL; - pending_delete_ = npos; - } - } - - // FindKeyInternal tries to find the key in the array "seen_pairs_". - // If it is not already there, it reads ahead as far as necessary - // to determine whether we have the key or not. On success it returns - // true and puts the index into the array seen_pairs_, into "index"; - // on failure it returns false. - // It will leave the state as either kNoObject, kEof or kError. - // FindKeyInternal does not do any checking about whether you are asking - // about a key that has been already given (with the "once" option). - // That is the user's responsibility. - - bool FindKeyInternal(const std::string &key, size_t *index) { - // First, an optimization in case the previous call was for the - // same key, and we found it. - if (last_found_index_ < seen_pairs_.size() - && seen_pairs_[last_found_index_].first == key) { - *index = last_found_index_; - return true; - } - - if (state_ == kUninitialized) - KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open."; - - // Step one is to see whether we have to read ahead for the object.. - // Note, the possible states right now are kNoObject, kEof or kError. - // We are never in the state kHaveObject except just after calling - // ReadNextObject(). - bool looped = false; - while (state_ == kNoObject && - (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) { - looped = true; - // Read this as: - // while ( the stream is potentially good for reading && - // ([got no keys] || key > most_recent_key) ) { ... - // Try to read a new object. - // Note that the keys in seen_pairs_ are ordered from least to greatest. - ReadNextObject(); - if (state_ == kHaveObject) { // Successfully read object. - if (!seen_pairs_.empty() && // This is just a check. - cur_key_.compare(seen_pairs_.back().first) <= 0) { - // read the expression above as: !( cur_key_ > previous_key). - // it means we are not in sorted order [the user specified that we - // are, or we would not be using this implementation]. - KALDI_ERR << "You provided the sorted (s) option but keys in archive " - << PrintableRxfilename(archive_rxfilename_) << " are not " - << "in sorted order: " << seen_pairs_.back().first - << " is followed by " << cur_key_; - } - KALDI_ASSERT(holder_ != NULL); - seen_pairs_.push_back(std::make_pair(cur_key_, holder_)); - holder_ = NULL; - state_ = kNoObject; - } - } - if (looped) { // We only need to check the last element of the seen_pairs_ array, - // since we would not have read more after getting "key". - if (!seen_pairs_.empty() && seen_pairs_.back().first == key) { - last_found_index_ = *index = seen_pairs_.size() - 1; - return true; - } else return false; - } - // Now we have do an actual binary search in the seen_pairs_ array. - std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL)); - typename std::vector<std::pair<std::string, Holder*> >::iterator - iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(), - pr, PairCompare()); - if (iter != seen_pairs_.end() && - key == iter->first) { - last_found_index_ = *index = (iter - seen_pairs_.begin()); - return true; - } else return false; - } - - // These are the pairs of (key, object) we have read. We keep all the keys we - // have read but the actual objects (if they are stored with pointers inside - // the Holder object) may be deallocated if once == true, and the Holder - // pointer set to NULL. - std::vector<std::pair<std::string, Holder*> > seen_pairs_; - size_t last_found_index_; // An optimization s.t. if FindKeyInternal called twice with - // same key (as it often will), it doesn't have to do the key search twice. - size_t pending_delete_; // If opts_.once == true, this is the index of - // element of seen_pairs_ that is pending deletion. - struct PairCompare { - // PairCompare is the Less-than operator for the pairs of(key, Holder). - // compares the keys. - inline bool operator() (const std::pair<std::string, Holder*> &pr1, - const std::pair<std::string, Holder*> &pr2) { - return (pr1.first.compare(pr2.first) < 0); - } - }; -}; - - - -// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of -// archives when the user does not specify the sorted (s) option (in this case -// the called-sorted, or "cs" option, is ignored). This is the least efficient -// of the random access archive readers, in general, but it can be as efficient -// as the others, in speed, memory and latency, if the "once" option is specified -// and it happens that the keys of the archive are the same as the keys the code -// is called with (to HasKey() and Value()), and in the same order. However, if -// you ask it for a key that's not present it will have to read the archive till -// the end and store it all in memory. - -template<class Holder> class RandomAccessTableReaderUnsortedArchiveImpl: - public RandomAccessTableReaderArchiveImplBase<Holder> { - using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; - using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; - using RandomAccessTableReaderArchiveImplBase<Holder>::kError; - using RandomAccessTableReaderArchiveImplBase<Holder>::state_; - using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; - using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; - using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; - using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; - using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; - using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; - - typedef typename Holder::T T; - - public: - RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()), - to_delete_iter_valid_(false) - { - map_.max_load_factor(0.5); // make it quite empty -> quite efficient. - // default seems to be 1. - } - - virtual bool Close() { - for (typename MapType::iterator iter = map_.begin(); - iter != map_.end(); - ++iter) { - if (iter->second) - delete iter->second; - } - map_.clear(); - first_deleted_string_ = ""; - to_delete_iter_valid_ = false; - return this->CloseInternal(); - } - - virtual bool HasKey(const std::string &key) { - HandlePendingDelete(); - return FindKeyInternal(key, NULL); - } - virtual const T & Value(const std::string &key) { - HandlePendingDelete(); - const T *ans_ptr = NULL; - if (FindKeyInternal(key, &ans_ptr)) - return *ans_ptr; - else - KALDI_ERR << "Value() called but no such key " << key - << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. - } - virtual ~RandomAccessTableReaderUnsortedArchiveImpl() { - if (this->IsOpen()) - if (!Close()) // more specific warning will already have been printed. - // we are in some kind of error state & user did not find out by - // calling Close(). - KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " - << rspecifier_; - } - private: - void HandlePendingDelete() { - if (to_delete_iter_valid_) { - to_delete_iter_valid_ = false; - delete to_delete_iter_->second; // Delete Holder object. - if (first_deleted_string_.length() == 0) - first_deleted_string_ = to_delete_iter_->first; - map_.erase(to_delete_iter_); // delete that element. - } - } - - // FindKeyInternal tries to find the key in the map "map_" - // If it is not already there, it reads ahead either until it finds the - // key, or until end of file. If called with value_ptr == NULL, - // it assumes it's called from HasKey() and just returns true or false - // and doesn't otherwise have side effects. If called with value_ptr != - // NULL, it assumes it's called from Value(). Thus, it will crash - // if it cannot find the key. If it can find it it puts its address in - // *value_ptr, and if opts_once == true it will mark that element of the - // map to be deleted. - - bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) { - typename MapType::iterator iter = map_.find(key); - if (iter != map_.end()) { // Found in the map... - if (value_ptr == NULL) { // called from HasKey - return true; // this is all we have to do. - } else { - *value_ptr = &(iter->second->Value()); - if (opts_.once) { // value won't be needed again, so mark - // for deletion. - to_delete_iter_ = iter; // pending delete. - KALDI_ASSERT(!to_delete_iter_valid_); - to_delete_iter_valid_ = true; - } - return true; - } - } - while (state_ == kNoObject) { - ReadNextObject(); - if (state_ == kHaveObject) { // Successfully read object. - state_ = kNoObject; // we are about to transfer ownership - // of the object in holder_ to map_. - // Insert it into map_. - std::pair<typename MapType::iterator, bool> pr = - map_.insert(typename MapType::value_type(cur_key_, holder_)); - - if (!pr.second) { // Was not inserted-- previous element w/ same key - delete holder_; // map was not changed, no ownership transferred. - holder_ = NULL; - KALDI_ERR << "Error in RandomAccessTableReader: duplicate key " - << cur_key_ << " in archive " << archive_rxfilename_; - } - holder_ = NULL; // ownership transferred to map_. - if (cur_key_ == key) { // the one we wanted.. - if (value_ptr == NULL) { // called from HasKey - return true; - } else { // called from Value() - *value_ptr = &(pr.first->second->Value()); // this gives us the - // Value() from the Holder in the map. - if (opts_.once) { // mark for deletion, as won't be needed again. - to_delete_iter_ = pr.first; - KALDI_ASSERT(!to_delete_iter_valid_); - to_delete_iter_valid_ = true; - } - return true; - } - } - } - } - if (opts_.once && key == first_deleted_string_) { - KALDI_ERR << "You specified the once (o) option but " - << "you are calling using key " << key - << " more than once: rspecifier is " << rspecifier_; - } - return false; // We read the entire archive (or got to error state) and didn't - // find it. - } - - typedef unordered_map<std::string, Holder*, StringHasher> MapType; - MapType map_; - - typename MapType::iterator to_delete_iter_; - bool to_delete_iter_valid_; - - std::string first_deleted_string_; // keep the first string we deleted - // from map_ (if opts_.once == true). It's for an inexact spot-check that the - // "once" option isn't being used incorrectly. - -}; - - - - - -template<class Holder> -RandomAccessTableReader<Holder>::RandomAccessTableReader(const std::string &rspecifier): - impl_(NULL) { - if (rspecifier != "" && !Open(rspecifier)) - KALDI_ERR << "Error opening RandomAccessTableReader object " - " (rspecifier is: " << rspecifier << ")"; -} - -template<class Holder> -bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) { - if (IsOpen()) - KALDI_ERR << "Already open."; - RspecifierOptions opts; - RspecifierType rs = ClassifyRspecifier(rspecifier, NULL, &opts); - switch (rs) { - case kScriptRspecifier: - impl_ = new RandomAccessTableReaderScriptImpl<Holder>(); - break; - case kArchiveRspecifier: - if (opts.sorted) { - if (opts.called_sorted) // "doubly" sorted case. - impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>(); - else - impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>(); - } else impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>(); - break; - case kNoRspecifier: default: - KALDI_WARN << "Invalid rspecifier: " - << rspecifier; - return false; - } - if (impl_->Open(rspecifier)) - return true; - else { - // Warning will already have been printed. - delete impl_; - impl_ = NULL; - return false; - } -} - -template<class Holder> -bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) { - CheckImpl(); - if (!IsToken(key)) - KALDI_ERR << "Invalid key \"" << key << '"'; - return impl_->HasKey(key); -} - - -template<class Holder> -const typename RandomAccessTableReader<Holder>::T& -RandomAccessTableReader<Holder>::Value(const std::string &key) { - CheckImpl(); - return impl_->Value(key); -} - -template<class Holder> -bool RandomAccessTableReader<Holder>::Close() { - CheckImpl(); - bool ans =impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; -} - -template<class Holder> -RandomAccessTableReader<Holder>::~RandomAccessTableReader() { - if (IsOpen() && !Close()) // call Close() yourself to stop this being thrown. - KALDI_ERR << "failure detected in destructor."; -} - -template<class Holder> -void SequentialTableReader<Holder>::CheckImpl() const { - if (!impl_) { - KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you " - << "passed the empty string as an argument to a program?)"; - } -} - -template<class Holder> -void RandomAccessTableReader<Holder>::CheckImpl() const { - if (!impl_) { - KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you " - << "passed the empty string as an argument to a program?)"; - } -} - -template<class Holder> -void TableWriter<Holder>::CheckImpl() const { - if (!impl_) { - KALDI_ERR << "Trying to use empty TableWriter (perhaps you " - << "passed the empty string as an argument to a program?)"; - } -} - -template<class Holder> -RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped( - const std::string &table_rxfilename, - const std::string &utt2spk_rxfilename): - reader_(table_rxfilename), token_reader_(table_rxfilename.empty() ? "" : - utt2spk_rxfilename), - utt2spk_rxfilename_(utt2spk_rxfilename) { } - -template<class Holder> -bool RandomAccessTableReaderMapped<Holder>::Open( - const std::string &table_rxfilename, - const std::string &utt2spk_rxfilename) { - if (reader_.IsOpen()) reader_.Close(); - if (token_reader_.IsOpen()) token_reader_.Close(); - KALDI_ASSERT(!table_rxfilename.empty()); - if (!reader_.Open(table_rxfilename)) return false; // will have printed - // warning internally, probably. - if (!utt2spk_rxfilename.empty()) { - if (!token_reader_.Open(utt2spk_rxfilename)) { - reader_.Close(); - return false; - } - } - return true; -} - - -template<class Holder> -bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) { - // We don't check IsOpen, we let the call go through to the member variable - // (reader_), which will crash with a more informative error message than - // we can give here, as we don't any longer know the rxfilename. - if (token_reader_.IsOpen()) { // We need to map the key from utt to spk. - if (!token_reader_.HasKey(utt)) - KALDI_ERR << "Attempting to read key " << utt << ", which is not present " - << "in utt2spk map or similar map being read from " - << PrintableRxfilename(utt2spk_rxfilename_); - const std::string &spk = token_reader_.Value(utt); - return reader_.HasKey(spk); - } else { - return reader_.HasKey(utt); - } -} - -template<class Holder> -const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value( - const std::string &utt) { - if (token_reader_.IsOpen()) { // We need to map the key from utt to spk. - if (!token_reader_.HasKey(utt)) - KALDI_ERR << "Attempting to read key " << utt << ", which is not present " - << "in utt2spk map or similar map being read from " - << PrintableRxfilename(utt2spk_rxfilename_); - const std::string &spk = token_reader_.Value(utt); - return reader_.Value(spk); - } else { - return reader_.Value(utt); - } -} - - - -/// @} - -} // end namespace kaldi - - - -#endif |