diff options
author | Determinant <[email protected]> | 2015-08-14 11:51:42 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2015-08-14 11:51:42 +0800 |
commit | 96a32415ab43377cf1575bd3f4f2980f58028209 (patch) | |
tree | 30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/kaldi/util/kaldi-table-inl.h | |
parent | c177a7549bd90670af4b29fa813ddea32cfe0f78 (diff) |
add implementation for kaldi io (by ymz)
Diffstat (limited to 'kaldi_io/src/kaldi/util/kaldi-table-inl.h')
-rw-r--r-- | kaldi_io/src/kaldi/util/kaldi-table-inl.h | 2246 |
1 files changed, 2246 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/util/kaldi-table-inl.h b/kaldi_io/src/kaldi/util/kaldi-table-inl.h new file mode 100644 index 0000000..6b73c88 --- /dev/null +++ b/kaldi_io/src/kaldi/util/kaldi-table-inl.h @@ -0,0 +1,2246 @@ +// util/kaldi-table-inl.h + +// Copyright 2009-2011 Microsoft Corporation +// 2013 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_ +#define KALDI_UTIL_KALDI_TABLE_INL_H_ + +#include <algorithm> +#include "util/kaldi-io.h" +#include "util/text-utils.h" +#include "util/stl-utils.h" // for StringHasher. + + +namespace kaldi { + +/// \addtogroup table_impl_types +/// @{ + +template<class Holder> class SequentialTableReaderImplBase { + public: + typedef typename Holder::T T; + // note that Open takes rxfilename not rspecifier. + virtual bool Open(const std::string &rxfilename) = 0; + virtual bool Done() const = 0; + virtual bool IsOpen() const = 0; + virtual std::string Key() = 0; + virtual const T &Value() = 0; + virtual void FreeCurrent() = 0; + virtual void Next() = 0; + virtual bool Close() = 0; + SequentialTableReaderImplBase() { } + virtual ~SequentialTableReaderImplBase() { } + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase); +}; + + +// This is the implementation for SequentialTableReader +// when it's actually a script file. +template<class Holder> class SequentialTableReaderScriptImpl: + public SequentialTableReaderImplBase<Holder> { + public: + typedef typename Holder::T T; + + SequentialTableReaderScriptImpl(): state_(kUninitialized) { } + + virtual bool Open(const std::string &rspecifier) { + if (state_ != kUninitialized) + if (! Close()) // call Close() yourself to suppress this exception. + KALDI_ERR << "TableReader::Open, error closing previous input: " + << "rspecifier was " << rspecifier_; + bool binary; + rspecifier_ = rspecifier; + RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_, + &opts_); + KALDI_ASSERT(rs == kScriptRspecifier); + if (!script_input_.Open(script_rxfilename_, &binary)) { // Failure on Open + KALDI_WARN << "Failed to open script file " + << PrintableRxfilename(script_rxfilename_); + state_ = kUninitialized; + return false; + } else { // Open succeeded. + if (binary) { // script file should not be binary file.. + state_ = kError; // bad script file. + script_input_.Close(); + return false; + } else { + state_ = kFileStart; + Next(); + if (state_ == kError) { + script_input_.Close(); + return false; + } + if (opts_.permissive) { // Next() will have preloaded. + KALDI_ASSERT(state_ == kLoadSucceeded || state_ == kEof); + } else { + KALDI_ASSERT(state_ == kHaveScpLine || state_ == kEof); + } + return true; // Success. + } + } + } + + virtual bool IsOpen() const { + switch (state_) { + case kEof: case kError: case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: return true; + case kUninitialized: return false; + default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid + // state for user to call something on. + return false; + } + } + + virtual bool Done() const { + switch (state_) { + case kHaveScpLine: return false; + case kLoadSucceeded: case kLoadFailed: return false; + // These cases are because we want LoadCurrent() + // to be callable after Next() and to not change the Done() status [only Next() should change + // the Done() status]. + case kEof: case kError: return true; // Error condition, like Eof, counts as Done(); the destructor + // or Close() will inform the user of the error. + default: KALDI_ERR << "Done() called on TableReader object at the wrong time."; + return false; + } + } + + virtual std::string Key() { + // Valid to call this whenever Done() returns false. + switch (state_) { + case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: break; + default: + // coding error. + KALDI_ERR << "Key() called on TableReader object at the wrong time."; + } + return key_; + } + const T &Value() { + StateType orig_state = state_; + if (state_ == kHaveScpLine) LoadCurrent(); // Takes + // state_ to kLoadSucceeded or kLoadFailed. + if (state_ == kLoadFailed) { // this can happen due to + // a file listed in an scp file not existing, or + // read failure, failure of a command, etc. + if (orig_state == kHaveScpLine) + KALDI_ERR << "TableReader: failed to load object from " + << PrintableRxfilename(data_rxfilename_) + << " (to suppress this error, add the permissive " + << "(p, ) option to the rspecifier."; + + else // orig_state_ was kLoadFailed, which only could have happened + // if the user called FreeCurrent(). + KALDI_ERR << "TableReader: you called Value() after FreeCurrent()."; + } else if (state_ != kLoadSucceeded) { + // This would be a coding error. + KALDI_ERR << "TableReader: Value() called at the wrong time."; + } + return holder_.Value(); + } + void FreeCurrent() { + if (state_ == kLoadSucceeded) { + holder_.Clear(); + state_ = kLoadFailed; + } else { + KALDI_WARN << "TableReader: FreeCurrent called at the wrong time."; + } + } + void Next() { + while (1) { + NextScpLine(); + if (Done()) return; + if (opts_.permissive) { + // Permissive mode means, when reading scp files, we treat keys whose scp entry + // cannot be read as nonexistent. This means trying to read. + if (LoadCurrent()) return; // Success. + // else try the next scp line. + } else { + return; // We go the next key; Value() will crash if we can't + // read the scp line. + } + } + } + + virtual bool Close() { + // Close() will succeed if the stream was not in an error + // state. To clean up, it also closes the Input objects if + // they're open. + if (script_input_.IsOpen()) + script_input_.Close(); + if (data_input_.IsOpen()) + data_input_.Close(); + if (state_ == kLoadSucceeded) + holder_.Clear(); + if (!this->IsOpen()) + KALDI_ERR << "Close() called on input that was not open."; + StateType old_state = state_; + state_ = kUninitialized; + if (old_state == kError) { + if (opts_.permissive) { + KALDI_WARN << "Close() called on scp file with read error, ignoring the " + "error because permissive mode specified."; + return true; + } else return false; // User will do something with the error status. + } else return true; + } + + virtual ~SequentialTableReaderScriptImpl() { + if (state_ == kError) + KALDI_ERR << "TableReader: reading script file failed: from scp " + << PrintableRxfilename(script_rxfilename_); + // If you don't want this exception to be thrown you can + // call Close() and check the status. + if (state_ == kLoadSucceeded) + holder_.Clear(); + } + private: + bool LoadCurrent() { + // Attempts to load object whose rxfilename is on the current scp line. + if (state_ != kHaveScpLine) + KALDI_ERR << "TableReader: LoadCurrent() called at the wrong time."; + bool ans; + // note, NULL means it doesn't read the binary-mode header + if (Holder::IsReadInBinary()) ans = data_input_.Open(data_rxfilename_, NULL); + else ans = data_input_.OpenTextMode(data_rxfilename_); + if (!ans) { + // May want to make this warning a VLOG at some point + KALDI_WARN << "TableReader: failed to open file " + << PrintableRxfilename(data_rxfilename_); + state_ = kLoadFailed; + return false; + } else { + if (holder_.Read(data_input_.Stream())) { + state_ = kLoadSucceeded; + return true; + } else { // holder_ will not contain data. + KALDI_WARN << "TableReader: failed to load object from " + << PrintableRxfilename(data_rxfilename_); + state_ = kLoadFailed; + return false; + } + } + } + + // Reads the next line in the script file. + void NextScpLine() { + switch (state_) { + case kLoadSucceeded: holder_.Clear(); break; + case kHaveScpLine: case kLoadFailed: case kFileStart: break; + default: + // No other states are valid to call Next() from. + KALDI_ERR << "Reading script file: Next called wrongly."; + } + std::string line; + if (getline(script_input_.Stream(), line)) { + SplitStringOnFirstSpace(line, &key_, &data_rxfilename_); + if (!key_.empty() && !data_rxfilename_.empty()) { + // Got a valid line. + state_ = kHaveScpLine; + } else { + // Got an invalid line. + state_ = kError; // we can't make sense of this + // scp file and will now die. + } + } else { + state_ = kEof; // nothing more in the scp file. + // Might as well close the input streams as don't need them. + script_input_.Close(); + if (data_input_.IsOpen()) + data_input_.Close(); + } + } + + + Input script_input_; // Input object for the .scp file + Input data_input_; // Input object for the entries in + // the script file. + Holder holder_; // Holds the object. + bool binary_; // Binary-mode archive. + std::string key_; + std::string rspecifier_; + std::string script_rxfilename_; // of the script file. + RspecifierOptions opts_; // options. + std::string data_rxfilename_; // of the file we're reading. + enum StateType { + // [The state of the reading process] [does holder_ [is script_inp_ + // have object] open] + kUninitialized, // Uninitialized or closed. no no + kEof, // We did Next() and found eof in script file. no no + kError, // Some other error no yes + kHaveScpLine, // Just called Open() or Next() and have a no yes + // line of the script file but no data. + kLoadSucceeded, // Called LoadCurrent() and it succeeded. yes yes + kLoadFailed, // Called LoadCurrent() and it failed, no yes + // or the user called FreeCurrent().. note, + // if when called by user we are in this state, + // it means the user called FreeCurrent(). + kFileStart, // [state we only use internally] no yes + } state_; + private: +}; + + +// This is the implementation for SequentialTableReader +// when it's an archive. Note that the archive format is: +// key1 [space] object1 key2 [space] +// object2 ... eof. +// "object1" is the output of the Holder::Write function and will +// typically contain a binary header (in binary mode) and then +// the output of object.Write(os, binary). +// The archive itself does not care whether it is in binary +// or text mode, for reading purposes. + +template<class Holder> class SequentialTableReaderArchiveImpl: + public SequentialTableReaderImplBase<Holder> { + public: + typedef typename Holder::T T; + + SequentialTableReaderArchiveImpl(): state_(kUninitialized) { } + + virtual bool Open(const std::string &rspecifier) { + if (state_ != kUninitialized) { + if (! Close()) { // call Close() yourself to suppress this exception. + if (opts_.permissive) + KALDI_WARN << "TableReader::Open, error closing previous input " + "(only warning, since permissive mode)."; + else + KALDI_ERR << "TableReader::Open, error closing previous input."; + } + } + rspecifier_ = rspecifier; + RspecifierType rs = ClassifyRspecifier(rspecifier, + &archive_rxfilename_, + &opts_); + KALDI_ASSERT(rs == kArchiveRspecifier); + + bool ans; + // NULL means don't expect binary-mode header + if (Holder::IsReadInBinary()) + ans = input_.Open(archive_rxfilename_, NULL); + else + ans = input_.OpenTextMode(archive_rxfilename_); + if (!ans) { // header. + KALDI_WARN << "TableReader: failed to open stream " + << PrintableRxfilename(archive_rxfilename_); + state_ = kUninitialized; // Failure on Open + return false; // User should print the error message. + } + state_ = kFileStart; + Next(); + if (state_ == kError) { + KALDI_WARN << "Error beginning to read archive file (wrong filename?): " + << PrintableRxfilename(archive_rxfilename_); + input_.Close(); + state_ = kUninitialized; + return false; + } + KALDI_ASSERT(state_ == kHaveObject || state_ == kEof); + return true; + } + + virtual void Next() { + switch (state_) { + case kHaveObject: + holder_.Clear(); break; + case kFileStart: case kFreedObject: + break; + default: + KALDI_ERR << "TableReader: Next() called wrongly."; + } + std::istream &is = input_.Stream(); + is.clear(); // Clear any fail bits that may have been set... just in case + // this happened in the Read function. + is >> key_; // This eats up any leading whitespace and gets the string. + if (is.eof()) { + state_ = kEof; + return; + } + if (is.fail()) { // This shouldn't really happen, barring file-system errors. + KALDI_WARN << "Error reading archive " + << PrintableRxfilename(archive_rxfilename_); + state_ = kError; + return; + } + int c; + if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key. + // We also allow tab [which is consumed] and newline [which is not], just + // so we can read archives generated by scripts that may not be fully + // aware of how this format works. + KALDI_WARN << "Invalid archive file format: expected space after key " + << key_ << ", got character " + << CharToString(static_cast<char>(is.peek())) << ", reading " + << PrintableRxfilename(archive_rxfilename_); + state_ = kError; + return; + } + if (c != '\n') is.get(); // Consume the space or tab. + if (holder_.Read(is)) { + state_ = kHaveObject; + return; + } else { + KALDI_WARN << "Object read failed, reading archive " + << PrintableRxfilename(archive_rxfilename_); + state_ = kError; + return; + } + } + + virtual bool IsOpen() const { + switch (state_) { + case kEof: case kError: case kHaveObject: case kFreedObject: return true; + case kUninitialized: return false; + default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid + // state for user to call something on. + return false; + } + } + + virtual bool Done() const { + switch (state_) { + case kHaveObject: + return false; + case kEof: case kError: + return true; // Error-state counts as Done(), but destructor + // will fail (unless you check the status with Close()). + default: + KALDI_ERR << "Done() called on TableReader object at the wrong time."; + return false; + } + } + + virtual std::string Key() { + // Valid to call this whenever Done() returns false + switch (state_) { + case kHaveObject: break; // only valid case. + default: + // coding error. + KALDI_ERR << "Key() called on TableReader object at the wrong time."; + } + return key_; + } + const T &Value() { + switch (state_) { + case kHaveObject: + break; // only valid case. + default: + // coding error. + KALDI_ERR << "Value() called on TableReader object at the wrong time."; + } + return holder_.Value(); + } + virtual void FreeCurrent() { + if (state_ == kHaveObject) { + holder_.Clear(); + state_ = kFreedObject; + } else + KALDI_WARN << "TableReader: FreeCurernt called at the wrong time."; + } + + virtual bool Close() { + if (! this->IsOpen()) + KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly."; + if (input_.IsOpen()) + input_.Close(); + if (state_ == kHaveObject) + holder_.Clear(); + bool ans; + if (opts_.permissive) { + ans = true; // always return success. + if (state_ == kError) + KALDI_WARN << "Error detected closing TableReader for archive " + << PrintableRxfilename(archive_rxfilename_) << " but ignoring " + << "it as permissive mode specified."; + } else + ans = (state_ != kError); // If error state, user should detect it. + state_ = kUninitialized; + return ans; + } + + virtual ~SequentialTableReaderArchiveImpl() { + if (state_ == kError) { + if (opts_.permissive) + KALDI_WARN << "Error detected closing TableReader for archive " + << PrintableRxfilename(archive_rxfilename_) << " but ignoring " + << "it as permissive mode specified."; + else + KALDI_ERR << "TableReader: error detected closing archive " + << PrintableRxfilename(archive_rxfilename_); + } + // If you don't want this exception to be thrown you can + // call Close() and check the status. + if (state_ == kHaveObject) + holder_.Clear(); + } + private: + Input input_; // Input object for the archive + Holder holder_; // Holds the object. + std::string key_; + std::string rspecifier_; + std::string archive_rxfilename_; + RspecifierOptions opts_; + enum { // [The state of the reading process] [does holder_ [is input_ + // have object] open] + kUninitialized, // Uninitialized or closed. no no + kFileStart, // [state we use internally: just opened.] no yes + kEof, // We did Next() and found eof in archive no no + kError, // Some other error no no + kHaveObject, // We read the key and the object after it. yes yes + kFreedObject, // The user called FreeCurrent(). no yes + } state_; +}; + + +template<class Holder> +SequentialTableReader<Holder>::SequentialTableReader(const std::string &rspecifier): impl_(NULL) { + if (rspecifier != "" && !Open(rspecifier)) + KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier; +} + +template<class Holder> +bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) { + if (IsOpen()) + if (!Close()) + KALDI_ERR << "Could not close previously open object."; + // now impl_ will be NULL. + + RspecifierType wt = ClassifyRspecifier(rspecifier, NULL, NULL); + switch (wt) { + case kArchiveRspecifier: + impl_ = new SequentialTableReaderArchiveImpl<Holder>(); + break; + case kScriptRspecifier: + impl_ = new SequentialTableReaderScriptImpl<Holder>(); + break; + case kNoRspecifier: default: + KALDI_WARN << "Invalid rspecifier " << rspecifier; + return false; + } + if (!impl_->Open(rspecifier)) { + delete impl_; + impl_ = NULL; + return false; // sub-object will have printed warnings. + } + else return true; +} + +template<class Holder> +bool SequentialTableReader<Holder>::Close() { + CheckImpl(); + bool ans = impl_->Close(); + delete impl_; // We don't keep around empty impl_ objects. + impl_ = NULL; + return ans; +} + + +template<class Holder> +bool SequentialTableReader<Holder>::IsOpen() const { + return (impl_ != NULL); // Because we delete the object whenever + // that object is not open. Thus, the IsOpen functions of the + // Impl objects are not really needed. +} + +template<class Holder> +std::string SequentialTableReader<Holder>::Key() { + CheckImpl(); + return impl_->Key(); // this call may throw if called wrongly in other ways, + // e.g. eof. +} + + +template<class Holder> +void SequentialTableReader<Holder>::FreeCurrent() { + CheckImpl(); + impl_->FreeCurrent(); +} + + +template<class Holder> +const typename SequentialTableReader<Holder>::T & +SequentialTableReader<Holder>::Value() { + CheckImpl(); + return impl_->Value(); // This may throw (if LoadCurrent() returned false you are safe.). +} + + +template<class Holder> +void SequentialTableReader<Holder>::Next() { + CheckImpl(); + impl_->Next(); +} + +template<class Holder> +bool SequentialTableReader<Holder>::Done() { + CheckImpl(); + return impl_->Done(); +} + + +template<class Holder> +SequentialTableReader<Holder>::~SequentialTableReader() { + if (impl_) delete impl_; + // Destructor of impl_ may throw. +} + + + +template<class Holder> class TableWriterImplBase { + public: + typedef typename Holder::T T; + + virtual bool Open(const std::string &wspecifier) = 0; + + // Write returns true on success, false on failure, but + // some errors may not be detected until we call Close(). + // It throws (via KALDI_ERR) if called wrongly. We could + // have just thrown on all errors, since this is what + // TableWriter does; it was designed this way because originally + // TableWriter::Write returned an exit status. + virtual bool Write(const std::string &key, const T &value) = 0; + + // Flush will flush any archive; it does not return error status, + // any errors will be reported on the next Write or Close. + virtual void Flush() = 0; + + virtual bool Close() = 0; + + virtual bool IsOpen() const = 0; + + // May throw on write error if Close was not called. + virtual ~TableWriterImplBase() { } + + TableWriterImplBase() { } + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase); +}; + + +// The implementation of TableWriter we use when writing directly +// to an archive with no associated scp. +template<class Holder> +class TableWriterArchiveImpl: public TableWriterImplBase<Holder> { + public: + typedef typename Holder::T T; + + virtual bool Open(const std::string &wspecifier) { + switch (state_) { + case kUninitialized: + break; + case kWriteError: + KALDI_ERR << "TableWriter: opening stream, already open with write error."; + case kOpen: default: + if (!Close()) // throw because this error may not have been previously + // detected by the user. + KALDI_ERR << "TableWriter: opening stream, error closing previously open stream."; + } + wspecifier_ = wspecifier; + WspecifierType ws = ClassifyWspecifier(wspecifier, + &archive_wxfilename_, + NULL, + &opts_); + KALDI_ASSERT(ws == kArchiveWspecifier); // or wrongly called. + + if (output_.Open(archive_wxfilename_, opts_.binary, false)) { // false means no binary header. + state_ = kOpen; + return true; + } else { + // stream will not be open. User will report this error + // (we return bool), so don't bother printing anything. + state_ = kUninitialized; + return false; + } + } + + virtual bool IsOpen() const { + switch (state_) { + case kUninitialized: return false; + case kOpen: case kWriteError: return true; + default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state."; + } + return false; + } + + // Write returns true on success, false on failure, but + // some errors may not be detected till we call Close(). + virtual bool Write(const std::string &key, const T &value) { + switch (state_) { + case kOpen: break; + case kWriteError: + // user should have known from the last + // call to Write that there was a problem. + KALDI_WARN << "TableWriter: attempting to write to invalid stream."; + return false; + case kUninitialized: default: + KALDI_ERR << "TableWriter: Write called on invalid stream"; + + } + // state is now kOpen or kWriteError. + if (!IsToken(key)) // e.g. empty string or has spaces... + KALDI_ERR << "TableWriter: using invalid key " << key; + output_.Stream() << key << ' '; + if (!Holder::Write(output_.Stream(), opts_.binary, value)) { + KALDI_WARN << "TableWriter: write failure to " + << PrintableWxfilename(archive_wxfilename_); + state_ = kWriteError; + return false; + } + if (state_ == kWriteError) return false; // Even if this Write seems to have + // succeeded, we fail because a previous Write failed and the archive may be + // corrupted and unreadable. + + if (opts_.flush) + Flush(); + return true; + } + + // Flush will flush any archive; it does not return error status, + // any errors will be reported on the next Write or Close. + virtual void Flush() { + switch (state_) { + case kWriteError: case kOpen: + output_.Stream().flush(); // Don't check error status. + return; + default: + KALDI_WARN << "TableWriter: Flush called on not-open writer."; + } + } + + virtual bool Close() { + if (!this->IsOpen() || !output_.IsOpen()) + KALDI_ERR << "TableWriter: Close called on a stream that was not open." << this->IsOpen() << ", " << output_.IsOpen(); + bool close_success = output_.Close(); + if (!close_success) { + KALDI_WARN << "TableWriter: error closing stream: wspecifier is " + << wspecifier_; + state_ = kUninitialized; + return false; + } + if (state_ == kWriteError) { + KALDI_WARN << "TableWriter: closing writer in error state: wspecifier is " + << wspecifier_; + state_ = kUninitialized; + return false; + } + state_ = kUninitialized; + return true; + } + + TableWriterArchiveImpl(): state_(kUninitialized) {} + + // May throw on write error if Close was not called. + virtual ~TableWriterArchiveImpl() { + if (!IsOpen()) return; + else if (!Close()) + KALDI_ERR << "At TableWriter destructor: Write failed or stream close " + << "failed: wspecifier is "<< wspecifier_; + } + + private: + Output output_; + WspecifierOptions opts_; + std::string wspecifier_; + std::string archive_wxfilename_; + enum { // is stream open? + kUninitialized, // no + kOpen, // yes + kWriteError, // yes + } state_; +}; + + + + +// The implementation of TableWriter we use when writing to +// individual files (more generally, wxfilenames) specified +// in an scp file that we read. + +// Note: the code for this class is similar to RandomAccessTableReaderScriptImpl; +// try to keep them in sync. + +template<class Holder> +class TableWriterScriptImpl: public TableWriterImplBase<Holder> { + public: + typedef typename Holder::T T; + + TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {} + + virtual bool Open(const std::string &wspecifier) { + switch (state_) { + case kReadScript: + KALDI_ERR << " Opening already open TableWriter: call Close first."; + case kUninitialized: case kNotReadScript: + break; + } + wspecifier_ = wspecifier; + WspecifierType ws = ClassifyWspecifier(wspecifier, + NULL, + &script_rxfilename_, + &opts_); + KALDI_ASSERT(ws == kScriptWspecifier); // or wrongly called. + KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point. + + if (! ReadScriptFile(script_rxfilename_, + true, // print any warnings + &script_)) { // error reading script file or invalid format + state_ = kNotReadScript; + return false; // no need to print further warnings. user gets the error. + } + std::sort(script_.begin(), script_.end()); + for (size_t i = 0; i+1 < script_.size(); i++) { + if (script_[i].first.compare(script_[i+1].first) >= 0) { + // script[i] not < script[i+1] in lexical order... + KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_) + << " contains duplicate key " << script_[i].first; + state_ = kNotReadScript; + return false; + } + } + state_ = kReadScript; + return true; + } + + virtual bool IsOpen() const { return (state_ == kReadScript); } + + virtual bool Close() { + if (!IsOpen()) + KALDI_ERR << "Close() called on TableWriter that was not open."; + state_ = kUninitialized; + last_found_ = 0; + script_.clear(); + return true; + } + + // Write returns true on success, false on failure, but + // some errors may not be detected till we call Close(). + virtual bool Write(const std::string &key, const T &value) { + if (!IsOpen()) + KALDI_ERR << "TableWriter: Write called on invalid stream"; + + if (!IsToken(key)) // e.g. empty string or has spaces... + KALDI_ERR << "TableWriter: using invalid key " << key; + + std::string wxfilename; + if (!LookupFilename(key, &wxfilename)) { + if (opts_.permissive) { + return true; // In permissive mode, it's as if we're writing to /dev/null + // for missing keys. + } else { + KALDI_WARN << "TableWriter: script file " + << PrintableRxfilename(script_rxfilename_) + << " has no entry for key "<<key; + return false; + } + } + Output output; + if (!output.Open(wxfilename, opts_.binary, false)) { + // Open in the text/binary mode (on Windows) given by member var. "binary" + // (obtained from wspecifier), but do not put the binary-mode header (it + // will be written, if needed, by the Holder::Write function.) + KALDI_WARN << "TableWriter: failed to open stream: " + << PrintableWxfilename(wxfilename); + return false; + } + if (!Holder::Write(output.Stream(), opts_.binary, value) + || !output.Close()) { + KALDI_WARN << "TableWriter: failed to write data to " + << PrintableWxfilename(wxfilename); + return false; + } + return true; + } + + // Flush does nothing in this implementation, there is nothing to flush. + virtual void Flush() { } + + + virtual ~TableWriterScriptImpl() { + // Nothing to do in destructor. + } + + private: + // Note: this function is almost the same as in RandomAccessTableReaderScriptImpl. + bool LookupFilename(const std::string &key, std::string *wxfilename) { + // First, an optimization: if we're going consecutively, this will + // make the lookup very fast. + last_found_++; + if (last_found_ < script_.size() && script_[last_found_].first == key) { + *wxfilename = script_[last_found_].second; + return true; + } + std::pair<std::string, std::string> pr(key, ""); // Important that "" + // compares less than or equal to any string, so lower_bound points to the + // element that has the same key. + typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator + IterType; + IterType iter = std::lower_bound(script_.begin(), script_.end(), pr); + if (iter != script_.end() && iter->first == key) { + last_found_ = iter - script_.begin(); + *wxfilename = iter->second; + return true; + } else { + return false; + } + } + + + WspecifierOptions opts_; + std::string wspecifier_; + std::string script_rxfilename_; + + // the script_ variable contains pairs of (key, filename), sorted using + // std::sort. This can be used with binary_search to look up filenames for + // writing. If this becomes inefficient we can use std::unordered_map (but I + // suspect this wouldn't be significantly faster & would use more memory). + // If memory becomes a problem here, the user should probably be passing + // only the relevant part of the scp file rather than expecting us to get too + // clever in the code. + std::vector<std::pair<std::string, std::string> > script_; + size_t last_found_; // This is for an optimization used in LookupFilename. + + enum { + kUninitialized, + kReadScript, + kNotReadScript, // read of script failed. + } state_; +}; + + +// The implementation of TableWriter we use when writing directly +// to an archive plus an associated scp. +template<class Holder> +class TableWriterBothImpl: public TableWriterImplBase<Holder> { + public: + typedef typename Holder::T T; + + virtual bool Open(const std::string &wspecifier) { + switch (state_) { + case kUninitialized: + break; + case kWriteError: + KALDI_ERR << "TableWriter: opening stream, already open with write error."; + case kOpen: default: + if (!Close()) // throw because this error may not have been previously detected by user. + KALDI_ERR << "TableWriter: opening stream, error closing previously open stream."; + } + wspecifier_ = wspecifier; + WspecifierType ws = ClassifyWspecifier(wspecifier, + &archive_wxfilename_, + &script_wxfilename_, + &opts_); + KALDI_ASSERT(ws == kBothWspecifier); // or wrongly called. + if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput) + KALDI_WARN << "When writing to both archive and script, the script file " + "will generally not be interpreted correctly unless the archive is " + "an actual file: wspecifier = " << wspecifier; + + if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) { // false means no binary header. + state_ = kUninitialized; + return false; + } + if (!script_output_.Open(script_wxfilename_, false, false)) { // first false means text mode: + // script files always text-mode. second false means don't write header (doesn't matter + // for text mode). + archive_output_.Close(); // Don't care about status: error anyway. + state_ = kUninitialized; + return false; + } + state_ = kOpen; + return true; + } + + virtual bool IsOpen() const { + switch (state_) { + case kUninitialized: return false; + case kOpen: case kWriteError: return true; + default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state."; + } + return false; + } + + void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const { + std::ostringstream ss; + ss << ':' << streampos; + KALDI_ASSERT(ss.str() != ":-1"); + *output = archive_wxfilename_ + ss.str(); + + // e.g. /some/file:12302. + // Note that we warned if archive_wxfilename_ is not an actual filename; + // the philosophy is we give the user rope and if they want to hang + // themselves, with it, fine. + } + + // Write returns true on success, false on failure, but + // some errors may not be detected till we call Close(). + virtual bool Write(const std::string &key, const T &value) { + switch (state_) { + case kOpen: break; + case kWriteError: + // user should have known from the last + // call to Write that there was a problem. Warn about it. + KALDI_WARN << "TableWriter: writing to non-open TableWriter object."; + return false; + case kUninitialized: default: + KALDI_ERR << "TableWriter: Write called on invalid stream"; + } + // state is now kOpen or kWriteError. + if (!IsToken(key)) // e.g. empty string or has spaces... + KALDI_ERR << "TableWriter: using invalid key " << key; + std::ostream &archive_os = archive_output_.Stream(); + archive_os << key << ' '; + typename std::ostream::pos_type archive_os_pos = archive_os.tellp(); + // position at start of Write() to archive. We will record this in the script file. + std::string offset_rxfilename; // rxfilename with offset into the archive, + // e.g. some_archive_name.ark:431541423 + MakeFilename(archive_os_pos, &offset_rxfilename); + + // Write to the script file first. + // The idea is that we want to get all the information possible into the + // script file, to make it easier to unwind errors later. + std::ostream &script_os = script_output_.Stream(); + script_output_.Stream() << key << ' ' << offset_rxfilename << '\n'; + + if (!Holder::Write(archive_output_.Stream(), opts_.binary, value)) { + KALDI_WARN << "TableWriter: write failure to" + << PrintableWxfilename(archive_wxfilename_); + state_ = kWriteError; + return false; + } + + if (script_os.fail()) { + KALDI_WARN << "TableWriter: write failure to script file detected: " + << PrintableWxfilename(script_wxfilename_); + state_ = kWriteError; + return false; + } + + if (archive_os.fail()) { + KALDI_WARN << "TableWriter: write failure to archive file detected: " + << PrintableWxfilename(archive_wxfilename_); + state_ = kWriteError; + return false; + } + + if (state_ == kWriteError) return false; // Even if this Write seems to have + // succeeded, we fail because a previous Write failed and the archive may be + // corrupted and unreadable. + + if (opts_.flush) + Flush(); + return true; + } + + // Flush will flush any archive; it does not return error status, + // any errors will be reported on the next Write or Close. + virtual void Flush() { + switch (state_) { + case kWriteError: case kOpen: + archive_output_.Stream().flush(); // Don't check error status. + script_output_.Stream().flush(); // Don't check error status. + return; + default: + KALDI_WARN << "TableWriter: Flush called on not-open writer."; + } + } + + virtual bool Close() { + if (!this->IsOpen()) + KALDI_ERR << "TableWriter: Close called on a stream that was not open."; + bool close_success = true; + if (archive_output_.IsOpen()) + if (!archive_output_.Close()) close_success = false; + if (script_output_.IsOpen()) + if (!script_output_.Close()) close_success = false; + bool ans = close_success && (state_ != kWriteError); + state_ = kUninitialized; + return ans; + } + + TableWriterBothImpl(): state_(kUninitialized) {} + + // May throw on write error if Close() was not called. + // User can get the error status by calling Close(). + virtual ~TableWriterBothImpl() { + if (!IsOpen()) return; + else if (!Close()) + KALDI_ERR << "At TableWriter destructor: Write failed or stream close failed: " + << wspecifier_; + } + + private: + Output archive_output_; + Output script_output_; + WspecifierOptions opts_; + std::string archive_wxfilename_; + std::string script_wxfilename_; + std::string wspecifier_; + enum { // is stream open? + kUninitialized, // no + kOpen, // yes + kWriteError, // yes + } state_; +}; + + +template<class Holder> +TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) { + if (wspecifier != "" && !Open(wspecifier)) { + KALDI_ERR << "TableWriter: failed to write to " + << wspecifier; + } +} + +template<class Holder> +bool TableWriter<Holder>::IsOpen() const { + return (impl_ != NULL); +} + + +template<class Holder> +bool TableWriter<Holder>::Open(const std::string &wspecifier) { + + if (IsOpen()) { + if (!Close()) // call Close() yourself to suppress this exception. + KALDI_ERR << "TableWriter::Open, failed to close previously open writer."; + } + KALDI_ASSERT(impl_ == NULL); + WspecifierType wtype = ClassifyWspecifier(wspecifier, NULL, NULL, NULL); + switch (wtype) { + case kBothWspecifier: + impl_ = new TableWriterBothImpl<Holder>(); + break; + case kArchiveWspecifier: + impl_ = new TableWriterArchiveImpl<Holder>(); + break; + case kScriptWspecifier: + impl_ = new TableWriterScriptImpl<Holder>(); + break; + case kNoWspecifier: default: + KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier; + return false; + } + if (impl_->Open(wspecifier)) return true; + else { // The class will have printed a more specific warning. + delete impl_; + impl_ = NULL; + return false; + } +} + +template<class Holder> +void TableWriter<Holder>::Write(const std::string &key, + const T &value) const { + CheckImpl(); + if (!impl_->Write(key, value)) + KALDI_ERR << "Error in TableWriter::Write"; + // More specific warning will have + // been printed in the Write function. +} + +template<class Holder> +void TableWriter<Holder>::Flush() { + CheckImpl(); + impl_->Flush(); +} + +template<class Holder> +bool TableWriter<Holder>::Close() { + CheckImpl(); + bool ans = impl_->Close(); + delete impl_; // We don't keep around non-open impl_ objects [c.f. definition of IsOpen()] + impl_ = NULL; + return ans; +} + +template<class Holder> +TableWriter<Holder>::~TableWriter() { + if (IsOpen() && !Close()) { + KALDI_ERR << "Error closing TableWriter [in destructor]."; + } +} + + +// Types of RandomAccessTableReader: +// In principle, we would like to have four types of RandomAccessTableReader: +// the 4 combinations [scp, archive], [seekable, not-seekable], +// where if something is seekable we only store a file offset. However, +// it seems sufficient for now to only implement two of these, in both +// cases assuming it's not seekable so we never store file offsets and always +// store either the scp line or the data in the archive. The reasons are: +// (1) +// For scp files, storing the actual entry is not that much more expensive +// than storing the file offsets (since the entries are just filenames), and +// avoids a lot of fseek operations that might be expensive. +// (2) +// For archive files, there is no real reason, if you have the archive file +// on disk somewhere, why you wouldn't access it via its associated scp. +// [i.e. write it as ark, scp]. The main reason to read archives directly +// is if they are part of a pipe, and in this case it's not seekable, so +// we implement only this case. +// +// Note that we will rarely in practice have to keep in memory everything in +// the archive, as long as things are only read once from the archive (the +// "o, " or "once" option) and as long as we keep our keys in sorted order; to take +// advantage of this we need the "s, " (sorted) option, so we would read archives +// as e.g. "s, o, ark:-" (this is the rspecifier we would use if it was the +// standard input and these conditions held). + +template<class Holder> class RandomAccessTableReaderImplBase { + public: + typedef typename Holder::T T; + + virtual bool Open(const std::string &rspecifier) = 0; + + virtual bool HasKey(const std::string &key) = 0; + + virtual const T &Value(const std::string &key) = 0; + + virtual bool Close() = 0; + + virtual ~RandomAccessTableReaderImplBase() {} +}; + + +// Implementation of RandomAccessTableReader for a script file; for simplicity we +// just read it in all in one go, as it's unlikely someone would generate this +// from a pipe. In principle we could read it on-demand as for the archives, but +// this would probably be overkill. + +// Note: the code for this this class is similar to TableWriterScriptImpl: +// try to keep them in sync. +template<class Holder> +class RandomAccessTableReaderScriptImpl: + public RandomAccessTableReaderImplBase<Holder> { + + public: + typedef typename Holder::T T; + + RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {} + + virtual bool Open(const std::string &rspecifier) { + switch (state_) { + case kNotHaveObject: case kHaveObject: case kGaveObject: + KALDI_ERR << " Opening already open RandomAccessTableReader: call Close first."; + case kUninitialized: case kNotReadScript: + break; + } + rspecifier_ = rspecifier; + RspecifierType rs = ClassifyRspecifier(rspecifier, + &script_rxfilename_, + &opts_); + KALDI_ASSERT(rs == kScriptRspecifier); // or wrongly called. + KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point. + + if (! ReadScriptFile(script_rxfilename_, + true, // print any warnings + &script_)) { // error reading script file or invalid format + state_ = kNotReadScript; + return false; // no need to print further warnings. user gets the error. + } + + rspecifier_ = rspecifier; + // If opts_.sorted, the user has asserted that the keys are already sorted. + // Although we could easily sort them, we want to let the user know of this + // mistake. This same mistake could have serious effects if used with an + // archive rather than a script. + if (!opts_.sorted) + std::sort(script_.begin(), script_.end()); + for (size_t i = 0; i+1 < script_.size(); i++) { + if (script_[i].first.compare(script_[i+1].first) >= 0) { + // script[i] not < script[i+1] in lexical order... + bool same = (script_[i].first == script_[i+1].first); + KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_) + << (same ? " contains duplicate key: " : + " is not sorted (remove s, option or add ns, option): key is ") + << script_[i].first; + state_ = kNotReadScript; + return false; + } + } + state_ = kNotHaveObject; + return true; + } + + virtual bool IsOpen() const { + return (state_ == kNotHaveObject || state_ == kHaveObject || + state_ == kGaveObject); + } + + virtual bool Close() { + if (!IsOpen()) + KALDI_ERR << "Close() called on RandomAccessTableReader that was not open."; + holder_.Clear(); + state_ = kUninitialized; + last_found_ = 0; + script_.clear(); + current_key_ = ""; + // This one cannot fail because any errors of a "global" + // nature would have been detected when we did Open(). + // With archives it's different. + return true; + } + + virtual bool HasKey(const std::string &key) { + bool preload = opts_.permissive; + // In permissive mode, we have to check that we can read + // the scp entry before we assert that the key is there. + return HasKeyInternal(key, preload); + } + + + // Write returns true on success, false on failure, but + // some errors may not be detected till we call Close(). + virtual const T& Value(const std::string &key) { + + if (!IsOpen()) + KALDI_ERR << "Value() called on non-open object."; + + if (!((state_ == kHaveObject || state_ == kGaveObject) + && key == current_key_)) { // Not already stored... + bool has_key = HasKeyInternal(key, true); // preload. + if (!has_key) + KALDI_ERR << "Could not get item for key " << key + << ", rspecifier is " << rspecifier_ << "[to ignore this, " + << "add the p, (permissive) option to the rspecifier."; + KALDI_ASSERT(state_ == kHaveObject && key == current_key_); + } + + if (state_ == kHaveObject) { + state_ = kGaveObject; + if (opts_.once) MakeTombstone(key); // make sure that future lookups fail. + return holder_.Value(); + } else { // state_ == kGaveObject + if (opts_.once) + KALDI_ERR << "Value called twice for the same key and ,o (once) option " + << "is used: rspecifier is " << rspecifier_; + return holder_.Value(); + } + } + + virtual ~RandomAccessTableReaderScriptImpl() { + if (state_ == kHaveObject || state_ == kGaveObject) + holder_.Clear(); + } + + private: + // HasKeyInternal when called with preload == false just tells us whether the + // key is in the scp. With preload == true, which happens when the ,p + // (permissive) option is given in the rspecifier, it will also check that we + // can preload the object from disk (loading from the rxfilename in the scp), + // and only return true if we can. This function is called both from HasKey + // and from Value(). + virtual bool HasKeyInternal(const std::string &key, bool preload) { + switch (state_) { + case kUninitialized: case kNotReadScript: + KALDI_ERR << "HasKey called on RandomAccessTableReader object that is not open."; + case kHaveObject: case kGaveObject: + if (key == current_key_) + return true; + break; + default: break; + } + KALDI_ASSERT(IsToken(key)); + size_t key_pos = 0; // set to zero to suppress warning + bool ans = LookupKey(key, &key_pos); + if (!ans) return false; + else { + // First do a check regarding the "once" option. + if (opts_.once && script_[key_pos].second == "") { // A "tombstone"; user is asking about + // already-read key. + KALDI_ERR << "HasKey called on key whose value was already read, and " + " you specified the \"once\" option (o, ): try removing o, or adding no, :" + " rspecifier is " << rspecifier_; + } + if (!preload) + return true; // we have the key. + else { // preload specified, so we have to pre-load the object before returning true. + if (!input_.Open(script_[key_pos].second)) { + KALDI_WARN << "Error opening stream " + << PrintableRxfilename(script_[key_pos].second); + return false; + } else { + // Make sure holder empty. + if (state_ == kHaveObject || state_ == kGaveObject) + holder_.Clear(); + if (holder_.Read(input_.Stream())) { + state_ = kHaveObject; + current_key_ = key; + return true; + } else { + KALDI_WARN << "Error reading object from " + "stream " << PrintableRxfilename(script_[key_pos].second); + state_ = kNotHaveObject; + return false; + } + } + } + } + } + void MakeTombstone(const std::string &key) { + size_t offset; + if (!LookupKey(key, &offset)) + KALDI_ERR << "RandomAccessTableReader object in inconsistent state."; + else + script_[offset].second = ""; + } + bool LookupKey(const std::string &key, size_t *script_offset) { + // First, an optimization: if we're going consecutively, this will + // make the lookup very fast. Since we may call HasKey and then + // Value(), which both may look up the key, we test if either the + // current or next position are correct. + if (last_found_ < script_.size() && script_[last_found_].first == key) { + *script_offset = last_found_; + return true; + } + last_found_++; + if (last_found_ < script_.size() && script_[last_found_].first == key) { + *script_offset = last_found_; + return true; + } + std::pair<std::string, std::string> pr(key, ""); // Important that "" + // compares less than or equal to any string, so lower_bound points to the + // element that has the same key. + typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator + IterType; + IterType iter = std::lower_bound(script_.begin(), script_.end(), pr); + if (iter != script_.end() && iter->first == key) { + last_found_ = *script_offset = iter - script_.begin(); + return true; + } else { + return false; + } + } + + + Input input_; // Use the same input_ object for reading each file, in case + // the scp specifies offsets in an archive (so we can keep the same file open). + RspecifierOptions opts_; + std::string rspecifier_; // rspecifier used to open it; used in debug messages + std::string script_rxfilename_; // filename of script. + + std::string current_key_; // Key of object in holder_ + Holder holder_; + + // the script_ variable contains pairs of (key, filename), sorted using + // std::sort. This can be used with binary_search to look up filenames for + // writing. If this becomes inefficient we can use std::unordered_map (but I + // suspect this wouldn't be significantly faster & would use more memory). + // If memory becomes a problem here, the user should probably be passing + // only the relevant part of the scp file rather than expecting us to get too + // clever in the code. + std::vector<std::pair<std::string, std::string> > script_; + size_t last_found_; // This is for an optimization used in FindFilename. + + enum { // [Do we have [Does holder_ + // script_ set up?] contain object?] + kUninitialized, // no no + kNotReadScript, // no no + kNotHaveObject, // yes no + kHaveObject, // yes yes + kGaveObject, // yes yes + // [kGaveObject is as kHaveObject but we note that the + // user has already read it; this is for checking that + // if "once" is specified, the user actually only reads + // it once. + } state_; + +}; + + + + +// This is the base-class (with some implemented functions) for the +// implementations of RandomAccessTableReader when it's an archive. This +// base-class handles opening the files, storing the state of the reading +// process, and loading objects. This is the only case in which we have +// an intermediate class in the hierarchy between the virtual ImplBase +// class and the actual Impl classes. +// The child classes vary in the assumptions regarding sorting, etc. + +template<class Holder> class RandomAccessTableReaderArchiveImplBase: + public RandomAccessTableReaderImplBase<Holder> { + public: + typedef typename Holder::T T; + + RandomAccessTableReaderArchiveImplBase(): holder_(NULL), state_(kUninitialized) { } + + virtual bool Open(const std::string &rspecifier) { + if (state_ != kUninitialized) { + if (! this->Close()) // call Close() yourself to suppress this exception. + KALDI_ERR << "TableReader::Open, error closing previous input."; + } + rspecifier_ = rspecifier; + RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_, + &opts_); + KALDI_ASSERT(rs == kArchiveRspecifier); + + // NULL means don't expect binary-mode header + bool ans; + if (Holder::IsReadInBinary()) + ans = input_.Open(archive_rxfilename_, NULL); + else + ans = input_.OpenTextMode(archive_rxfilename_); + if (!ans) { // header. + KALDI_WARN << "TableReader: failed to open stream " + << PrintableRxfilename(archive_rxfilename_); + state_ = kUninitialized; // Failure on Open + return false; // User should print the error message. + } else { + state_ = kNoObject; + } + return true; + } + + // ReadNextObject() requires that the state be kNoObject, + // and it will try read the next object. If it succeeds, + // it sets the state to kHaveObject, and + // cur_key_ and holder_ have the key and value. If it fails, + // it sets the state to kError or kEof. + void ReadNextObject() { + if (state_ != kNoObject) + KALDI_ERR << "TableReader: ReadNextObject() called from wrong state."; // Code error + // somewhere in this class or a child class. + std::istream &is = input_.Stream(); + is.clear(); // Clear any fail bits that may have been set... just in case + // this happened in the Read function. + is >> cur_key_; // This eats up any leading whitespace and gets the string. + if (is.eof()) { + state_ = kEof; + return; + } + if (is.fail()) { // This shouldn't really happen, barring file-system errors. + KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_; + state_ = kError; + return; + } + int c; + if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key. + // We also allow tab, just so we can read archives generated by scripts that may + // not be fully aware of how this format works. + KALDI_WARN << "Invalid archive file format: expected space after key " <<cur_key_ + <<", got character " + << CharToString(static_cast<char>(is.peek())) << ", reading archive " + << PrintableRxfilename(archive_rxfilename_); + state_ = kError; + return; + } + if (c != '\n') is.get(); // Consume the space or tab. + holder_ = new Holder; + if (holder_->Read(is)) { + state_ = kHaveObject; + return; + } else { + KALDI_WARN << "Object read failed, reading archive " + << PrintableRxfilename(archive_rxfilename_); + state_ = kError; + delete holder_; + holder_ = NULL; + return; + } + } + + virtual bool IsOpen() const { + switch (state_) { + case kEof: case kError: case kHaveObject: case kNoObject: return true; + case kUninitialized: return false; + default: KALDI_ERR << "IsOpen() called on invalid object."; + return false; + } + } + + // Called by the child-class virutal Close() functions; does the + // shared parts of the cleanup. + bool CloseInternal() { + if (! this->IsOpen()) + KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly."; + if (input_.IsOpen()) + input_.Close(); + if (state_ == kHaveObject) { + KALDI_ASSERT(holder_ != NULL); + delete holder_; + holder_ = NULL; + } else KALDI_ASSERT(holder_ == NULL); + bool ans = (state_ != kError); + state_ = kUninitialized; + if (!ans && opts_.permissive) { + KALDI_WARN << "Error state detected closing reader. " + << "Ignoring it because you specified permissive mode."; + return true; + } + return ans; + } + + ~RandomAccessTableReaderArchiveImplBase() { + // The child class has the responsibility to call CloseInternal(). + KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL); + } + private: + Input input_; // Input object for the archive + protected: + // The variables below are accessed by child classes. + + std::string cur_key_; // current key (if state == kHaveObject). + Holder *holder_; // Holds the object we just read (if state == kHaveObject) + + std::string rspecifier_; + std::string archive_rxfilename_; + RspecifierOptions opts_; + + enum { // [The state of the reading process] [does holder_ [is input_ + // have object] open] + kUninitialized, // Uninitialized or closed no no + kNoObject, // Do not have object in holder_ no yes + kHaveObject, // Have object in holder_ yes yes + kEof, // End of file no yes + kError, // Some kind of error-state in the reading. no yes + } state_; + +}; + + +// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is the +// implementation for random-access reading of archives when both the archive, +// and the calling code, are in sorted order (i.e. we ask for the keys in sorted +// order). This is when the s and cs options are both given. It only ever has +// to keep one object in memory. It inherits from +// RandomAccessTableReaderArchiveImplBase which implements the common parts of +// RandomAccessTableReader that are used when it's an archive we're reading from. + +template<class Holder> class RandomAccessTableReaderDSortedArchiveImpl: + public RandomAccessTableReaderArchiveImplBase<Holder> { + using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; + using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; + using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; + using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; + using RandomAccessTableReaderArchiveImplBase<Holder>::kError; + using RandomAccessTableReaderArchiveImplBase<Holder>::state_; + using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; + using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; + using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; + using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; + using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; + using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; + public: + typedef typename Holder::T T; + + RandomAccessTableReaderDSortedArchiveImpl() { } + + virtual bool Close() { + // We don't have anything additional to clean up, so just + // call generic base-class one. + return this->CloseInternal(); + } + + virtual bool HasKey(const std::string &key) { + return FindKeyInternal(key); + } + virtual const T & Value(const std::string &key) { + if (FindKeyInternal(key)) { + KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_ + && holder_ != NULL); + return this->holder_->Value(); + } else { + KALDI_ERR << "Value() called but no such key " << key + << " in archive " << PrintableRxfilename(archive_rxfilename_); + return *(const T*)NULL; // keep compiler happy. + } + } + + virtual ~RandomAccessTableReaderDSortedArchiveImpl() { + if (this->IsOpen()) + if (!Close()) // more specific warning will already have been printed. + // we are in some kind of error state & user did not find out by + // calling Close(). + KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " + << rspecifier_; + } + private: + // FindKeyInternal tries to find the key by calling "ReadNextObject()" + // as many times as necessary till we get to it. It is called from + // both FindKey and Value(). + bool FindKeyInternal(const std::string &key) { + // First check that the user is calling us right: should be + // in sorted order. If not, error. + if (!last_requested_key_.empty()) { + if (key.compare(last_requested_key_) < 0) { // key < last_requested_key_ + KALDI_ERR << "You provided the \"cs\" option " + << "but are not calling with keys in sorted order: " + << key << " < " << last_requested_key_ << ": rspecifier is " + << rspecifier_; + } + } + // last_requested_key_ is just for debugging of order of calling. + last_requested_key_ = key; + + if (state_ == kNoObject) + ReadNextObject(); // This can only happen + // once, the first time someone calls HasKey() or Value(). We don't + // do it in the initializer to stop the program hanging too soon, + // if reading from a pipe. + + if (state_ == kEof || state_ == kError) return false; + + if (state_ == kUninitialized) + KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open."; + + std::string last_key_; // To check that + // the archive we're reading is in sorted order. + while (1) { + KALDI_ASSERT(state_ == kHaveObject); + int compare = key.compare(cur_key_); + if (compare == 0) { // key == key_ + return true; // we got it.. + } else if (compare < 0) { // key < cur_key_, so we already read past the + // place where we want to be. This implies that we will never find it + // [due to the sorting etc., this means it just isn't in the archive]. + return false; + } else { // compare > 0, key > cur_key_. We need to read further ahead. + last_key_ = cur_key_; + // read next object.. we have to set state to kNoObject first. + KALDI_ASSERT(holder_ != NULL); + delete holder_; + holder_ = NULL; + state_ = kNoObject; + ReadNextObject(); + if (state_ != kHaveObject) + return false; // eof or read error. + if (cur_key_.compare(last_key_) <= 0) { + KALDI_ERR << "You provided the \"s\" option " + << " (sorted order), but keys are out of order or duplicated: " + << last_key_ << " is followed by " << cur_key_ + << ": rspecifier is " << rspecifier_; + } + } + } + } + + /// Last string provided to HasKey() or Value(); + std::string last_requested_key_; + + +}; + +// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of +// archives when the user specified the sorted (s) option but not the +// called-sorted (cs) options. +template<class Holder> class RandomAccessTableReaderSortedArchiveImpl: + public RandomAccessTableReaderArchiveImplBase<Holder> { + using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; + using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; + using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; + using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; + using RandomAccessTableReaderArchiveImplBase<Holder>::kError; + using RandomAccessTableReaderArchiveImplBase<Holder>::state_; + using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; + using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; + using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; + using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; + using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; + using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; + + public: + typedef typename Holder::T T; + + RandomAccessTableReaderSortedArchiveImpl(): + last_found_index_(static_cast<size_t>(-1)), + pending_delete_(static_cast<size_t>(-1)) { } + + virtual bool Close() { + for (size_t i = 0; i < seen_pairs_.size(); i++) + if (seen_pairs_[i].second) + delete seen_pairs_[i].second; + seen_pairs_.clear(); + + pending_delete_ = static_cast<size_t>(-1); + last_found_index_ = static_cast<size_t>(-1); + + return this->CloseInternal(); + } + virtual bool HasKey(const std::string &key) { + HandlePendingDelete(); + size_t index; + bool ans = FindKeyInternal(key, &index); + if (ans && opts_.once && seen_pairs_[index].second == NULL) { + // Just do a check RE the once option. "&&opts_.once" is for + // efficiency since this can only happen in that case. + KALDI_ERR << "Error: HasKey called after Value() already called for " + << " that key, and once (o) option specified: rspecifier is " + << rspecifier_; + } + return ans; + } + virtual const T & Value(const std::string &key) { + HandlePendingDelete(); + size_t index; + if (FindKeyInternal(key, &index)) { + if (seen_pairs_[index].second == NULL) { // can happen if opts.once_ + KALDI_ERR << "Error: Value() called more than once for key " + << key << " and once (o) option specified: rspecifier is " + << rspecifier_; + } + if (opts_.once) + pending_delete_ = index; // mark this index to be deleted on next call. + return seen_pairs_[index].second->Value(); + } else { + KALDI_ERR << "Value() called but no such key " << key + << " in archive " << PrintableRxfilename(archive_rxfilename_); + return *(const T*)NULL; // keep compiler happy. + } + } + virtual ~RandomAccessTableReaderSortedArchiveImpl() { + if (this->IsOpen()) + if (!Close()) // more specific warning will already have been printed. + // we are in some kind of error state & user did not find out by + // calling Close(). + KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " + << rspecifier_; + } + private: + void HandlePendingDelete() { + const size_t npos = static_cast<size_t>(-1); + if (pending_delete_ != npos) { + KALDI_ASSERT(pending_delete_ < seen_pairs_.size()); + KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL); + delete seen_pairs_[pending_delete_].second; + seen_pairs_[pending_delete_].second = NULL; + pending_delete_ = npos; + } + } + + // FindKeyInternal tries to find the key in the array "seen_pairs_". + // If it is not already there, it reads ahead as far as necessary + // to determine whether we have the key or not. On success it returns + // true and puts the index into the array seen_pairs_, into "index"; + // on failure it returns false. + // It will leave the state as either kNoObject, kEof or kError. + // FindKeyInternal does not do any checking about whether you are asking + // about a key that has been already given (with the "once" option). + // That is the user's responsibility. + + bool FindKeyInternal(const std::string &key, size_t *index) { + // First, an optimization in case the previous call was for the + // same key, and we found it. + if (last_found_index_ < seen_pairs_.size() + && seen_pairs_[last_found_index_].first == key) { + *index = last_found_index_; + return true; + } + + if (state_ == kUninitialized) + KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open."; + + // Step one is to see whether we have to read ahead for the object.. + // Note, the possible states right now are kNoObject, kEof or kError. + // We are never in the state kHaveObject except just after calling + // ReadNextObject(). + bool looped = false; + while (state_ == kNoObject && + (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) { + looped = true; + // Read this as: + // while ( the stream is potentially good for reading && + // ([got no keys] || key > most_recent_key) ) { ... + // Try to read a new object. + // Note that the keys in seen_pairs_ are ordered from least to greatest. + ReadNextObject(); + if (state_ == kHaveObject) { // Successfully read object. + if (!seen_pairs_.empty() && // This is just a check. + cur_key_.compare(seen_pairs_.back().first) <= 0) { + // read the expression above as: !( cur_key_ > previous_key). + // it means we are not in sorted order [the user specified that we + // are, or we would not be using this implementation]. + KALDI_ERR << "You provided the sorted (s) option but keys in archive " + << PrintableRxfilename(archive_rxfilename_) << " are not " + << "in sorted order: " << seen_pairs_.back().first + << " is followed by " << cur_key_; + } + KALDI_ASSERT(holder_ != NULL); + seen_pairs_.push_back(std::make_pair(cur_key_, holder_)); + holder_ = NULL; + state_ = kNoObject; + } + } + if (looped) { // We only need to check the last element of the seen_pairs_ array, + // since we would not have read more after getting "key". + if (!seen_pairs_.empty() && seen_pairs_.back().first == key) { + last_found_index_ = *index = seen_pairs_.size() - 1; + return true; + } else return false; + } + // Now we have do an actual binary search in the seen_pairs_ array. + std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL)); + typename std::vector<std::pair<std::string, Holder*> >::iterator + iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(), + pr, PairCompare()); + if (iter != seen_pairs_.end() && + key == iter->first) { + last_found_index_ = *index = (iter - seen_pairs_.begin()); + return true; + } else return false; + } + + // These are the pairs of (key, object) we have read. We keep all the keys we + // have read but the actual objects (if they are stored with pointers inside + // the Holder object) may be deallocated if once == true, and the Holder + // pointer set to NULL. + std::vector<std::pair<std::string, Holder*> > seen_pairs_; + size_t last_found_index_; // An optimization s.t. if FindKeyInternal called twice with + // same key (as it often will), it doesn't have to do the key search twice. + size_t pending_delete_; // If opts_.once == true, this is the index of + // element of seen_pairs_ that is pending deletion. + struct PairCompare { + // PairCompare is the Less-than operator for the pairs of(key, Holder). + // compares the keys. + inline bool operator() (const std::pair<std::string, Holder*> &pr1, + const std::pair<std::string, Holder*> &pr2) { + return (pr1.first.compare(pr2.first) < 0); + } + }; +}; + + + +// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of +// archives when the user does not specify the sorted (s) option (in this case +// the called-sorted, or "cs" option, is ignored). This is the least efficient +// of the random access archive readers, in general, but it can be as efficient +// as the others, in speed, memory and latency, if the "once" option is specified +// and it happens that the keys of the archive are the same as the keys the code +// is called with (to HasKey() and Value()), and in the same order. However, if +// you ask it for a key that's not present it will have to read the archive till +// the end and store it all in memory. + +template<class Holder> class RandomAccessTableReaderUnsortedArchiveImpl: + public RandomAccessTableReaderArchiveImplBase<Holder> { + using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; + using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; + using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; + using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; + using RandomAccessTableReaderArchiveImplBase<Holder>::kError; + using RandomAccessTableReaderArchiveImplBase<Holder>::state_; + using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; + using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; + using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; + using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; + using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; + using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; + + typedef typename Holder::T T; + + public: + RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()), + to_delete_iter_valid_(false) + { + map_.max_load_factor(0.5); // make it quite empty -> quite efficient. + // default seems to be 1. + } + + virtual bool Close() { + for (typename MapType::iterator iter = map_.begin(); + iter != map_.end(); + ++iter) { + if (iter->second) + delete iter->second; + } + map_.clear(); + first_deleted_string_ = ""; + to_delete_iter_valid_ = false; + return this->CloseInternal(); + } + + virtual bool HasKey(const std::string &key) { + HandlePendingDelete(); + return FindKeyInternal(key, NULL); + } + virtual const T & Value(const std::string &key) { + HandlePendingDelete(); + const T *ans_ptr = NULL; + if (FindKeyInternal(key, &ans_ptr)) + return *ans_ptr; + else + KALDI_ERR << "Value() called but no such key " << key + << " in archive " << PrintableRxfilename(archive_rxfilename_); + return *(const T*)NULL; // keep compiler happy. + } + virtual ~RandomAccessTableReaderUnsortedArchiveImpl() { + if (this->IsOpen()) + if (!Close()) // more specific warning will already have been printed. + // we are in some kind of error state & user did not find out by + // calling Close(). + KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " + << rspecifier_; + } + private: + void HandlePendingDelete() { + if (to_delete_iter_valid_) { + to_delete_iter_valid_ = false; + delete to_delete_iter_->second; // Delete Holder object. + if (first_deleted_string_.length() == 0) + first_deleted_string_ = to_delete_iter_->first; + map_.erase(to_delete_iter_); // delete that element. + } + } + + // FindKeyInternal tries to find the key in the map "map_" + // If it is not already there, it reads ahead either until it finds the + // key, or until end of file. If called with value_ptr == NULL, + // it assumes it's called from HasKey() and just returns true or false + // and doesn't otherwise have side effects. If called with value_ptr != + // NULL, it assumes it's called from Value(). Thus, it will crash + // if it cannot find the key. If it can find it it puts its address in + // *value_ptr, and if opts_once == true it will mark that element of the + // map to be deleted. + + bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) { + typename MapType::iterator iter = map_.find(key); + if (iter != map_.end()) { // Found in the map... + if (value_ptr == NULL) { // called from HasKey + return true; // this is all we have to do. + } else { + *value_ptr = &(iter->second->Value()); + if (opts_.once) { // value won't be needed again, so mark + // for deletion. + to_delete_iter_ = iter; // pending delete. + KALDI_ASSERT(!to_delete_iter_valid_); + to_delete_iter_valid_ = true; + } + return true; + } + } + while (state_ == kNoObject) { + ReadNextObject(); + if (state_ == kHaveObject) { // Successfully read object. + state_ = kNoObject; // we are about to transfer ownership + // of the object in holder_ to map_. + // Insert it into map_. + std::pair<typename MapType::iterator, bool> pr = + map_.insert(typename MapType::value_type(cur_key_, holder_)); + + if (!pr.second) { // Was not inserted-- previous element w/ same key + delete holder_; // map was not changed, no ownership transferred. + holder_ = NULL; + KALDI_ERR << "Error in RandomAccessTableReader: duplicate key " + << cur_key_ << " in archive " << archive_rxfilename_; + } + holder_ = NULL; // ownership transferred to map_. + if (cur_key_ == key) { // the one we wanted.. + if (value_ptr == NULL) { // called from HasKey + return true; + } else { // called from Value() + *value_ptr = &(pr.first->second->Value()); // this gives us the + // Value() from the Holder in the map. + if (opts_.once) { // mark for deletion, as won't be needed again. + to_delete_iter_ = pr.first; + KALDI_ASSERT(!to_delete_iter_valid_); + to_delete_iter_valid_ = true; + } + return true; + } + } + } + } + if (opts_.once && key == first_deleted_string_) { + KALDI_ERR << "You specified the once (o) option but " + << "you are calling using key " << key + << " more than once: rspecifier is " << rspecifier_; + } + return false; // We read the entire archive (or got to error state) and didn't + // find it. + } + + typedef unordered_map<std::string, Holder*, StringHasher> MapType; + MapType map_; + + typename MapType::iterator to_delete_iter_; + bool to_delete_iter_valid_; + + std::string first_deleted_string_; // keep the first string we deleted + // from map_ (if opts_.once == true). It's for an inexact spot-check that the + // "once" option isn't being used incorrectly. + +}; + + + + + +template<class Holder> +RandomAccessTableReader<Holder>::RandomAccessTableReader(const std::string &rspecifier): + impl_(NULL) { + if (rspecifier != "" && !Open(rspecifier)) + KALDI_ERR << "Error opening RandomAccessTableReader object " + " (rspecifier is: " << rspecifier << ")"; +} + +template<class Holder> +bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) { + if (IsOpen()) + KALDI_ERR << "Already open."; + RspecifierOptions opts; + RspecifierType rs = ClassifyRspecifier(rspecifier, NULL, &opts); + switch (rs) { + case kScriptRspecifier: + impl_ = new RandomAccessTableReaderScriptImpl<Holder>(); + break; + case kArchiveRspecifier: + if (opts.sorted) { + if (opts.called_sorted) // "doubly" sorted case. + impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>(); + else + impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>(); + } else impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>(); + break; + case kNoRspecifier: default: + KALDI_WARN << "Invalid rspecifier: " + << rspecifier; + return false; + } + if (impl_->Open(rspecifier)) + return true; + else { + // Warning will already have been printed. + delete impl_; + impl_ = NULL; + return false; + } +} + +template<class Holder> +bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) { + CheckImpl(); + if (!IsToken(key)) + KALDI_ERR << "Invalid key \"" << key << '"'; + return impl_->HasKey(key); +} + + +template<class Holder> +const typename RandomAccessTableReader<Holder>::T& +RandomAccessTableReader<Holder>::Value(const std::string &key) { + CheckImpl(); + return impl_->Value(key); +} + +template<class Holder> +bool RandomAccessTableReader<Holder>::Close() { + CheckImpl(); + bool ans =impl_->Close(); + delete impl_; + impl_ = NULL; + return ans; +} + +template<class Holder> +RandomAccessTableReader<Holder>::~RandomAccessTableReader() { + if (IsOpen() && !Close()) // call Close() yourself to stop this being thrown. + KALDI_ERR << "failure detected in destructor."; +} + +template<class Holder> +void SequentialTableReader<Holder>::CheckImpl() const { + if (!impl_) { + KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you " + << "passed the empty string as an argument to a program?)"; + } +} + +template<class Holder> +void RandomAccessTableReader<Holder>::CheckImpl() const { + if (!impl_) { + KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you " + << "passed the empty string as an argument to a program?)"; + } +} + +template<class Holder> +void TableWriter<Holder>::CheckImpl() const { + if (!impl_) { + KALDI_ERR << "Trying to use empty TableWriter (perhaps you " + << "passed the empty string as an argument to a program?)"; + } +} + +template<class Holder> +RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped( + const std::string &table_rxfilename, + const std::string &utt2spk_rxfilename): + reader_(table_rxfilename), token_reader_(table_rxfilename.empty() ? "" : + utt2spk_rxfilename), + utt2spk_rxfilename_(utt2spk_rxfilename) { } + +template<class Holder> +bool RandomAccessTableReaderMapped<Holder>::Open( + const std::string &table_rxfilename, + const std::string &utt2spk_rxfilename) { + if (reader_.IsOpen()) reader_.Close(); + if (token_reader_.IsOpen()) token_reader_.Close(); + KALDI_ASSERT(!table_rxfilename.empty()); + if (!reader_.Open(table_rxfilename)) return false; // will have printed + // warning internally, probably. + if (!utt2spk_rxfilename.empty()) { + if (!token_reader_.Open(utt2spk_rxfilename)) { + reader_.Close(); + return false; + } + } + return true; +} + + +template<class Holder> +bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) { + // We don't check IsOpen, we let the call go through to the member variable + // (reader_), which will crash with a more informative error message than + // we can give here, as we don't any longer know the rxfilename. + if (token_reader_.IsOpen()) { // We need to map the key from utt to spk. + if (!token_reader_.HasKey(utt)) + KALDI_ERR << "Attempting to read key " << utt << ", which is not present " + << "in utt2spk map or similar map being read from " + << PrintableRxfilename(utt2spk_rxfilename_); + const std::string &spk = token_reader_.Value(utt); + return reader_.HasKey(spk); + } else { + return reader_.HasKey(utt); + } +} + +template<class Holder> +const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value( + const std::string &utt) { + if (token_reader_.IsOpen()) { // We need to map the key from utt to spk. + if (!token_reader_.HasKey(utt)) + KALDI_ERR << "Attempting to read key " << utt << ", which is not present " + << "in utt2spk map or similar map being read from " + << PrintableRxfilename(utt2spk_rxfilename_); + const std::string &spk = token_reader_.Value(utt); + return reader_.Value(spk); + } else { + return reader_.Value(utt); + } +} + + + +/// @} + +} // end namespace kaldi + + + +#endif |