add implementation for kaldi io (by ymz)

author: Determinant <[email protected]> 2015-08-14 11:51:42 +0800
committer: Determinant <[email protected]> 2015-08-14 11:51:42 +0800
commit: 96a32415ab43377cf1575bd3f4f2980f58028209 (patch)
tree: 30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/kaldi/util/kaldi-table-inl.h
parent: c177a7549bd90670af4b29fa813ddea32cfe0f78 (diff)
1 files changed, 2246 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/util/kaldi-table-inl.h b/kaldi_io/src/kaldi/util/kaldi-table-inl.h
new file mode 100644
index 0000000..6b73c88
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-table-inl.h
@@ -0,0 +1,2246 @@
+// util/kaldi-table-inl.h
+
+// Copyright 2009-2011    Microsoft Corporation
+//                2013    Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_
+#define KALDI_UTIL_KALDI_TABLE_INL_H_
+
+#include <algorithm>
+#include "util/kaldi-io.h"
+#include "util/text-utils.h"
+#include "util/stl-utils.h" // for StringHasher.
+
+
+namespace kaldi {
+
+/// \addtogroup table_impl_types
+/// @{
+
+// Abstract interface shared by the implementations that sit behind
+// SequentialTableReader.  "Holder" supplies the stored type (Holder::T).
+template<class Holder>
+class SequentialTableReaderImplBase {
+ public:
+  typedef typename Holder::T T;
+
+  SequentialTableReaderImplBase() { }
+
+  // Opens the input; returns true on success.  Note: the implementations in
+  // this file are handed the full rspecifier here and classify it themselves.
+  virtual bool Open(const std::string &rxfilename) = 0;
+
+  // Returns true when there is no current entry to process (the
+  // implementations in this file report both end-of-input and error states
+  // as done).
+  virtual bool Done() const = 0;
+
+  // Returns true if the reader has been opened and not yet closed.
+  virtual bool IsOpen() const = 0;
+
+  // Returns the key of the current entry.
+  virtual std::string Key() = 0;
+
+  // Returns a reference to the object of the current entry.
+  virtual const T &Value() = 0;
+
+  // Releases the object of the current entry.
+  virtual void FreeCurrent() = 0;
+
+  // Advances to the next entry.
+  virtual void Next() = 0;
+
+  // Closes the reader and returns its status.
+  virtual bool Close() = 0;
+
+  virtual ~SequentialTableReaderImplBase() { }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase);
+};
+
+
+// This is the implementation for SequentialTableReader
+// when it's actually a script file.
+template<class Holder>  class SequentialTableReaderScriptImpl:
+      public SequentialTableReaderImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  SequentialTableReaderScriptImpl(): state_(kUninitialized) { }
+
+  // Opens the script file named by "rspecifier" (which must classify as
+  // kScriptRspecifier) and advances to its first entry.
+  // Returns true on success; an empty script file is not an error.
+  // Returns false, leaving the object closed (kUninitialized), if the script
+  // file cannot be opened, is a binary file, or an invalid line is hit while
+  // advancing to the first entry.  Leaving the object closed on failure
+  // matters: the destructor throws if it sees kError, so a caller that
+  // deletes this object after a failed Open() would otherwise get an
+  // exception instead of the "false" return.
+  // Throws (via KALDI_ERR) if a previously open input cannot be closed.
+  virtual bool Open(const std::string &rspecifier) {
+    if (state_ != kUninitialized)
+      if (! Close()) // call Close() yourself to suppress this exception.
+        KALDI_ERR << "TableReader::Open, error closing previous input: "
+                  << "rspecifier was " << rspecifier_;
+    bool binary;
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kScriptRspecifier);
+    if (!script_input_.Open(script_rxfilename_, &binary)) {  // Failure on Open
+      KALDI_WARN << "Failed to open script file "
+                 << PrintableRxfilename(script_rxfilename_);
+      state_ = kUninitialized;
+      return false;
+    }
+    // Open succeeded.
+    if (binary) {  // script file should not be binary file..
+      KALDI_WARN << "Script file should not be a binary file: "
+                 << PrintableRxfilename(script_rxfilename_);
+      script_input_.Close();
+      state_ = kUninitialized;
+      return false;
+    }
+    state_ = kFileStart;
+    Next();
+    if (state_ == kError) {
+      KALDI_WARN << "Error beginning to read script file (invalid line?): "
+                 << PrintableRxfilename(script_rxfilename_);
+      script_input_.Close();
+      // In permissive mode Next() may already have tried to load data.
+      if (data_input_.IsOpen())
+        data_input_.Close();
+      state_ = kUninitialized;
+      return false;
+    }
+    if (opts_.permissive) {  // Next() will have preloaded.
+      KALDI_ASSERT(state_ == kLoadSucceeded || state_ == kEof);
+    } else {
+      KALDI_ASSERT(state_ == kHaveScpLine || state_ == kEof);
+    }
+    return true;  // Success.
+  }
+
+  // Returns true if Open() succeeded and Close() has not been called since.
+  // Throws if called in the internal kFileStart state.
+  virtual bool IsOpen() const {
+    switch (state_) {
+      case kEof: case kError: case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: return true;
+      case kUninitialized:  return false;
+      default: KALDI_ERR << "IsOpen() called on invalid object.";  // kFileStart is not valid
+        // state for user to call something on.
+        return false;
+    }
+  }
+
+  // Returns false while there is a current scp line, true at end of file or
+  // on error.  Throws if the object is not open.
+  virtual bool Done() const {
+    switch (state_) {
+      case kHaveScpLine: return false;
+      case kLoadSucceeded: case kLoadFailed: return false;
+        // These cases are because we want LoadCurrent()
+        // to be callable after Next() and to not change the Done() status [only Next() should change
+        // the Done() status].
+      case kEof: case kError: return true;  // Error condition, like Eof, counts as Done(); the destructor
+        // or Close() will inform the user of the error.
+      default: KALDI_ERR << "Done() called on TableReader object at the wrong time.";
+        return false;
+    }
+  }
+
+  // Returns the key of the current scp line.
+  virtual std::string Key() {
+    // Valid to call this whenever Done() returns false.
+    switch (state_) {
+      case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: break;
+      default:
+        // coding error.
+        KALDI_ERR << "Key() called on TableReader object at the wrong time.";
+    }
+    return key_;
+  }
+
+  // Returns the object for the current scp line, loading it first if that
+  // has not been attempted yet.  Throws if the load fails, if the object was
+  // already released with FreeCurrent(), or if there is no current line.
+  virtual const T &Value() {
+    StateType orig_state = state_;
+    if (state_ == kHaveScpLine) LoadCurrent();  // Takes
+    // state_ to kLoadSucceeded or kLoadFailed.
+    if (state_ == kLoadFailed) {  // this can happen due to
+      // a file listed in an scp file not existing, or
+      // read failure, failure of a command, etc.
+      if (orig_state == kHaveScpLine)
+        KALDI_ERR << "TableReader: failed to load object from "
+                  << PrintableRxfilename(data_rxfilename_)
+                  << " (to suppress this error, add the permissive "
+                  << "(p, ) option to the rspecifier.";
+
+      else // orig_state_ was kLoadFailed, which only could have happened
+        // if the user called FreeCurrent().
+        KALDI_ERR << "TableReader: you called Value() after FreeCurrent().";
+    } else if (state_ != kLoadSucceeded) {
+      // This would be a coding error.
+      KALDI_ERR << "TableReader: Value() called at the wrong time.";
+    }
+    return holder_.Value();
+  }
+
+  // Releases the currently loaded object; only warns if none is loaded.
+  virtual void FreeCurrent() {
+    if (state_ == kLoadSucceeded) {
+      holder_.Clear();
+      state_ = kLoadFailed;
+    } else {
+      KALDI_WARN << "TableReader: FreeCurrent called at the wrong time.";
+    }
+  }
+
+  // Advances to the next scp line.  In permissive mode, lines whose object
+  // cannot be loaded are skipped.
+  virtual void Next() {
+    while (1) {
+      NextScpLine();
+      if (Done()) return;
+      if (opts_.permissive) {
+        // Permissive mode means, when reading scp files, we treat keys whose scp entry
+        // cannot be read as nonexistent.  This means trying to read.
+        if (LoadCurrent()) return;  // Success.
+        // else try the next scp line.
+      } else {
+        return;  // We go the next key; Value() will crash if we can't
+        // read the scp line.
+      }
+    }
+  }
+
+  // Closes the reader.  Returns false only if the reader was in the error
+  // state and permissive mode is off.  Throws if the reader was not open.
+  virtual bool Close() {
+    // Close() will succeed if the stream was not in an error
+    // state.  To clean up, it also closes the Input objects if
+    // they're open.
+    if (script_input_.IsOpen())
+      script_input_.Close();
+    if (data_input_.IsOpen())
+      data_input_.Close();
+    if (state_ == kLoadSucceeded)
+      holder_.Clear();
+    if (!this->IsOpen())
+      KALDI_ERR << "Close() called on input that was not open.";
+    StateType old_state = state_;
+    state_ = kUninitialized;
+    if (old_state == kError) {
+      if (opts_.permissive) {
+        KALDI_WARN << "Close() called on scp file with read error, ignoring the "
+            "error because permissive mode specified.";
+        return true;
+      } else  return false;  // User will do something with the error status.
+    } else  return true;
+  }
+
+  virtual ~SequentialTableReaderScriptImpl() {
+    if (state_ == kError)
+      KALDI_ERR << "TableReader: reading script file failed: from scp "
+                << PrintableRxfilename(script_rxfilename_);
+    // If you don't want this exception to be thrown you can
+    // call Close() and check the status.
+    if (state_ == kLoadSucceeded)
+      holder_.Clear();
+  }
+ private:
+  // Attempts to load the object whose rxfilename is on the current scp line.
+  // Sets state_ to kLoadSucceeded or kLoadFailed and returns whether the
+  // load succeeded.  Must be called in state kHaveScpLine.
+  bool LoadCurrent() {
+    if (state_ != kHaveScpLine)
+      KALDI_ERR << "TableReader: LoadCurrent() called at the wrong time.";
+    bool ans;
+    // note, NULL means it doesn't read the binary-mode header
+    if (Holder::IsReadInBinary()) ans = data_input_.Open(data_rxfilename_, NULL);
+    else ans = data_input_.OpenTextMode(data_rxfilename_);
+    if (!ans) {
+      // May want to make this warning a VLOG at some point
+      KALDI_WARN << "TableReader: failed to open file "
+                 << PrintableRxfilename(data_rxfilename_);
+      state_ = kLoadFailed;
+      return false;
+    } else {
+      if (holder_.Read(data_input_.Stream())) {
+        state_ = kLoadSucceeded;
+        return true;
+      } else {  // holder_ will not contain data.
+        KALDI_WARN << "TableReader: failed to load object from "
+                   << PrintableRxfilename(data_rxfilename_);
+        state_ = kLoadFailed;
+        return false;
+      }
+    }
+  }
+
+  // Reads the next line in the script file, setting state_ to kHaveScpLine
+  // (valid line), kError (line without both a key and an rxfilename) or
+  // kEof (nothing left to read).
+  void NextScpLine() {
+    switch (state_) {
+      case kLoadSucceeded: holder_.Clear(); break;
+      case kHaveScpLine: case kLoadFailed: case kFileStart: break;
+      default:
+        // No other states are valid to call Next() from.
+        KALDI_ERR << "Reading script file: Next called wrongly.";
+    }
+    std::string line;
+    if (getline(script_input_.Stream(), line)) {
+      SplitStringOnFirstSpace(line, &key_, &data_rxfilename_);
+      if (!key_.empty() && !data_rxfilename_.empty()) {
+        // Got a valid line.
+        state_ = kHaveScpLine;
+      } else {
+        // Got an invalid line.
+        state_ = kError;  // we can't make sense of this
+        // scp file and will now die.
+      }
+    } else {
+      state_ = kEof;  // nothing more in the scp file.
+      // Might as well close the input streams as don't need them.
+      script_input_.Close();
+      if (data_input_.IsOpen())
+        data_input_.Close();
+    }
+  }
+
+
+  Input script_input_;  // Input object for the .scp file
+  Input data_input_;   // Input object for the entries in
+  // the script file.
+  Holder holder_;  // Holds the object.
+  std::string key_;
+  std::string rspecifier_;
+  std::string script_rxfilename_;  // of the script file.
+  RspecifierOptions opts_;  // options.
+  std::string data_rxfilename_;  // of the file we're reading.
+  enum StateType {
+    //       [The state of the reading process]               [does holder_ [is script_inp_
+    //                                                         have object]   open]
+    kUninitialized,  // Uninitialized or closed.                    no         no
+    kEof,     // We did Next() and found eof in script file.       no         no
+    kError,   // Some other error                                  no         yes
+    kHaveScpLine,  // Just called Open() or Next() and have a       no         yes
+    // line of the script file but no data.
+    kLoadSucceeded,  // Called LoadCurrent() and it succeeded.     yes         yes
+    kLoadFailed,  // Called LoadCurrent() and it failed,           no         yes
+    // or the user called FreeCurrent().. note,
+    // if when called by user we are in this state,
+    // it means the user called FreeCurrent().
+    kFileStart,        // [state we only use internally]           no         yes
+  } state_;
+};
+
+
+// This is the implementation for SequentialTableReader
+// when it's an archive.  Note that the archive format is:
+// key1 [space] object1 key2 [space]
+// object2 ... eof.
+// "object1" is the output of the Holder::Write function and will
+// typically contain a binary header (in binary mode) and then
+// the output of object.Write(os, binary).
+// The archive itself does not care whether it is in binary
+// or text mode, for reading purposes.
+
+template<class Holder>  class SequentialTableReaderArchiveImpl:
+      public SequentialTableReaderImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  SequentialTableReaderArchiveImpl(): state_(kUninitialized) { }
+
+  // Opens the archive named by "rspecifier" (which must classify as
+  // kArchiveRspecifier) and reads its first entry.
+  // Returns true on success (an empty archive is not an error); returns
+  // false, leaving the object closed, if the stream cannot be opened or the
+  // first entry cannot be read.
+  virtual bool Open(const std::string &rspecifier) {
+    if (state_ != kUninitialized) {
+      if (! Close()) {  // call Close() yourself to suppress this exception.
+        if (opts_.permissive)
+          KALDI_WARN << "TableReader::Open, error closing previous input "
+              "(only warning, since permissive mode).";
+        else
+          KALDI_ERR << "TableReader::Open, error closing previous input.";
+      }
+    }
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier,
+                                           &archive_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kArchiveRspecifier);
+
+    bool ans;
+    // NULL means don't expect binary-mode header
+    if (Holder::IsReadInBinary())
+      ans = input_.Open(archive_rxfilename_, NULL);
+    else
+      ans = input_.OpenTextMode(archive_rxfilename_);
+    if (!ans) {  // header.
+      KALDI_WARN << "TableReader: failed to open stream "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kUninitialized;  // Failure on Open
+      return false;  // User should print the error message.
+    }
+    state_ = kFileStart;
+    Next();
+    if (state_ == kError) {
+      KALDI_WARN << "Error beginning to read archive file (wrong filename?): "
+                 << PrintableRxfilename(archive_rxfilename_);
+      input_.Close();
+      state_ = kUninitialized;
+      return false;
+    }
+    KALDI_ASSERT(state_ == kHaveObject || state_ == kEof);
+    return true;
+  }
+
+  // Reads the next (key, object) pair from the archive.  Afterwards state_
+  // is kHaveObject, kEof, or kError (bad format or failed object read).
+  virtual void Next() {
+    switch (state_) {
+      case kHaveObject:
+        holder_.Clear(); break;
+      case kFileStart: case kFreedObject:
+        break;
+      default:
+        KALDI_ERR << "TableReader: Next() called wrongly.";
+    }
+    std::istream &is = input_.Stream();
+    is.clear();  // Clear any fail bits that may have been set... just in case
+    // this happened in the Read function.
+    is >> key_;  // This eats up any leading whitespace and gets the string.
+    if (is.eof()) {
+      state_ = kEof;
+      return;
+    }
+    if (is.fail()) {  // This shouldn't really happen, barring file-system errors.
+      KALDI_WARN << "Error reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+    const int c = is.peek();
+    if (c != ' ' && c != '\t' && c != '\n') {  // We expect a space ' ' after the key.
+      // We also allow tab [which is consumed] and newline [which is not], just
+      // so we can read archives generated by scripts that may not be fully
+      // aware of how this format works.
+      KALDI_WARN << "Invalid archive file format: expected space after key "
+                 << key_ << ", got character "
+                 << CharToString(static_cast<char>(c)) << ", reading "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+    if (c != '\n') is.get();  // Consume the space or tab.
+    if (holder_.Read(is)) {
+      state_ = kHaveObject;
+      return;
+    } else {
+      KALDI_WARN << "Object read failed, reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+  }
+
+  // Returns true if Open() succeeded and Close() has not been called since.
+  virtual bool IsOpen() const {
+    switch (state_) {
+      case kEof: case kError: case kHaveObject: case kFreedObject: return true;
+      case kUninitialized: return false;
+      default: KALDI_ERR << "IsOpen() called on invalid object.";  // kFileStart is not valid
+        // state for user to call something on.
+        return false;
+    }
+  }
+
+  // Returns false while there is a current entry, true at end of archive or
+  // on error.  Throws if the object is not open.
+  virtual bool Done() const {
+    switch (state_) {
+      case kHaveObject: case kFreedObject:
+        // FreeCurrent() releases the object but does not advance the reader,
+        // so it must not change the Done() status; only Next() does that.
+        return false;
+      case kEof: case kError:
+        return true;  // Error-state counts as Done(), but destructor
+        // will fail (unless you check the status with Close()).
+      default:
+        KALDI_ERR << "Done() called on TableReader object at the wrong time.";
+        return false;
+    }
+  }
+
+  // Returns the key of the current entry.
+  virtual std::string Key() {
+    // Valid to call this whenever Done() returns false; key_ is untouched by
+    // FreeCurrent(), so it is still meaningful in kFreedObject.
+    switch (state_) {
+      case kHaveObject: case kFreedObject: break;
+      default:
+        // coding error.
+        KALDI_ERR << "Key() called on TableReader object at the wrong time.";
+    }
+    return key_;
+  }
+
+  // Returns the object of the current entry.  Throws unless an object is
+  // currently held (in particular, after FreeCurrent()).
+  virtual const T &Value() {
+    switch (state_) {
+      case kHaveObject:
+        break;  // only valid case.
+      default:
+        // coding error.
+        KALDI_ERR << "Value() called on TableReader object at the wrong time.";
+    }
+    return holder_.Value();
+  }
+
+  // Releases the currently held object; only warns if none is held.
+  virtual void FreeCurrent() {
+    if (state_ == kHaveObject) {
+      holder_.Clear();
+      state_ = kFreedObject;
+    } else
+      KALDI_WARN << "TableReader: FreeCurrent called at the wrong time.";
+  }
+
+  // Closes the reader.  Returns false only if the reader was in the error
+  // state and permissive mode is off.  Throws if the reader was not open.
+  virtual bool Close() {
+    if (! this->IsOpen())
+      KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
+    if (input_.IsOpen())
+      input_.Close();
+    if (state_ == kHaveObject)
+      holder_.Clear();
+    bool ans;
+    if (opts_.permissive) {
+      ans = true;  // always return success.
+      if (state_ == kError)
+        KALDI_WARN << "Error detected closing TableReader for archive "
+                   << PrintableRxfilename(archive_rxfilename_) << " but ignoring "
+                   << "it as permissive mode specified.";
+    } else
+      ans = (state_ != kError);  // If error state, user should detect it.
+    state_ = kUninitialized;
+    return ans;
+  }
+
+  virtual ~SequentialTableReaderArchiveImpl() {
+    if (state_ == kError) {
+      if (opts_.permissive)
+        KALDI_WARN << "Error detected closing TableReader for archive "
+                   << PrintableRxfilename(archive_rxfilename_) << " but ignoring "
+                   << "it as permissive mode specified.";
+      else
+        KALDI_ERR << "TableReader: error detected closing archive "
+                  << PrintableRxfilename(archive_rxfilename_);
+    }
+    // If you don't want this exception to be thrown you can
+    // call Close() and check the status.
+    if (state_ == kHaveObject)
+      holder_.Clear();
+  }
+ private:
+  Input input_;  // Input object for the archive
+  Holder holder_;     // Holds the object.
+  std::string key_;
+  std::string rspecifier_;
+  std::string archive_rxfilename_;
+  RspecifierOptions opts_;
+  enum {  //  [The state of the reading process]               [does holder_ [is input_
+    //                                                         have object]   open]
+    kUninitialized,  // Uninitialized or closed.                    no         no
+    kFileStart,      // [state we use internally: just opened.]    no         yes
+    kEof,     // We did Next() and found eof in archive            no         no
+    kError,   // Some other error                                  no         no
+    kHaveObject,  // We read the key and the object after it.       yes        yes
+    kFreedObject,  // The user called FreeCurrent().                no         yes
+  } state_;
+};
+
+
+// Constructs a reader.  A non-empty rspecifier is opened immediately, and
+// failure to open it is fatal; an empty one leaves the reader closed.
+template<class Holder>
+SequentialTableReader<Holder>::SequentialTableReader(
+    const std::string &rspecifier)
+    : impl_(NULL) {
+  if (rspecifier.empty())
+    return;  // Nothing to open yet.
+  if (!Open(rspecifier))
+    KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier;
+}
+
+// Opens the reader on "rspecifier", closing any previously open input first.
+// Picks the archive or script implementation according to the rspecifier
+// type.  Returns false (leaving the reader closed) if the rspecifier is
+// invalid or the chosen implementation fails to open it.
+template<class Holder>
+bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) {
+  if (IsOpen() && !Close())
+    KALDI_ERR << "Could not close previously open object.";
+  // From here on impl_ is NULL.
+
+  const RspecifierType type = ClassifyRspecifier(rspecifier, NULL, NULL);
+  if (type == kArchiveRspecifier) {
+    impl_ = new SequentialTableReaderArchiveImpl<Holder>();
+  } else if (type == kScriptRspecifier) {
+    impl_ = new SequentialTableReaderScriptImpl<Holder>();
+  } else {  // kNoRspecifier or anything unexpected.
+    KALDI_WARN << "Invalid rspecifier " << rspecifier;
+    return false;
+  }
+
+  if (impl_->Open(rspecifier))
+    return true;
+
+  // The implementation has already printed warnings; discard it so that
+  // IsOpen() reports false.
+  delete impl_;
+  impl_ = NULL;
+  return false;
+}
+
+// Closes the underlying implementation and discards it.  Returns the status
+// reported by the implementation's Close().
+template<class Holder>
+bool SequentialTableReader<Holder>::Close() {
+  CheckImpl();
+  const bool close_ok = impl_->Close();
+  // A closed implementation object is never kept around.
+  delete impl_;
+  impl_ = NULL;
+  return close_ok;
+}
+
+
+// The reader is open exactly when it holds an implementation object: the
+// object is deleted whenever it stops being open, so the implementations'
+// own IsOpen() functions do not need to be consulted.
+template<class Holder>
+bool SequentialTableReader<Holder>::IsOpen() const {
+  return impl_ != NULL;
+}
+
+// Returns the key of the current entry.  The implementation may throw if
+// this is called at the wrong time, e.g. at end of input.
+template<class Holder>
+std::string SequentialTableReader<Holder>::Key() {
+  CheckImpl();
+  std::string key = impl_->Key();
+  return key;
+}
+
+
+// Asks the implementation to release the object of the current entry.
+template<class Holder>
+void SequentialTableReader<Holder>::FreeCurrent() {
+  CheckImpl();  // Defined outside this view; presumably rejects a closed reader.
+  impl_->FreeCurrent();
+}
+
+
+// Returns a reference to the object of the current entry.  The
+// implementation may throw, e.g. if the object cannot be loaded.
+template<class Holder>
+const typename SequentialTableReader<Holder>::T &
+SequentialTableReader<Holder>::Value() {
+  CheckImpl();
+  const T &value = impl_->Value();
+  return value;
+}
+
+
+// Advances the implementation to the next entry.
+template<class Holder>
+void SequentialTableReader<Holder>::Next() {
+  CheckImpl();  // Defined outside this view; presumably rejects a closed reader.
+  impl_->Next();
+}
+
+// Returns the implementation's Done() status.
+template<class Holder>
+bool SequentialTableReader<Holder>::Done() {
+  CheckImpl();
+  const bool done = impl_->Done();
+  return done;
+}
+
+
+// Destroys the implementation object, if any.  Note that the implementation
+// destructors may throw.
+template<class Holder>
+SequentialTableReader<Holder>::~SequentialTableReader() {
+  delete impl_;  // Deleting a NULL pointer is a no-op.
+}
+
+
+
+// Abstract interface shared by the TableWriter implementations.  "Holder"
+// supplies the stored type (Holder::T).
+template<class Holder>
+class TableWriterImplBase {
+ public:
+  typedef typename Holder::T T;
+
+  TableWriterImplBase() { }
+
+  // Opens the writer on the given wspecifier; returns true on success.
+  virtual bool Open(const std::string &wspecifier) = 0;
+
+  // Writes one (key, value) pair.  Returns true on success and false on
+  // failure, although some errors may only surface when Close() is called.
+  // Throws (via KALDI_ERR) if called wrongly.  Throwing on every error would
+  // also have been possible, since that is what TableWriter does; the bool
+  // return is a leftover from when TableWriter::Write returned an exit
+  // status.
+  virtual bool Write(const std::string &key, const T &value) = 0;
+
+  // Flushes any archive.  No error status is returned; errors are reported
+  // by the next Write or Close.
+  virtual void Flush() = 0;
+
+  // Closes the writer and returns its status.
+  virtual bool Close() = 0;
+
+  // Returns true if the writer is open.
+  virtual bool IsOpen() const = 0;
+
+  // May throw on write error if Close was not called.
+  virtual ~TableWriterImplBase() { }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase);
+};
+
+
+// The implementation of TableWriter we use when writing directly
+// to an archive with no associated scp.
+template<class Holder>
+class TableWriterArchiveImpl: public TableWriterImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  // Opens the archive named by "wspecifier" (which must classify as
+  // kArchiveWspecifier).  Returns true on success, false if the output
+  // stream cannot be opened.  Throws if the writer is already open in the
+  // write-error state, or if a previously open stream cannot be closed.
+  virtual bool Open(const std::string &wspecifier) {
+    switch (state_) {
+      case kUninitialized:
+        break;
+      case kWriteError:
+        KALDI_ERR << "TableWriter: opening stream, already open with write error.";
+      case kOpen: default:
+        if (!Close())  // throw because this error may not have been previously
+          // detected by the user.
+          KALDI_ERR << "TableWriter: opening stream, error closing previously open stream.";
+    }
+    wspecifier_ = wspecifier;
+    WspecifierType ws = ClassifyWspecifier(wspecifier,
+                                           &archive_wxfilename_,
+                                           NULL,
+                                           &opts_);
+    KALDI_ASSERT(ws == kArchiveWspecifier);  // or wrongly called.
+
+    if (output_.Open(archive_wxfilename_, opts_.binary, false)) {  // false means no binary header.
+      state_ = kOpen;
+      return true;
+    } else {
+      // stream will not be open.  User will report this error
+      // (we return bool), so don't bother printing anything.
+      state_ = kUninitialized;
+      return false;
+    }
+  }
+
+  // Returns true if the writer is open (including the write-error state).
+  virtual bool IsOpen() const {
+    switch (state_) {
+      case kUninitialized: return false;
+      case kOpen: case kWriteError: return true;
+      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
+    }
+    return false;
+  }
+
+  // Writes "key", a space, and then the value (via Holder::Write) to the
+  // archive.  Returns true on success, false on failure, but
+  // some errors may not be detected till we call Close().
+  // Once a write has failed, every later call returns false, because the
+  // archive may be corrupted and unreadable.
+  // Throws if the writer is not open or the key is not a valid token.
+  virtual bool Write(const std::string &key, const T &value) {
+    switch (state_) {
+      case kOpen: break;
+      case kWriteError:
+        // user should have known from the last
+        // call to Write that there was a problem.
+        KALDI_WARN << "TableWriter: attempting to write to invalid stream.";
+        return false;
+      case kUninitialized: default:
+        KALDI_ERR << "TableWriter: Write called on invalid stream";
+
+    }
+    // state is now kOpen (kWriteError returned above).
+    if (!IsToken(key)) // e.g. empty string or has spaces...
+      KALDI_ERR << "TableWriter: using invalid key " << key;
+    output_.Stream() << key << ' ';
+    if (!Holder::Write(output_.Stream(), opts_.binary, value)) {
+      KALDI_WARN << "TableWriter: write failure to "
+                 << PrintableWxfilename(archive_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (opts_.flush)
+      Flush();
+    return true;
+  }
+
+  // Flush will flush any archive; it does not return error status,
+  //  any errors will be reported on the next Write or Close.
+  virtual void Flush() {
+    switch (state_) {
+      case kWriteError: case kOpen:
+        output_.Stream().flush();  // Don't check error status.
+        return;
+      default:
+        KALDI_WARN << "TableWriter: Flush called on not-open writer.";
+    }
+  }
+
+  // Closes the output stream.  Returns false if closing the stream fails or
+  // if an earlier write failed; the writer ends up closed either way.
+  // Throws if the writer was not open.
+  virtual bool Close() {
+    if (!this->IsOpen() || !output_.IsOpen())
+      KALDI_ERR << "TableWriter: Close called on a stream that was not open." << this->IsOpen() << ", " << output_.IsOpen();
+    bool close_success = output_.Close();
+    if (!close_success) {
+      KALDI_WARN << "TableWriter: error closing stream: wspecifier is "
+                 << wspecifier_;
+      state_ = kUninitialized;
+      return false;
+    }
+    if (state_ == kWriteError) {
+      KALDI_WARN << "TableWriter: closing writer in error state: wspecifier is "
+                 << wspecifier_;
+      state_ = kUninitialized;
+      return false;
+    }
+    state_ = kUninitialized;
+    return true;
+  }
+
+  TableWriterArchiveImpl(): state_(kUninitialized) {}
+
+  // May throw on write error if Close was not called.
+  virtual ~TableWriterArchiveImpl() {
+    if (!IsOpen()) return;
+    else if (!Close())
+      KALDI_ERR << "At TableWriter destructor: Write failed or stream close "
+                << "failed: wspecifier is "<<  wspecifier_;
+  }
+
+ private:
+  Output output_;
+  WspecifierOptions opts_;
+  std::string wspecifier_;
+  std::string archive_wxfilename_;
+  enum {               // is stream open?
+    kUninitialized,    // no
+    kOpen,             // yes
+    kWriteError,       // yes
+  } state_;
+};
+
+
+
+
+// The implementation of TableWriter we use when writing to
+// individual files (more generally, wxfilenames) specified
+// in an scp file that we read.
+
+// Note: the code for this class is similar to RandomAccessTableReaderScriptImpl;
+// try to keep them in sync.
+
+template<class Holder>
+class TableWriterScriptImpl: public TableWriterImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {}
+
+  virtual bool Open(const std::string &wspecifier) {
+    switch (state_) {
+      case kReadScript:
+        KALDI_ERR << " Opening already open TableWriter: call Close first.";
+      case kUninitialized: case kNotReadScript:
+        break;
+    }
+    wspecifier_ = wspecifier;
+    WspecifierType ws = ClassifyWspecifier(wspecifier,
+                                           NULL,
+                                           &script_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(ws == kScriptWspecifier);  // or wrongly called.
+    KALDI_ASSERT(script_.empty());  // no way it could be nonempty at this point.
+
+    if (! ReadScriptFile(script_rxfilename_,
+                         true,  // print any warnings
+                         &script_)) {  // error reading script file or invalid format
+      state_ = kNotReadScript;
+      return false;  // no need to print further warnings.  user gets the error.
+    }
+    std::sort(script_.begin(), script_.end());
+    for (size_t i = 0; i+1 < script_.size(); i++) {
+      if (script_[i].first.compare(script_[i+1].first) >= 0) {
+        // script[i] not < script[i+1] in lexical order...
+        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
+                   << " contains duplicate key " << script_[i].first;
+        state_ = kNotReadScript;
+        return false;
+      }
+    }
+    state_ = kReadScript;
+    return true;
+  }
+
+  virtual bool IsOpen() const {  return (state_ == kReadScript);  }
+
+  virtual bool Close() {
+    if (!IsOpen())
+      KALDI_ERR << "Close() called on TableWriter that was not open.";
+    state_ = kUninitialized;
+    last_found_ = 0;
+    script_.clear();
+    return true;
+  }
+
+  // Write returns true on success, false on failure, but
+  // some errors may not be detected till we call Close().
+  virtual bool Write(const std::string &key, const T &value) {
+    if (!IsOpen())
+      KALDI_ERR << "TableWriter: Write called on invalid stream";
+
+    if (!IsToken(key)) // e.g. empty string or has spaces...
+      KALDI_ERR << "TableWriter: using invalid key " << key;
+
+    std::string wxfilename;
+    if (!LookupFilename(key, &wxfilename)) {
+      if (opts_.permissive) {
+        return true; // In permissive mode, it's as if we're writing to /dev/null
+                     // for missing keys.
+      } else {
+        KALDI_WARN << "TableWriter: script file "
+                   << PrintableRxfilename(script_rxfilename_)
+                   << " has no entry for key "<<key;
+        return false;
+      }
+    }
+    Output output;
+    if (!output.Open(wxfilename, opts_.binary, false)) {
+      // Open in the text/binary mode (on Windows) given by member var. "binary"
+      // (obtained from wspecifier), but do not put the binary-mode header (it
+      // will be written, if needed, by the Holder::Write function.)
+      KALDI_WARN << "TableWriter: failed to open stream: "
+                 << PrintableWxfilename(wxfilename);
+      return false;
+    }
+    if (!Holder::Write(output.Stream(), opts_.binary, value)
+        || !output.Close()) {
+      KALDI_WARN << "TableWriter: failed to write data to "
+                 << PrintableWxfilename(wxfilename);
+      return false;
+    }
+    return true;
+  }
+
+  // Flush does nothing in this implementation, there is nothing to flush.
+  virtual void Flush() { }
+
+
+  virtual ~TableWriterScriptImpl() {
+    // Nothing to do in destructor.
+  }
+
+ private:
+  // Looks up the output filename that script_ associates with "key".  On
+  // success the filename is stored in *wxfilename and true is returned;
+  // otherwise false is returned and *wxfilename is left untouched.
+  // last_found_ is advanced by one on entry and, if the binary search
+  // succeeds, set to the index of the match.
+  // Note: this function is almost the same as in RandomAccessTableReaderScriptImpl.
+  bool LookupFilename(const std::string &key, std::string *wxfilename) {
+    typedef std::pair<std::string, std::string> Entry;
+    typedef typename std::vector<Entry>::const_iterator EntryIter;
+
+    // Fast path: callers that go through the keys consecutively will find
+    // the wanted entry right after the previous one.
+    ++last_found_;
+    if (last_found_ < script_.size() && script_[last_found_].first == key) {
+      *wxfilename = script_[last_found_].second;
+      return true;
+    }
+
+    // Slow path: binary search.  It is important that "" compares less than
+    // or equal to any string, so that lower_bound lands on the element with
+    // the same key.
+    const Entry probe(key, "");
+    EntryIter pos = std::lower_bound(script_.begin(), script_.end(), probe);
+    if (pos == script_.end() || !(pos->first == key)) {
+      return false;
+    }
+    last_found_ = pos - script_.begin();
+    *wxfilename = pos->second;
+    return true;
+  }
+
+
+  WspecifierOptions opts_;  // Write() reads opts_.permissive and opts_.binary.
+  std::string wspecifier_;
+  std::string script_rxfilename_;  // name of the script file; printed in Write()'s warning.
+
+  // The script_ variable contains pairs of (key, filename).  LookupFilename()
+  // searches it with std::lower_bound, which requires it to be sorted (the
+  // sorting itself is not visible in this part of the file).  If this becomes
+  // inefficient we can use std::unordered_map (but I suspect this wouldn't be
+  // significantly faster & would use more memory).  If memory becomes a
+  // problem here, the user should probably be passing only the relevant part
+  // of the scp file rather than expecting us to get too clever in the code.
+  std::vector<std::pair<std::string, std::string> > script_;
+  size_t last_found_;  // Position hint into script_; an optimization used in LookupFilename.
+
+  enum {
+    kUninitialized,  // not open; Close() resets state_ to this.
+    kReadScript,     // the only state in which IsOpen() returns true.
+    kNotReadScript,  // read of script failed.
+  } state_;
+};
+
+
+// The implementation of TableWriter we use when writing directly
+// to an archive plus an associated scp.  For every key, Write() appends the
+// key and the object to the archive and adds a line "key archive:offset" to
+// the script file, where offset is the archive stream position right after
+// the key and its separating space.
+template<class Holder>
+class TableWriterBothImpl: public TableWriterImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  // Opens the archive and script outputs named by "wspecifier".  A writer
+  // that is still open is closed first; KALDI_ERR is raised if that close
+  // fails or if the writer is in the write-error state.  Returns false, and
+  // leaves the writer unopened, if either output cannot be opened.
+  virtual bool Open(const std::string &wspecifier) {
+    switch (state_) {
+      case kUninitialized:
+        break;
+      case kWriteError:
+        KALDI_ERR << "TableWriter: opening stream, already open with write error.";
+      case kOpen: default:
+        if (!Close())  // throw because this error may not have been previously detected by user.
+          KALDI_ERR << "TableWriter: opening stream, error closing previously open stream.";
+    }
+    wspecifier_ = wspecifier;
+    WspecifierType ws = ClassifyWspecifier(wspecifier,
+                                           &archive_wxfilename_,
+                                           &script_wxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(ws == kBothWspecifier);  // or wrongly called.
+    if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput)
+      KALDI_WARN << "When writing to both archive and script, the script file "
+          "will generally not be interpreted correctly unless the archive is "
+          "an actual file: wspecifier = " << wspecifier;
+
+    if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) {  // false means no binary header.
+      state_ = kUninitialized;
+      return false;
+    }
+    if (!script_output_.Open(script_wxfilename_, false, false)) {  // first false means text mode:
+      // script files always text-mode.   second false means don't write header (doesn't matter
+      // for text mode).
+      archive_output_.Close();  // Don't care about status: error anyway.
+      state_ = kUninitialized;
+      return false;
+    }
+    state_ = kOpen;
+    return true;
+  }
+
+  // Returns true in the kOpen and kWriteError states and false when
+  // uninitialized; any other state value is reported through KALDI_ERR.
+  virtual bool IsOpen() const {
+    switch (state_) {
+      case kUninitialized: return false;
+      case kOpen: case kWriteError: return true;
+      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
+    }
+    return false;
+  }
+
+  // Builds the script-file entry for an object that starts at "streampos" in
+  // the archive: the archive filename, a colon, and the position.  Asserts
+  // that the position is not -1.
+  void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const {
+    std::ostringstream ss;
+    ss << ':' << streampos;
+    KALDI_ASSERT(ss.str() != ":-1");
+    *output = archive_wxfilename_ + ss.str();
+
+    // e.g. /some/file:12302.
+    // Note that we warned if archive_wxfilename_ is not an actual filename;
+    // the philosophy is we give the user rope and if they want to hang
+    // themselves, with it, fine.
+  }
+
+  // Write returns true on success, false on failure, but
+  // some errors may not be detected till we call Close().
+  // After a failure the writer stays in the kWriteError state, in which
+  // every later Write() warns and returns false.
+  virtual bool Write(const std::string &key, const T &value) {
+    switch (state_) {
+      case kOpen: break;
+      case kWriteError:
+        // user should have known from the last
+        // call to Write that there was a problem.  Warn about it.
+        KALDI_WARN << "TableWriter: writing to non-open TableWriter object.";
+        return false;
+      case kUninitialized: default:
+        KALDI_ERR << "TableWriter: Write called on invalid stream";
+    }
+    // state_ is kOpen from here on: every other state returned or raised an
+    // error in the switch above.
+    if (!IsToken(key)) // e.g. empty string or has spaces...
+      KALDI_ERR << "TableWriter: using invalid key " << key;
+    std::ostream &archive_os = archive_output_.Stream();
+    archive_os << key << ' ';
+    typename std::ostream::pos_type archive_os_pos = archive_os.tellp();
+    // position at start of Write() to archive.  We will record this in the script file.
+    std::string offset_rxfilename;  // rxfilename with offset into the archive,
+    // e.g. some_archive_name.ark:431541423
+    MakeFilename(archive_os_pos, &offset_rxfilename);
+
+    // Write to the script file first.
+    // The idea is that we want to get all the information possible into the
+    // script file, to make it easier to unwind errors later.
+    std::ostream &script_os = script_output_.Stream();
+    script_os << key << ' ' << offset_rxfilename << '\n';
+
+    if (!Holder::Write(archive_os, opts_.binary, value)) {
+      KALDI_WARN << "TableWriter: write failure to "
+                 << PrintableWxfilename(archive_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (script_os.fail()) {
+      KALDI_WARN << "TableWriter: write failure to script file detected: "
+                 << PrintableWxfilename(script_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (archive_os.fail()) {
+      KALDI_WARN << "TableWriter: write failure to archive file detected: "
+                 << PrintableWxfilename(archive_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (opts_.flush)
+      Flush();
+    return true;
+  }
+
+  // Flush will flush any archive; it does not return error status,
+  //  any errors will be reported on the next Write or Close.
+  virtual void Flush() {
+    switch (state_) {
+      case kWriteError: case kOpen:
+        archive_output_.Stream().flush();  // Don't check error status.
+        script_output_.Stream().flush();  // Don't check error status.
+        return;
+      default:
+        KALDI_WARN << "TableWriter: Flush called on not-open writer.";
+    }
+  }
+
+  // Closes both outputs and resets the writer to kUninitialized.  Returns
+  // true only if both closes succeeded and no Write() had failed before.
+  // Calling this on a writer that is not open is reported through KALDI_ERR.
+  virtual bool Close() {
+    if (!this->IsOpen())
+      KALDI_ERR << "TableWriter: Close called on a stream that was not open.";
+    bool close_success = true;
+    if (archive_output_.IsOpen())
+      if (!archive_output_.Close()) close_success = false;
+    if (script_output_.IsOpen())
+      if (!script_output_.Close()) close_success = false;
+    bool ans = close_success && (state_ != kWriteError);
+    state_ = kUninitialized;
+    return ans;
+  }
+
+  TableWriterBothImpl(): state_(kUninitialized) {}
+
+  // May throw on write error if Close() was not called.
+  // User can get the error status by calling Close().
+  virtual ~TableWriterBothImpl() {
+    if (!IsOpen()) return;
+    else if (!Close())
+      KALDI_ERR << "At TableWriter destructor: Write failed or stream close failed: "
+                << wspecifier_;
+  }
+
+ private:
+  Output archive_output_;           // stream for the archive.
+  Output script_output_;            // stream for the script (scp) file; always text mode.
+  WspecifierOptions opts_;          // options filled in by ClassifyWspecifier in Open().
+  std::string archive_wxfilename_;  // archive filename; also the prefix of script entries.
+  std::string script_wxfilename_;   // script filename.
+  std::string wspecifier_;          // wspecifier passed to Open(); used in error messages.
+  enum {               // is stream open?
+    kUninitialized,    // no
+    kOpen,             // yes
+    kWriteError,       // yes
+  } state_;
+};
+
+
+// Constructs a writer and, unless "wspecifier" is empty, opens it right
+// away.  A failed Open() is reported through KALDI_ERR.
+template<class Holder>
+TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) {
+  if (wspecifier.empty()) return;  // nothing to open yet.
+  const bool opened = Open(wspecifier);
+  if (!opened) {
+    KALDI_ERR << "TableWriter: failed to write to "
+              << wspecifier;
+  }
+}
+
+// The writer is open exactly when it holds an implementation object.
+template<class Holder>
+bool TableWriter<Holder>::IsOpen() const {
+  const bool has_impl = (impl_ != NULL);
+  return has_impl;
+}
+
+
+// Opens the writer on "wspecifier", choosing the implementation class from
+// the wspecifier type.  A writer that is already open is closed first.
+// Returns false if the wspecifier is invalid or the implementation fails to
+// open, in which case the writer is left closed.
+template<class Holder>
+bool TableWriter<Holder>::Open(const std::string &wspecifier) {
+  // call Close() yourself to suppress this exception.
+  if (IsOpen() && !Close()) {
+    KALDI_ERR << "TableWriter::Open, failed to close previously open writer.";
+  }
+  KALDI_ASSERT(impl_ == NULL);
+
+  const WspecifierType kind = ClassifyWspecifier(wspecifier, NULL, NULL, NULL);
+  if (kind == kBothWspecifier) {
+    impl_ = new TableWriterBothImpl<Holder>();
+  } else if (kind == kArchiveWspecifier) {
+    impl_ = new TableWriterArchiveImpl<Holder>();
+  } else if (kind == kScriptWspecifier) {
+    impl_ = new TableWriterScriptImpl<Holder>();
+  } else {  // kNoWspecifier, or any value not handled above.
+    KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier;
+    return false;
+  }
+
+  if (!impl_->Open(wspecifier)) {
+    // The class will have printed a more specific warning.
+    delete impl_;
+    impl_ = NULL;
+    return false;
+  }
+  return true;
+}
+
+// Writes one (key, value) pair through the implementation object.  A failed
+// write is reported through KALDI_ERR.
+template<class Holder>
+void TableWriter<Holder>::Write(const std::string &key,
+                                const T &value) const {
+  CheckImpl();
+  const bool written = impl_->Write(key, value);
+  if (!written) {
+    // More specific warning will have been printed in the Write function.
+    KALDI_ERR << "Error in TableWriter::Write";
+  }
+}
+
+// Calls CheckImpl() and then forwards to the implementation's Flush().
+template<class Holder>
+void TableWriter<Holder>::Flush() {
+  this->CheckImpl();
+  this->impl_->Flush();
+}
+
+// Closes the implementation object, destroys it, and returns the status of
+// the implementation's Close().
+template<class Holder>
+bool TableWriter<Holder>::Close() {
+  CheckImpl();
+  const bool closed_ok = impl_->Close();
+  // We don't keep around non-open impl_ objects [c.f. definition of IsOpen()]
+  delete impl_;
+  impl_ = NULL;
+  return closed_ok;
+}
+
+// Closes the writer if it is still open; a failed close is reported through
+// KALDI_ERR.
+template<class Holder>
+TableWriter<Holder>::~TableWriter() {
+  if (!IsOpen()) return;
+  const bool closed_ok = Close();
+  if (!closed_ok) {
+    KALDI_ERR << "Error closing TableWriter [in destructor].";
+  }
+}
+
+
+// Types of RandomAccessTableReader:
+// In principle, we would like to have four types of RandomAccessTableReader:
+//  the 4 combinations  [scp, archive], [seekable, not-seekable],
+// where if something is seekable we only store a file offset.  However,
+// it seems sufficient for now to only implement two of these, in both
+// cases assuming it's not seekable so we never store file offsets and always
+// store either the scp line or the data in the archive.  The reasons are:
+// (1)
+// For scp files, storing the actual entry is not that much more expensive
+// than storing the file offsets (since the entries are just filenames), and
+// avoids a lot of fseek operations that might be expensive.
+// (2)
+// For archive files, there is no real reason, if you have the archive file
+// on disk somewhere, why you wouldn't access it via its associated scp.
+// [i.e. write it as ark, scp].  The main reason to read archives directly
+// is if they are part of a pipe, and in this case it's not seekable, so
+// we implement only this case.
+//
+// Note that we will rarely in practice have to keep in memory everything in
+// the archive, as long as things are only read once from the archive (the
+// "o, " or "once" option) and as long as we keep our keys in sorted order; to take
+// advantage of this we need the "s, " (sorted) option, so we would read archives
+// as e.g. "s, o, ark:-" (this is the rspecifier we would use if it was the
+// standard input and these conditions held).
+
+// Abstract interface implemented by the RandomAccessTableReader
+// implementation classes.
+template<class Holder>
+class RandomAccessTableReaderImplBase {
+ public:
+  typedef typename Holder::T T;
+
+  // Opens the reader on the given rspecifier.
+  virtual bool Open(const std::string &rspecifier) = 0;
+
+  // Asks whether an entry for "key" is available.
+  virtual bool HasKey(const std::string &key) = 0;
+
+  // Returns a reference to the value stored for "key".
+  virtual const T& Value(const std::string &key) = 0;
+
+  // Closes the reader.
+  virtual bool Close() = 0;
+
+  virtual ~RandomAccessTableReaderImplBase() { }
+};
+
+
+// Implementation of RandomAccessTableReader for a script file; for simplicity we
+// just read it in all in one go, as it's unlikely someone would generate this
+// from a pipe.  In principle we could read it on-demand as for the archives, but
+// this would probably be overkill.
+
+// Note: the code for this class is similar to TableWriterScriptImpl:
+// try to keep them in sync.
+template<class Holder>
+class RandomAccessTableReaderScriptImpl:
+      public RandomAccessTableReaderImplBase<Holder> {
+
+ public:
+  typedef typename Holder::T T;
+
+  RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {}
+
+  // Reads the whole script file named by "rspecifier" into script_ and
+  // checks that its keys are strictly increasing (sorting them first unless
+  // the user asserted that they already are).  Returns false, with script_
+  // left empty so that Open() can be called again, if the script cannot be
+  // read or contains duplicate or out-of-order keys.  Calling Open() on an
+  // already open reader is reported through KALDI_ERR.
+  virtual bool Open(const std::string &rspecifier) {
+    switch (state_) {
+      case kNotHaveObject: case kHaveObject: case kGaveObject:
+        KALDI_ERR << " Opening already open RandomAccessTableReader: call Close first.";
+      case kUninitialized: case kNotReadScript:
+        break;
+    }
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier,
+                                           &script_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kScriptRspecifier);  // or wrongly called.
+    KALDI_ASSERT(script_.empty());  // Close() and every failure path below clear it.
+
+    if (! ReadScriptFile(script_rxfilename_,
+                        true,  // print any warnings
+                        &script_)) {  // error reading script file or invalid format
+      script_.clear();  // drop anything read before the failure, so that a
+      // later Open() does not trip the assertion above.
+      state_ = kNotReadScript;
+      return false;  // no need to print further warnings.  user gets the error.
+    }
+
+    // If opts_.sorted, the user has asserted that the keys are already sorted.
+    // Although we could easily sort them, we want to let the user know of this
+    // mistake.  This same mistake could have serious effects if used with an
+    // archive rather than a script.
+    if (!opts_.sorted)
+      std::sort(script_.begin(), script_.end());
+    for (size_t i = 0; i+1 < script_.size(); i++) {
+      if (script_[i].first.compare(script_[i+1].first) >= 0) {
+        // script[i] not < script[i+1] in lexical order...
+        bool same = (script_[i].first == script_[i+1].first);
+        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
+                   << (same ? " contains duplicate key: " :
+                       " is not sorted (remove s, option or add ns, option): key is ")
+                   << script_[i].first;
+        script_.clear();  // keep the "script_ is empty unless open" invariant.
+        state_ = kNotReadScript;
+        return false;
+      }
+    }
+    state_ = kNotHaveObject;
+    return true;
+  }
+
+  // True once the script has been read successfully and until Close().
+  virtual bool IsOpen() const {
+    return  (state_ == kNotHaveObject || state_ == kHaveObject ||
+             state_ == kGaveObject);
+  }
+
+  // Releases the held object and the script and returns the reader to the
+  // kUninitialized state.  Always returns true; calling it on a reader that
+  // is not open is reported through KALDI_ERR.
+  virtual bool Close() {
+    if (!IsOpen())
+      KALDI_ERR << "Close() called on RandomAccessTableReader that was not open.";
+    holder_.Clear();
+    state_ = kUninitialized;
+    last_found_ = 0;
+    script_.clear();
+    current_key_ = "";
+    // This one cannot fail because any errors of a "global"
+    // nature would have been detected when we did Open().
+    // With archives it's different.
+    return true;
+  }
+
+  // Returns true if "key" is in the script.  In permissive mode the object
+  // is also loaded, and true is returned only if that load succeeds.
+  virtual bool HasKey(const std::string &key) {
+    bool preload = opts_.permissive;
+    // In permissive mode, we have to check that we can read
+    // the scp entry before we assert that the key is there.
+    return HasKeyInternal(key, preload);
+  }
+
+
+  // Returns the object for "key", loading it first unless it is the object
+  // already held.  It is an error (KALDI_ERR) if the reader is not open, if
+  // the object cannot be obtained, or if the "once" option was given and the
+  // same key is requested a second time.
+  virtual const T&  Value(const std::string &key) {
+
+    if (!IsOpen())
+      KALDI_ERR << "Value() called on non-open object.";
+
+    if (!((state_ == kHaveObject || state_ == kGaveObject)
+          && key == current_key_)) {  // Not already stored...
+      bool has_key = HasKeyInternal(key, true);  // preload.
+      if (!has_key)
+        KALDI_ERR << "Could not get item for key " << key
+                  << ", rspecifier is " << rspecifier_ << " [to ignore this, "
+                  << "add the p, (permissive) option to the rspecifier].";
+      KALDI_ASSERT(state_ == kHaveObject && key == current_key_);
+    }
+
+    if (state_ == kHaveObject) {
+      state_ = kGaveObject;
+      if (opts_.once) MakeTombstone(key);  // make sure that future lookups fail.
+      return holder_.Value();
+    } else {  // state_ == kGaveObject
+      if (opts_.once)
+        KALDI_ERR << "Value called twice for the same key and ,o (once) option "
+                  << "is used: rspecifier is " << rspecifier_;
+      return holder_.Value();
+    }
+  }
+
+  virtual ~RandomAccessTableReaderScriptImpl() {
+    if (state_ == kHaveObject || state_ == kGaveObject)
+      holder_.Clear();
+  }
+
+ private:
+  // HasKeyInternal when called with preload == false just tells us whether the
+  // key is in the scp.  With preload == true, which happens when the ,p
+  // (permissive) option is given in the rspecifier, it will also check that we
+  // can preload the object from disk (loading from the rxfilename in the scp),
+  // and only return true if we can.  This function is called both from HasKey
+  // and from Value().
+  virtual bool HasKeyInternal(const std::string &key, bool preload) {
+    switch (state_) {
+      case kUninitialized: case kNotReadScript:
+        KALDI_ERR << "HasKey called on RandomAccessTableReader object that is not open.";
+      case kHaveObject: case kGaveObject:
+        if (key == current_key_)
+          return true;
+        break;
+      default: break;
+    }
+    KALDI_ASSERT(IsToken(key));
+    size_t key_pos = 0; // set to zero to suppress warning
+    bool ans = LookupKey(key, &key_pos);
+    if (!ans) return false;
+    else {
+      // First do a check regarding the "once" option.
+      if (opts_.once && script_[key_pos].second == "") {  // A "tombstone"; user is asking about
+        // already-read key.
+        KALDI_ERR << "HasKey called on key whose value was already read, and "
+            " you specified the \"once\" option (o, ): try removing o, or adding no, :"
+            " rspecifier is " << rspecifier_;
+      }
+      if (!preload)
+        return true;  // we have the key.
+      else {  // preload specified, so we have to pre-load the object before returning true.
+        if (!input_.Open(script_[key_pos].second)) {
+          KALDI_WARN << "Error opening stream "
+                     << PrintableRxfilename(script_[key_pos].second);
+          return false;
+        } else {
+          // Make sure holder empty.
+          if (state_ == kHaveObject || state_ == kGaveObject)
+            holder_.Clear();
+          if (holder_.Read(input_.Stream())) {
+            state_ = kHaveObject;
+            current_key_ = key;
+            return true;
+          } else {
+            KALDI_WARN << "Error reading object from "
+                "stream " << PrintableRxfilename(script_[key_pos].second);
+            state_ = kNotHaveObject;
+            return false;
+          }
+        }
+      }
+    }
+  }
+
+  // Marks "key" as already consumed by blanking its filename in script_;
+  // HasKeyInternal treats an empty filename as a tombstone when the "once"
+  // option is set.
+  void MakeTombstone(const std::string &key) {
+    size_t offset;
+    if (!LookupKey(key, &offset))
+      KALDI_ERR << "RandomAccessTableReader object in inconsistent state.";
+    else
+      script_[offset].second = "";
+  }
+
+  // Finds the index of "key" in script_.  On success stores it in
+  // *script_offset (and in last_found_) and returns true; returns false if
+  // the key is not present.
+  bool LookupKey(const std::string &key, size_t *script_offset) {
+    // First, an optimization: if we're going consecutively, this will
+    // make the lookup very fast.  Since we may call HasKey and then
+    // Value(), which both may look up the key, we test if either the
+    // current or next position are correct.
+    if (last_found_ < script_.size() && script_[last_found_].first == key) {
+      *script_offset = last_found_;
+      return true;
+    }
+    last_found_++;
+    if (last_found_ < script_.size() && script_[last_found_].first == key) {
+      *script_offset = last_found_;
+      return true;
+    }
+    std::pair<std::string, std::string> pr(key, "");  // Important that ""
+    // compares less than or equal to any string, so lower_bound points to the
+    // element that has the same key.
+    typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator
+        IterType;
+    IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
+    if (iter != script_.end() && iter->first == key) {
+      last_found_ = *script_offset = iter - script_.begin();
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+
+  Input input_;  // Use the same input_ object for reading each file, in case
+  // the scp specifies offsets in an archive (so we can keep the same file open).
+  RspecifierOptions opts_;
+  std::string rspecifier_;  // rspecifier used to open it; used in debug messages
+  std::string script_rxfilename_;  // filename of script.
+
+  std::string current_key_;  // Key of object in holder_
+  Holder holder_;
+
+  // the script_ variable contains pairs of (key, filename), sorted using
+  // std::sort (unless the user asserted that the script is already sorted).
+  // LookupKey() searches it with std::lower_bound.  If this becomes
+  // inefficient we can use std::unordered_map (but I suspect this wouldn't be
+  // significantly faster & would use more memory).  If memory becomes a
+  // problem here, the user should probably be passing only the relevant part
+  // of the scp file rather than expecting us to get too clever in the code.
+  // It is empty whenever the reader is not open.
+  std::vector<std::pair<std::string, std::string> > script_;
+  size_t last_found_;  // This is for an optimization used in LookupKey.
+
+  enum {  //           [Do we have          [Does holder_
+    //                script_ set up?]      contain object?]
+    kUninitialized,  //     no                     no
+    kNotReadScript,  //     no                     no
+    kNotHaveObject,  //     yes                    no
+    kHaveObject,   //     yes                    yes
+    kGaveObject,   //     yes                    yes
+    // [kGaveObject is as kHaveObject but we note that the
+    //  user has already read it; this is for checking that
+    // if "once" is specified, the user actually only reads
+    // it once.
+  } state_;
+
+};
+
+
+
+
+// Base class, with part of the implementation, shared by the archive
+// versions of RandomAccessTableReader.  It takes care of opening the
+// archive, of tracking where we are in the reading process, and of loading
+// objects one at a time.  This is the only place in the hierarchy with an
+// intermediate class between the virtual ImplBase class and the actual Impl
+// classes; the child classes vary in the assumptions regarding sorting, etc.
+template<class Holder>
+class RandomAccessTableReaderArchiveImplBase:
+      public RandomAccessTableReaderImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  RandomAccessTableReaderArchiveImplBase(): holder_(NULL), state_(kUninitialized) { }
+
+  // Opens the archive named by "rspecifier".  A reader that is still open is
+  // closed first.  Returns false, leaving the reader uninitialized, if the
+  // stream cannot be opened.
+  virtual bool Open(const std::string &rspecifier) {
+    const bool needs_close = (state_ != kUninitialized);
+    // call Close() yourself to suppress this exception.
+    if (needs_close && !this->Close()) {
+      KALDI_ERR << "TableReader::Open, error closing previous input.";
+    }
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kArchiveRspecifier);
+
+    // The NULL argument means we don't expect a binary-mode header.
+    const bool opened = Holder::IsReadInBinary()
+        ? input_.Open(archive_rxfilename_, NULL)
+        : input_.OpenTextMode(archive_rxfilename_);
+    if (opened) {
+      state_ = kNoObject;
+      return true;
+    }
+    KALDI_WARN << "TableReader: failed to open stream "
+               << PrintableRxfilename(archive_rxfilename_);
+    state_ = kUninitialized;  // Failure on Open
+    return false;  // User should print the error message.
+  }
+
+  // Must be called in state kNoObject; tries to read the next (key, object)
+  // pair from the archive.  On success the state becomes kHaveObject and
+  // cur_key_ / holder_ contain the key and the value.  On failure the state
+  // becomes kEof or kError and holder_ is NULL.
+  void ReadNextObject() {
+    if (state_ != kNoObject) {
+      // Code error somewhere in this class or a child class.
+      KALDI_ERR << "TableReader: ReadNextObject() called from wrong state.";
+    }
+    std::istream &in = input_.Stream();
+    // Clear any fail bits that may have been set... just in case this
+    // happened in the Read function.
+    in.clear();
+    in >> cur_key_;  // Skips leading whitespace, then reads the key.
+    if (in.eof()) {
+      state_ = kEof;
+      return;
+    }
+    if (in.fail()) {  // This shouldn't really happen, barring file-system errors.
+      KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_;
+      state_ = kError;
+      return;
+    }
+
+    // We expect a space ' ' after the key.  We also allow tab, just so we can
+    // read archives generated by scripts that may not be fully aware of how
+    // this format works.  A newline is accepted too, but it is not consumed.
+    const int next_char = in.peek();
+    const bool separator_ok =
+        (next_char == ' ' || next_char == '\t' || next_char == '\n');
+    if (!separator_ok) {
+      KALDI_WARN << "Invalid archive file format: expected space after key " <<cur_key_
+                 <<", got character "
+                 << CharToString(static_cast<char>(in.peek())) << ", reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+    if (next_char != '\n') in.get();  // Consume the space or tab.
+
+    holder_ = new Holder;
+    if (!holder_->Read(in)) {
+      KALDI_WARN << "Object read failed, reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      delete holder_;
+      holder_ = NULL;
+      return;
+    }
+    state_ = kHaveObject;
+  }
+
+  // Every state other than kUninitialized counts as open; an unknown state
+  // value is reported through KALDI_ERR.
+  virtual bool IsOpen() const {
+    if (state_ == kUninitialized) return false;
+    if (state_ == kNoObject || state_ == kHaveObject ||
+        state_ == kEof || state_ == kError) {
+      return true;
+    }
+    KALDI_ERR << "IsOpen() called on invalid object.";
+    return false;
+  }
+
+  // Called by the child-class virtual Close() functions; does the
+  // shared parts of the cleanup.  Returns false only if the reader was in
+  // the error state and permissive mode is off.
+  bool CloseInternal() {
+    if (!this->IsOpen()) {
+      KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
+    }
+    if (input_.IsOpen()) {
+      input_.Close();
+    }
+    if (state_ != kHaveObject) {
+      KALDI_ASSERT(holder_ == NULL);
+    } else {
+      KALDI_ASSERT(holder_ != NULL);
+      delete holder_;
+      holder_ = NULL;
+    }
+    const bool had_error = (state_ == kError);
+    state_ = kUninitialized;
+    if (!had_error) return true;
+    if (opts_.permissive) {
+      KALDI_WARN << "Error state detected closing reader.  "
+                 << "Ignoring it because you specified permissive mode.";
+      return true;
+    }
+    return false;
+  }
+
+  ~RandomAccessTableReaderArchiveImplBase() {
+    // Calling CloseInternal() is the responsibility of the child class.
+    KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL);
+  }
+ private:
+  Input input_;       // Input object for the archive
+ protected:
+  // Everything below is accessed by the child classes.
+
+  std::string cur_key_;   // key of the current object (if state == kHaveObject).
+  Holder *holder_;     // the object we just read (if state == kHaveObject), else NULL.
+
+  std::string rspecifier_;
+  std::string archive_rxfilename_;
+  RspecifierOptions opts_;
+
+  // State of the reading process.  holder_ contains an object only in
+  // kHaveObject; input_ is open in every state except kUninitialized.
+  enum {
+    kUninitialized,  // Uninitialized or closed.
+    kNoObject,       // Open, but no object in holder_.
+    kHaveObject,     // Open, with an object in holder_.
+    kEof,            // End of file reached.
+    kError,          // Some kind of error-state in the reading.
+  } state_;
+
+};
+
+
+// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is the
+// implementation for random-access reading of archives when both the archive,
+// and the calling code, are in sorted order (i.e. we ask for the keys in sorted
+// order).  This is when the s and cs options are both given.  It only ever has
+// to keep one object in memory.  It inherits from
+// RandomAccessTableReaderArchiveImplBase which implements the common parts of
+// RandomAccessTableReader that are used when it's an archive we're reading from.
+
+template<class Holder>  class RandomAccessTableReaderDSortedArchiveImpl:
+      public RandomAccessTableReaderArchiveImplBase<Holder> {
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
+ public:
+  typedef typename Holder::T T;
+
+  // Nothing is initialized explicitly here.
+  RandomAccessTableReaderDSortedArchiveImpl() {}
+
+  // This class has no cleanup of its own, so closing is delegated entirely
+  // to the generic base-class CloseInternal().
+  virtual bool Close() {
+    const bool closed_ok = this->CloseInternal();
+    return closed_ok;
+  }
+
+  // Returns whatever FindKeyInternal() reports for "key".
+  virtual bool HasKey(const std::string &key) {
+    const bool found = FindKeyInternal(key);
+    return found;
+  }
+  // Returns the object stored for "key".  A key that FindKeyInternal()
+  // cannot locate is reported through KALDI_ERR.
+  virtual const T & Value(const std::string &key) {
+    if (!FindKeyInternal(key)) {
+      KALDI_ERR << "Value() called but no such key " << key
+                << " in archive " << PrintableRxfilename(archive_rxfilename_);
+      return *(const T*)NULL;  // keep compiler happy.
+    }
+    KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_
+                 && holder_ != NULL);
+    return this->holder_->Value();
+  }
+
+  // Closes the reader if the user left it open.  A failed close means we
+  // are in some kind of error state that the user did not find out about by
+  // calling Close(); it is reported through KALDI_ERR (a more specific
+  // warning will already have been printed).
+  virtual ~RandomAccessTableReaderDSortedArchiveImpl() {
+    if (!this->IsOpen()) return;
+    const bool closed_ok = Close();
+    if (!closed_ok) {
+      KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
+                << rspecifier_;
+    }
+  }
+ private:
+  // FindKeyInternal advances through the archive, calling ReadNextObject()
+  // as often as needed, until the current key is no longer smaller than
+  // "key".  It returns true only if the object currently held has exactly
+  // that key.  Both HasKey() and Value() go through this function.
+  bool FindKeyInternal(const std::string &key) {
+    // The user must request keys in sorted order; anything else is an error.
+    if (!last_requested_key_.empty() &&
+        key.compare(last_requested_key_) < 0) {  // key < last_requested_key_
+      KALDI_ERR << "You provided the \"cs\" option "
+                << "but are not calling with keys in sorted order: "
+                << key << " < " << last_requested_key_ << ": rspecifier is "
+                << rspecifier_;
+    }
+    // Kept only so the next call can be checked for ordering.
+    last_requested_key_ = key;
+
+    // Nothing has been read yet.  This can only happen once, the first time
+    // someone calls HasKey() or Value().  It is not done at initialization
+    // time, to stop the program hanging too soon when reading from a pipe.
+    if (state_ == kNoObject) {
+      ReadNextObject();
+    }
+
+    if (state_ == kEof || state_ == kError) return false;
+
+    if (state_ == kUninitialized) {
+      KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open.";
+    }
+
+    // Key of the object read just before the current one; used to check
+    // that the archive itself is in sorted order.
+    std::string prev_key;
+    for (;;) {
+      KALDI_ASSERT(state_ == kHaveObject);
+      const int order = key.compare(cur_key_);
+      if (order == 0) return true;  // key == cur_key_: we got it.
+      // key < cur_key_: we already read past the place where the key would
+      // be, so (the archive being sorted) it just isn't in the archive.
+      if (order < 0) return false;
+
+      // key > cur_key_: drop the current object and read further ahead.
+      prev_key = cur_key_;
+      KALDI_ASSERT(holder_ != NULL);
+      delete holder_;
+      holder_ = NULL;
+      state_ = kNoObject;  // must be set before asking for the next object.
+      ReadNextObject();
+      if (state_ != kHaveObject) return false;  // eof or read error.
+      if (cur_key_.compare(prev_key) <= 0) {
+        KALDI_ERR << "You provided the \"s\" option "
+                  << " (sorted order), but keys are out of order or duplicated: "
+                  << prev_key << " is followed by " << cur_key_
+                  << ": rspecifier is " << rspecifier_;
+      }
+    }
+  }
+
+  /// Last string provided to HasKey() or Value();
+  std::string last_requested_key_;
+
+
+};
+
+// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of
+// archives when the user specified the sorted (s) option but not the
+// called-sorted (cs) option.  Every (key, Holder*) pair read so far is kept
+// in seen_pairs_, in the order read (checked to be increasing by key), so
+// that keys behind the current read position can be found by binary search.
+template<class Holder>  class RandomAccessTableReaderSortedArchiveImpl:
+      public RandomAccessTableReaderArchiveImplBase<Holder> {
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
+
+ public:
+  typedef typename Holder::T T;
+
+  // Both indices start as size_t(-1), which this class uses to mean "none".
+  RandomAccessTableReaderSortedArchiveImpl():
+      last_found_index_(static_cast<size_t>(-1)),
+      pending_delete_(static_cast<size_t>(-1)) { }
+
+  // Frees every Holder still owned by seen_pairs_, resets the cached indices
+  // to "none", then returns the result of the base-class CloseInternal().
+  virtual bool Close() {
+    for (size_t i = 0; i < seen_pairs_.size(); i++)
+      if (seen_pairs_[i].second)
+        delete seen_pairs_[i].second;
+    seen_pairs_.clear();
+
+    pending_delete_ = static_cast<size_t>(-1);
+    last_found_index_ = static_cast<size_t>(-1);
+
+    return this->CloseInternal();
+  }
+
+  // Returns true if the archive has the key, reading ahead if necessary.
+  // With the once (o) option it is an error to ask about a key whose value
+  // was already handed out by Value() and since freed.
+  virtual bool HasKey(const std::string &key) {
+    HandlePendingDelete();
+    size_t index;
+    bool ans = FindKeyInternal(key, &index);
+    if (ans && opts_.once && seen_pairs_[index].second == NULL) {
+      // Just do a check RE the once option. "&&opts_.once" is for
+      // efficiency since this can only happen in that case.
+      KALDI_ERR << "Error: HasKey called after Value() already called for "
+                << " that key, and once (o) option specified: rspecifier is "
+                << rspecifier_;
+    }
+    return ans;
+  }
+
+  // Returns the object stored under "key"; it is an error (KALDI_ERR) if the
+  // key is not in the archive.  With the once (o) option the object is only
+  // marked for deletion here and actually freed on the next call to HasKey()
+  // or Value(), so the returned reference stays usable until then.
+  virtual const T & Value(const std::string &key) {
+    HandlePendingDelete();
+    size_t index;
+    if (FindKeyInternal(key, &index)) {
+      if (seen_pairs_[index].second == NULL) {  // can happen if opts_.once
+        KALDI_ERR << "Error: Value() called more than once for key "
+                  << key << " and once (o) option specified: rspecifier is "
+                  << rspecifier_;
+      }
+      if (opts_.once)
+        pending_delete_ = index;  // mark this index to be deleted on next call.
+      return seen_pairs_[index].second->Value();
+    } else {
+      KALDI_ERR << "Value() called but no such key " << key
+                << " in archive " << PrintableRxfilename(archive_rxfilename_);
+      return *(const T*)NULL;  // keep compiler happy.
+    }
+  }
+
+  // Closes the reader if the user left it open, and reports an error if
+  // that close fails.
+  virtual ~RandomAccessTableReaderSortedArchiveImpl() {
+    if (this->IsOpen())
+      if (!Close()) // more specific warning will already have been printed.
+        // we are in some kind of error state & user did not find out by
+        // calling Close().
+        KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
+                  << rspecifier_;
+  }
+ private:
+  // If a previous Value() call marked an element for deletion, frees its
+  // Holder and sets the pointer to NULL.  The key itself stays in
+  // seen_pairs_, so repeated use of the key can still be detected.
+  void HandlePendingDelete() {
+    const size_t npos = static_cast<size_t>(-1);
+    if (pending_delete_ != npos) {
+      KALDI_ASSERT(pending_delete_ < seen_pairs_.size());
+      KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL);
+      delete seen_pairs_[pending_delete_].second;
+      seen_pairs_[pending_delete_].second = NULL;
+      pending_delete_ = npos;
+    }
+  }
+
+  // FindKeyInternal tries to find the key in the array "seen_pairs_".
+  // If it is not already there, it reads ahead as far as necessary
+  // to determine whether we have the key or not.  On success it returns
+  // true and puts the index into the array seen_pairs_, into "index";
+  // on failure it returns false.
+  // It will leave the state as either kNoObject, kEof or kError.
+  // FindKeyInternal does not do any checking about whether you are asking
+  // about a key that has been already given (with the "once" option).
+  // That is the caller's responsibility.
+  bool FindKeyInternal(const std::string &key, size_t *index) {
+    // First, an optimization in case the previous call was for the
+    // same key, and we found it.
+    if (last_found_index_ < seen_pairs_.size()
+       && seen_pairs_[last_found_index_].first == key) {
+      *index = last_found_index_;
+      return true;
+    }
+
+    if (state_ == kUninitialized)
+      KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open.";
+
+    // Step one is to see whether we have to read ahead for the object.
+    // Note, the possible states right now are kNoObject, kEof or kError.
+    // We are never in the state kHaveObject except just after calling
+    // ReadNextObject().
+    bool looped = false;
+    while (state_ == kNoObject &&
+          (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) {
+      looped = true;
+      // Read this as:
+      //  while ( the stream is potentially good for reading &&
+      //        ([got no keys] || key > most_recent_key) ) { ...
+      //     Try to read a new object.
+      // Note that the keys in seen_pairs_ are ordered from least to greatest.
+      ReadNextObject();
+      if (state_ == kHaveObject) {  // Successfully read object.
+        if (!seen_pairs_.empty() && // This is just a check.
+           cur_key_.compare(seen_pairs_.back().first) <= 0) {
+          // read the expression above as: !( cur_key_ > previous_key).
+          // it means we are not in sorted order [the user specified that we
+          // are, or we would not be using this implementation].
+          KALDI_ERR << "You provided the sorted (s) option but keys in archive "
+                    << PrintableRxfilename(archive_rxfilename_) << " are not "
+                    << "in sorted order: " << seen_pairs_.back().first
+                    << " is followed by " << cur_key_;
+        }
+        KALDI_ASSERT(holder_ != NULL);
+        // Ownership of the Holder moves into seen_pairs_.
+        seen_pairs_.push_back(std::make_pair(cur_key_, holder_));
+        holder_ = NULL;
+        state_ = kNoObject;
+      }
+    }
+    if (looped) {  // We only need to check the last element of the seen_pairs_ array,
+      // since we would not have read more after getting "key".
+      if (!seen_pairs_.empty() && seen_pairs_.back().first == key) {
+        last_found_index_ = *index = seen_pairs_.size() - 1;
+        return true;
+      } else return false;
+    }
+    // Now we have to do an actual binary search in the seen_pairs_ array.
+    std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL));
+    typename std::vector<std::pair<std::string, Holder*> >::iterator
+        iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(),
+                                pr, PairCompare());
+    if (iter != seen_pairs_.end() &&
+       key == iter->first) {
+      last_found_index_ = *index = (iter - seen_pairs_.begin());
+      return true;
+    } else return false;
+  }
+
+  // These are the pairs of (key, object) we have read.  We keep all the keys we
+  // have read but the actual objects (if they are stored with pointers inside
+  // the Holder object) may be deallocated if once == true, and the Holder
+  // pointer set to NULL.
+  std::vector<std::pair<std::string, Holder*> > seen_pairs_;
+  size_t last_found_index_;  // An optimization s.t. if FindKeyInternal called twice with
+  // same key (as it often will), it doesn't have to do the key search twice.
+  size_t pending_delete_;  // If opts_.once == true, this is the index of
+  // element of seen_pairs_ that is pending deletion.
+  struct PairCompare {
+    // PairCompare is the less-than operator for the pairs of (key, Holder);
+    // it compares the keys only.  The call operator is const-qualified so
+    // the comparator can also be invoked through a const object or reference.
+    inline bool operator() (const std::pair<std::string, Holder*> &pr1,
+                            const std::pair<std::string, Holder*> &pr2) const {
+      return  (pr1.first.compare(pr2.first) < 0);
+    }
+  };
+};
+
+
+
+// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of
+// archives when the user does not specify the sorted (s) option (in this case
+// the called-sorted, or "cs" option, is ignored).  This is the least efficient
+// of the random access archive readers, in general, but it can be as efficient
+// as the others, in speed, memory and latency, if the "once" option is specified
+// and it happens that the keys of the archive are the same as the keys the code
+// is called with (to HasKey() and Value()), and in the same order.  However, if
+// you ask it for a key that's not present it will have to read the archive till
+// the end and store it all in memory.
+
+template<class Holder>  class RandomAccessTableReaderUnsortedArchiveImpl:
+      public RandomAccessTableReaderArchiveImplBase<Holder> {
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
+
+  typedef typename Holder::T T;
+
+ public:
+  // Starts with no element pending deletion.
+  RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()),
+                                                to_delete_iter_valid_(false)
+                                                 {
+    map_.max_load_factor(0.5);  // make it quite empty -> quite efficient.
+    // default seems to be 1.
+  }
+
+  // Frees every Holder still owned by map_, clears the "once" bookkeeping,
+  // then returns the result of the base-class CloseInternal().
+  virtual bool Close() {
+    for (typename MapType::iterator iter = map_.begin();
+        iter != map_.end();
+        ++iter) {
+      if (iter->second)
+        delete iter->second;
+    }
+    map_.clear();
+    first_deleted_string_ = "";
+    to_delete_iter_valid_ = false;
+    return this->CloseInternal();
+  }
+
+  // Returns true if the archive has the key, reading ahead (and storing
+  // what is read) as far as necessary to find out.
+  virtual bool HasKey(const std::string &key) {
+    HandlePendingDelete();
+    return FindKeyInternal(key, NULL);
+  }
+
+  // Returns the object stored under "key"; it is an error (KALDI_ERR) if the
+  // key cannot be found.  With the once (o) option the element is only
+  // marked for deletion here and actually removed on the next call to
+  // HasKey() or Value().
+  virtual const T & Value(const std::string &key) {
+    HandlePendingDelete();
+    const T *ans_ptr = NULL;
+    if (FindKeyInternal(key, &ans_ptr))
+      return *ans_ptr;
+    else
+      KALDI_ERR << "Value() called but no such key " << key
+                << " in archive " << PrintableRxfilename(archive_rxfilename_);
+    return *(const T*)NULL;  // keep compiler happy.
+  }
+
+  // Closes the reader if the user left it open, and reports an error if
+  // that close fails.
+  virtual ~RandomAccessTableReaderUnsortedArchiveImpl() {
+    if (this->IsOpen())
+      if (!Close()) // more specific warning will already have been printed.
+        // we are in some kind of error state & user did not find out by
+        // calling Close().
+        KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
+                  << rspecifier_;
+  }
+ private:
+  // If a previous Value() call marked an element for deletion, frees its
+  // Holder and erases the element from map_.  The first key ever removed
+  // this way is remembered in first_deleted_string_.
+  void HandlePendingDelete() {
+    if (to_delete_iter_valid_) {
+      to_delete_iter_valid_ = false;
+      delete to_delete_iter_->second;  // Delete Holder object.
+      if (first_deleted_string_.length() == 0)
+        first_deleted_string_ = to_delete_iter_->first;
+      map_.erase(to_delete_iter_);  // delete that element.
+    }
+  }
+
+  // FindKeyInternal tries to find the key in the map "map_".
+  // If it is not already there, it reads ahead either until it finds the
+  // key, or until the stream leaves the kNoObject state (end of file or
+  // error); every object read on the way is stored in map_.
+  // If called with value_ptr == NULL, it assumes it's called from HasKey()
+  // and just returns true or false without marking anything for deletion.
+  // If called with value_ptr != NULL, it assumes it's called from Value():
+  // on success it puts the address of the value in *value_ptr, and if
+  // opts_.once == true it marks that element of the map to be deleted.
+  // If the key cannot be found it returns false; reporting that as an error
+  // is left to the caller.
+  bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) {
+    typename MapType::iterator iter = map_.find(key);
+    if (iter != map_.end()) {  // Found in the map...
+      if (value_ptr == NULL) {  // called from HasKey
+        return true;  // this is all we have to do.
+      } else {
+        *value_ptr = &(iter->second->Value());
+        if (opts_.once) {  // value won't be needed again, so mark
+          // for deletion.
+          to_delete_iter_ = iter;  // pending delete.
+          KALDI_ASSERT(!to_delete_iter_valid_);
+          to_delete_iter_valid_ = true;
+        }
+        return true;
+      }
+    }
+    while (state_ == kNoObject) {
+      ReadNextObject();
+      if (state_ == kHaveObject) {  // Successfully read object.
+        state_ = kNoObject;  // we are about to transfer ownership
+        // of the object in holder_ to map_.
+        // Insert it into map_.
+        std::pair<typename MapType::iterator, bool> pr =
+            map_.insert(typename MapType::value_type(cur_key_, holder_));
+
+        if (!pr.second) {  // Was not inserted-- previous element w/ same key
+          delete holder_;  // map was not changed, no ownership transferred.
+          holder_ = NULL;
+          // Use PrintableRxfilename like every other message in this file.
+          KALDI_ERR << "Error in RandomAccessTableReader: duplicate key "
+                    << cur_key_ << " in archive "
+                    << PrintableRxfilename(archive_rxfilename_);
+        }
+        holder_ = NULL;  // ownership transferred to map_.
+        if (cur_key_ == key) {  // the one we wanted..
+          if (value_ptr == NULL) {  // called from HasKey
+            return true;
+          } else {  // called from Value()
+            *value_ptr = &(pr.first->second->Value());  // this gives us the
+            // Value() from the Holder in the map.
+            if (opts_.once) {  // mark for deletion, as won't be needed again.
+              to_delete_iter_ = pr.first;
+              KALDI_ASSERT(!to_delete_iter_valid_);
+              to_delete_iter_valid_ = true;
+            }
+            return true;
+          }
+        }
+      }
+    }
+    // Inexact spot-check for misuse of the once option.  An empty
+    // first_deleted_string_ means nothing has been deleted yet, so it must
+    // not be allowed to match an (invalid) empty key.
+    if (opts_.once && !first_deleted_string_.empty() &&
+        key == first_deleted_string_) {
+      KALDI_ERR << "You specified the once (o) option but "
+                << "you are calling using key " << key
+                << " more than once: rspecifier is " << rspecifier_;
+    }
+    return false;  // We read the entire archive (or got to error state) and didn't
+    // find it.
+  }
+
+  typedef unordered_map<std::string, Holder*, StringHasher>  MapType;
+  MapType map_;  // owns the Holder objects it points to.
+
+  // Element that the last Value() call marked for deletion; only meaningful
+  // while to_delete_iter_valid_ is true.
+  typename MapType::iterator to_delete_iter_;
+  bool to_delete_iter_valid_;
+
+  std::string first_deleted_string_;  // keep the first string we deleted
+  // from map_ (if opts_.once == true).  It's for an inexact spot-check that the
+  // "once" option isn't being used incorrectly.
+
+};
+
+
+
+
+
+// Constructs a reader and, unless "rspecifier" is the empty string, opens it
+// right away.  A failed open is reported with KALDI_ERR.
+template<class Holder>
+RandomAccessTableReader<Holder>::RandomAccessTableReader(const std::string &rspecifier):
+    impl_(NULL) {
+  if (rspecifier != "") {
+    if (!Open(rspecifier)) {
+      KALDI_ERR << "Error opening RandomAccessTableReader object "
+          " (rspecifier is: " << rspecifier << ")";
+    }
+  }
+}
+
+// Picks the implementation that matches the rspecifier type and options,
+// then asks it to open "rspecifier".  Returns false (leaving impl_ NULL) if
+// the rspecifier is invalid or the implementation fails to open it.
+template<class Holder>
+bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) {
+  if (IsOpen())
+    KALDI_ERR << "Already open.";
+  RspecifierOptions opts;
+  const RspecifierType rs_type = ClassifyRspecifier(rspecifier, NULL, &opts);
+  if (rs_type == kScriptRspecifier) {
+    impl_ = new RandomAccessTableReaderScriptImpl<Holder>();
+  } else if (rs_type == kArchiveRspecifier) {
+    if (!opts.sorted) {
+      impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>();
+    } else if (opts.called_sorted) {  // "doubly" sorted case.
+      impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>();
+    } else {
+      impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>();
+    }
+  } else {  // kNoRspecifier, or anything unrecognized.
+    KALDI_WARN << "Invalid rspecifier: "
+               << rspecifier;
+    return false;
+  }
+  if (!impl_->Open(rspecifier)) {
+    // Warning will already have been printed.
+    delete impl_;
+    impl_ = NULL;
+    return false;
+  }
+  return true;
+}
+
+// Checks that the reader is usable and that "key" is a valid token, then
+// forwards the query to the implementation.
+template<class Holder>
+bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) {
+  CheckImpl();
+  const bool key_is_token = IsToken(key);
+  if (!key_is_token) {
+    KALDI_ERR << "Invalid key \"" << key << '"';
+  }
+  return impl_->HasKey(key);
+}
+
+
+// Checks that the reader is usable, then forwards the lookup to the
+// implementation and returns its result.
+template<class Holder>
+const typename RandomAccessTableReader<Holder>::T&
+RandomAccessTableReader<Holder>::Value(const std::string &key) {
+  CheckImpl();
+  const T &value = impl_->Value(key);
+  return value;
+}
+
+// Closes the implementation, destroys it, and returns the status that the
+// implementation's Close() reported.
+template<class Holder>
+bool RandomAccessTableReader<Holder>::Close() {
+  CheckImpl();
+  const bool closed_ok = impl_->Close();
+  delete impl_;
+  impl_ = NULL;
+  return closed_ok;
+}
+
+// Closes the reader if it is still open; a failed close is reported with
+// KALDI_ERR.
+template<class Holder>
+RandomAccessTableReader<Holder>::~RandomAccessTableReader() {
+  if (!IsOpen()) return;
+  if (!Close()) {  // call Close() yourself to stop this being thrown.
+    KALDI_ERR << "failure detected in destructor.";
+  }
+}
+
+template<class Holder>
+void SequentialTableReader<Holder>::CheckImpl() const {
+  if (!impl_) {
+    KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you "
+              << "passed the empty string as an argument to a program?)";
+  }
+}
+
+template<class Holder>
+void RandomAccessTableReader<Holder>::CheckImpl() const {
+  if (!impl_) {
+    KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you "
+              << "passed the empty string as an argument to a program?)";
+  }
+}
+
+template<class Holder>
+void TableWriter<Holder>::CheckImpl() const {
+  if (!impl_) {
+    KALDI_ERR << "Trying to use empty TableWriter (perhaps you "
+              << "passed the empty string as an argument to a program?)";
+  }
+}
+
+// Constructs the table reader from "table_rxfilename" and the key-map reader
+// from "utt2spk_rxfilename".  When the table name is empty the map reader is
+// handed the empty string too.  The map name is stored for use in error
+// messages.
+template<class Holder>
+RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped(
+    const std::string &table_rxfilename,
+    const std::string &utt2spk_rxfilename)
+    : reader_(table_rxfilename),
+      token_reader_(table_rxfilename.empty() ? "" : utt2spk_rxfilename),
+      utt2spk_rxfilename_(utt2spk_rxfilename) {
+}
+
+// (Re)opens the table and, if "utt2spk_rxfilename" is non-empty, the key map.
+// Any readers that are currently open are closed first.  "table_rxfilename"
+// must not be empty.  Returns false if either open fails; in that case the
+// table reader is left closed.
+template<class Holder>
+bool RandomAccessTableReaderMapped<Holder>::Open(
+    const std::string &table_rxfilename,
+    const std::string &utt2spk_rxfilename) {
+  if (reader_.IsOpen()) reader_.Close();
+  if (token_reader_.IsOpen()) token_reader_.Close();
+  KALDI_ASSERT(!table_rxfilename.empty());
+  if (!reader_.Open(table_rxfilename)) return false; // will have printed
+  // warning internally, probably.
+  // Remember which map is now in use, so that the diagnostics in HasKey()
+  // and Value() name this file rather than whatever was passed to the
+  // constructor (nothing at all, for a default-constructed object).
+  utt2spk_rxfilename_ = utt2spk_rxfilename;
+  if (!utt2spk_rxfilename.empty()) {
+    if (!token_reader_.Open(utt2spk_rxfilename)) {
+      reader_.Close();
+      return false;
+    }
+  }
+  return true;
+}
+
+
+// Returns whether the table has an entry for "utt".  If a key map is open,
+// "utt" is first translated through it; it is an error (KALDI_ERR) for "utt"
+// to be missing from the map.
+template<class Holder>
+bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) {
+  // We don't check IsOpen for reader_; the call goes through to that member,
+  // which will crash with a more informative error message than we can give
+  // here, as we don't any longer know the rxfilename.
+  if (!token_reader_.IsOpen()) {
+    return reader_.HasKey(utt);  // no map in use: look the key up directly.
+  }
+  // We need to map the key from utt to spk.
+  if (!token_reader_.HasKey(utt)) {
+    KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
+              << "in utt2spk map or similar map being read from "
+              << PrintableRxfilename(utt2spk_rxfilename_);
+  }
+  const std::string &mapped_key = token_reader_.Value(utt);
+  return reader_.HasKey(mapped_key);
+}
+
+// Returns the table entry for "utt".  If a key map is open, "utt" is first
+// translated through it; it is an error (KALDI_ERR) for "utt" to be missing
+// from the map.
+template<class Holder>
+const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value(
+    const std::string &utt) {
+  if (!token_reader_.IsOpen()) {
+    return reader_.Value(utt);  // no map in use: look the key up directly.
+  }
+  // We need to map the key from utt to spk.
+  if (!token_reader_.HasKey(utt)) {
+    KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
+              << "in utt2spk map or similar map being read from "
+              << PrintableRxfilename(utt2spk_rxfilename_);
+  }
+  const std::string &mapped_key = token_reader_.Value(utt);
+  return reader_.Value(mapped_key);
+}
+
+
+
+/// @}
+
+} // end namespace kaldi
+
+
+
+#endif
author	Determinant <[email protected]>	2015-08-14 11:51:42 +0800
committer	Determinant <[email protected]>	2015-08-14 11:51:42 +0800
commit	96a32415ab43377cf1575bd3f4f2980f58028209 (patch)
tree	30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/kaldi/util/kaldi-table-inl.h
parent	c177a7549bd90670af4b29fa813ddea32cfe0f78 (diff)