summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h532
1 files changed, 532 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h
new file mode 100644
index 0000000..acce76e
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h
@@ -0,0 +1,532 @@
+// far.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Finite-State Transducer (FST) archive classes.
+//
+
+#ifndef FST_EXTENSIONS_FAR_FAR_H__
+#define FST_EXTENSIONS_FAR_FAR_H__
+
+#include <fst/extensions/far/stlist.h>
+#include <fst/extensions/far/sttable.h>
+#include <fst/fst.h>
+#include <fst/vector-fst.h>
+
+namespace fst {
+
+// Entry-type and token-type selectors for FAR tools.
+// NOTE(review): presumably FET_* chooses one entry per line vs. one per file
+// and FTT_* chooses how tokens are encoded (symbol table, bytes, UTF-8) --
+// confirm against the code that consumes these enums.
+enum FarEntryType { FET_LINE, FET_FILE };
+enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
+
+// Returns true iff 'filename' can be opened and IsFstHeader() accepts the
+// start of its contents. The file is opened in binary mode (as FstFarReader
+// does for the same files) so that no text-mode translation is applied to
+// the binary header on platforms that distinguish the two modes.
+inline bool IsFst(const string &filename) {
+  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+  if (!strm)
+    return false;
+  return IsFstHeader(strm, filename);
+}
+
+// FST archive header class. After a successful Read() it reports the archive
+// type ("stlist", "sttable" or "fst") and the arc type of the file.
+class FarHeader {
+ public:
+  // Archive type recorded by the last successful Read().
+  const string &FarType() const { return fartype_; }
+
+  // Arc type recorded by the last successful Read(); "unknown" when the
+  // header that was read carries an empty arc type.
+  const string &ArcType() const { return arctype_; }
+
+  // Detects the kind of archive stored in 'filename' and records its archive
+  // and arc type. Returns false, leaving the recorded values untouched, when
+  // the file is not recognized as an STTable, an STList or a single FST.
+  bool Read(const string &filename) {
+    FstHeader fsthdr;
+    if (filename.empty()) {
+      // Header reading unsupported on stdin. Assumes STList and StdArc.
+      fartype_ = "stlist";
+      arctype_ = "standard";
+      return true;
+    } else if (IsSTTable(filename)) {  // Check if STTable
+      ReadSTTableHeader(filename, &fsthdr);
+      fartype_ = "sttable";
+    } else if (IsSTList(filename)) {  // Check if STList
+      ReadSTListHeader(filename, &fsthdr);
+      // Was "sttable": STList archives were misreported as STTable.
+      fartype_ = "stlist";
+    } else if (IsFst(filename)) {  // Check if Fst
+      // Binary mode: the FST header is binary data.
+      ifstream istrm(filename.c_str(), ifstream::in | ifstream::binary);
+      fsthdr.Read(istrm, filename);
+      fartype_ = "fst";
+    } else {
+      return false;
+    }
+    // Shared by all three file-backed cases above.
+    arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+    return true;
+  }
+
+ private:
+  string fartype_;
+  string arctype_;
+};
+
+// Archive formats understood by the FAR readers and writers below.
+// FAR_DEFAULT leaves the choice to FarWriter::Create(), which picks the
+// STList writer for an empty filename and the STTable writer otherwise.
+enum FarType {
+ FAR_DEFAULT = 0,
+ FAR_STTABLE = 1,
+ FAR_STLIST = 2,
+ FAR_FST = 3,
+};
+
+// This class creates an archive of FSTs. It is the abstract interface
+// implemented by the STTable, STList and single-FST writers below; instances
+// are obtained through Create() and cannot be copied.
+template <class A>
+class FarWriter {
+ public:
+ typedef A Arc;
+
+ // Creates a new (empty) FST archive; returns NULL on error.
+ // 'type' selects the archive format; see FarType for what FAR_DEFAULT means.
+ static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
+
+ // Adds an FST to the end of an archive. Keys must be non-empty and
+ // in lexicographic order. FSTs must have a suitable write method.
+ virtual void Add(const string &key, const Fst<A> &fst) = 0;
+
+ // Returns the archive format written by the concrete writer.
+ virtual FarType Type() const = 0;
+
+ // Returns true if the writer is in an error state.
+ virtual bool Error() const = 0;
+
+ virtual ~FarWriter() {}
+
+ protected:
+ // Only concrete subclasses may construct the base.
+ FarWriter() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FarWriter);
+};
+
+
+// This class iterates through an existing archive of FSTs. It is the
+// abstract interface implemented by the STTable, STList and plain-FST
+// readers below; instances are obtained through Open() and cannot be copied.
+template <class A>
+class FarReader {
+ public:
+ typedef A Arc;
+
+ // Opens an existing FST archive in a single file; returns NULL on error.
+ // Sets current position to the beginning of the archive.
+ static FarReader *Open(const string &filename);
+
+ // Opens an existing FST archive in multiple files; returns NULL on error.
+ // Sets current position to the beginning of the archive.
+ static FarReader *Open(const vector<string> &filenames);
+
+ // Resets current position to beginning of archive.
+ virtual void Reset() = 0;
+
+ // Sets current position to first entry >= key. Returns true if a match.
+ virtual bool Find(const string &key) = 0;
+
+ // Current position at end of archive?
+ virtual bool Done() const = 0;
+
+ // Move current position to next FST.
+ virtual void Next() = 0;
+
+ // Returns key at the current position. This reference is invalidated if
+ // the current position in the archive is changed.
+ virtual const string &GetKey() const = 0;
+
+ // Returns FST at the current position. This reference is invalidated if
+ // the current position in the archive is changed.
+ virtual const Fst<A> &GetFst() const = 0;
+
+ // Returns the archive format handled by the concrete reader.
+ virtual FarType Type() const = 0;
+
+ // Returns true if the reader is in an error state.
+ virtual bool Error() const = 0;
+
+ virtual ~FarReader() {}
+
+ protected:
+ // Only concrete subclasses may construct the base.
+ FarReader() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FarReader);
+};
+
+
+// Functor that serializes an FST to an output stream using default write
+// options. Supplied as the entry-writer policy to the STTable/STList writers.
+template <class A>
+class FstWriter {
+ public:
+  void operator()(ostream &strm, const Fst<A> &fst) const {
+    FstWriteOptions default_opts;
+    fst.Write(strm, default_opts);
+  }
+};
+
+
+// FarWriter that stores its FSTs through an STTableWriter.
+template <class A>
+class STTableFarWriter : public FarWriter<A> {
+ public:
+  typedef A Arc;
+
+  // Creates a writer for 'filename'. Returns NULL if the underlying
+  // STTableWriter could not be created, so that a null writer is never
+  // wrapped (Add() and Error() dereference it unconditionally).
+  static STTableFarWriter *Create(const string &filename) {
+    STTableWriter<Fst<A>, FstWriter<A> > *writer =
+        STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
+    if (!writer) return 0;
+    return new STTableFarWriter(writer);
+  }
+
+  // Appends 'fst' under 'key' by forwarding to the underlying writer.
+  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
+
+  FarType Type() const { return FAR_STTABLE; }
+
+  // Reports the error state of the underlying writer.
+  bool Error() const { return writer_->Error(); }
+
+  ~STTableFarWriter() { delete writer_; }
+
+ private:
+  // Takes ownership of 'writer', which must be non-null.
+  explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
+      : writer_(writer) {}
+
+ private:
+  STTableWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
+};
+
+
+// FarWriter that stores its FSTs through an STListWriter.
+template <class A>
+class STListFarWriter : public FarWriter<A> {
+ public:
+  typedef A Arc;
+
+  // Creates a writer for 'filename'. Returns NULL if the underlying
+  // STListWriter could not be created, so that a null writer is never
+  // wrapped (Add() and Error() dereference it unconditionally).
+  static STListFarWriter *Create(const string &filename) {
+    STListWriter<Fst<A>, FstWriter<A> > *writer =
+        STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
+    if (!writer) return 0;
+    return new STListFarWriter(writer);
+  }
+
+  // Appends 'fst' under 'key' by forwarding to the underlying writer.
+  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
+
+  FarType Type() const { return FAR_STLIST; }
+
+  // Reports the error state of the underlying writer.
+  bool Error() const { return writer_->Error(); }
+
+  ~STListFarWriter() { delete writer_; }
+
+ private:
+  // Takes ownership of 'writer', which must be non-null.
+  explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
+      : writer_(writer) {}
+
+ private:
+  STListWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
+};
+
+
+// FarWriter that holds exactly one FST, written as a plain FST file.
+template <class A>
+class FstFarWriter : public FarWriter<A> {
+ public:
+  typedef A Arc;
+
+  // Remembers the destination; nothing is written until Add() is called.
+  explicit FstFarWriter(const string &filename)
+      : filename_(filename), error_(false), written_(false) {}
+
+  static FstFarWriter *Create(const string &filename) {
+    return new FstFarWriter(filename);
+  }
+
+  // Writes the first FST handed in to the destination file and records
+  // whether that write failed. The key is not used. Every later call only
+  // logs a warning and drops its FST.
+  void Add(const string &key, const Fst<A> &fst) {
+    if (written_) {
+      LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
+                   << " subsequent entries discarded.";
+      return;
+    }
+    const bool write_ok = fst.Write(filename_);
+    error_ = !write_ok;
+    written_ = true;
+  }
+
+  FarType Type() const { return FAR_FST; }
+
+  // True if the single write performed by Add() failed.
+  bool Error() const { return error_; }
+
+  ~FstFarWriter() {}
+
+ private:
+  string filename_;  // Destination file.
+  bool error_;       // Result of the one write, inverted.
+  bool written_;     // Set once Add() has written an FST.
+
+  DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
+};
+
+
+// Factory for the concrete writers. FAR_DEFAULT is resolved first: an empty
+// filename selects the STList writer, any other filename the STTable writer.
+// An unrecognized type is logged and yields a null pointer.
+template <class A>
+FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
+  if (type == FAR_DEFAULT)
+    type = filename.empty() ? FAR_STLIST : FAR_STTABLE;
+
+  switch (type) {
+    case FAR_STTABLE:
+      return STTableFarWriter<A>::Create(filename);
+    case FAR_STLIST:
+      return STListFarWriter<A>::Create(filename);
+    case FAR_FST:
+      return FstFarWriter<A>::Create(filename);
+    default:
+      LOG(ERROR) << "FarWriter::Create: unknown far type";
+      return 0;
+  }
+}
+
+
+// Functor that reads one FST from an input stream using default read
+// options. Supplied as the entry-reader policy to the STTable/STList readers.
+template <class A>
+class FstReader {
+ public:
+  Fst<A> *operator()(istream &strm) const {
+    FstReadOptions default_opts;
+    return Fst<A>::Read(strm, default_opts);
+  }
+};
+
+
+// FarReader that iterates over an archive through an STTableReader.
+template <class A>
+class STTableFarReader : public FarReader<A> {
+ public:
+  typedef A Arc;
+
+  // Opens the archive in 'filename'. Returns NULL if the underlying reader
+  // could not be created or reports an error right after opening.
+  static STTableFarReader *Open(const string &filename) {
+    STTableReader<Fst<A>, FstReader<A> > *reader =
+        STTableReader<Fst<A>, FstReader<A> >::Open(filename);
+    if (!reader || reader->Error()) {
+      delete reader;  // Deleting a null pointer is a no-op.
+      return 0;
+    }
+    return new STTableFarReader(reader);
+  }
+
+  // Opens the archive spread over 'filenames'. Returns NULL if the
+  // underlying reader could not be created or reports an error right after
+  // opening.
+  static STTableFarReader *Open(const vector<string> &filenames) {
+    STTableReader<Fst<A>, FstReader<A> > *reader =
+        STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
+    if (!reader || reader->Error()) {
+      delete reader;  // Deleting a null pointer is a no-op.
+      return 0;
+    }
+    return new STTableFarReader(reader);
+  }
+
+  // Everything below forwards to the underlying reader.
+  void Reset() { reader_->Reset(); }
+
+  bool Find(const string &key) { return reader_->Find(key); }
+
+  bool Done() const { return reader_->Done(); }
+
+  void Next() { return reader_->Next(); }
+
+  const string &GetKey() const { return reader_->GetKey(); }
+
+  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
+
+  FarType Type() const { return FAR_STTABLE; }
+
+  bool Error() const { return reader_->Error(); }
+
+  ~STTableFarReader() { delete reader_; }
+
+ private:
+  // Takes ownership of 'reader', which must be non-null.
+  explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
+      : reader_(reader) {}
+
+ private:
+  STTableReader<Fst<A>, FstReader<A> > *reader_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
+};
+
+
+// FarReader that iterates over an archive through an STListReader.
+template <class A>
+class STListFarReader : public FarReader<A> {
+ public:
+  typedef A Arc;
+
+  // Opens the archive in 'filename'. Returns NULL if the underlying reader
+  // could not be created or reports an error right after opening.
+  static STListFarReader *Open(const string &filename) {
+    STListReader<Fst<A>, FstReader<A> > *reader =
+        STListReader<Fst<A>, FstReader<A> >::Open(filename);
+    if (!reader || reader->Error()) {
+      delete reader;  // Deleting a null pointer is a no-op.
+      return 0;
+    }
+    return new STListFarReader(reader);
+  }
+
+  // Opens the archive spread over 'filenames'. Returns NULL if the
+  // underlying reader could not be created or reports an error right after
+  // opening.
+  static STListFarReader *Open(const vector<string> &filenames) {
+    STListReader<Fst<A>, FstReader<A> > *reader =
+        STListReader<Fst<A>, FstReader<A> >::Open(filenames);
+    if (!reader || reader->Error()) {
+      delete reader;  // Deleting a null pointer is a no-op.
+      return 0;
+    }
+    return new STListFarReader(reader);
+  }
+
+  // Everything below forwards to the underlying reader.
+  void Reset() { reader_->Reset(); }
+
+  bool Find(const string &key) { return reader_->Find(key); }
+
+  bool Done() const { return reader_->Done(); }
+
+  void Next() { return reader_->Next(); }
+
+  const string &GetKey() const { return reader_->GetKey(); }
+
+  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
+
+  FarType Type() const { return FAR_STLIST; }
+
+  bool Error() const { return reader_->Error(); }
+
+  ~STListFarReader() { delete reader_; }
+
+ private:
+  // Takes ownership of 'reader', which must be non-null.
+  explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
+      : reader_(reader) {}
+
+ private:
+  STListReader<Fst<A>, FstReader<A> > *reader_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STListFarReader);
+};
+
+// FarReader over a set of plain FST files. The filenames, sorted, act as the
+// keys; an empty filename stands for stdin and may appear at most once.
+// Exactly one FST (the one at the current position) is held in memory.
+template <class A>
+class FstFarReader : public FarReader<A> {
+ public:
+  typedef A Arc;
+
+  static FstFarReader *Open(const string &filename) {
+    vector<string> filenames;
+    filenames.push_back(filename);
+    return new FstFarReader<A>(filenames);
+  }
+
+  static FstFarReader *Open(const vector<string> &filenames) {
+    return new FstFarReader<A>(filenames);
+  }
+
+  // Sorts the filenames, opens one stream per file (binary mode) and reads
+  // the first FST. Sets the error flag if stdin is listed more than once.
+  FstFarReader(const vector<string> &filenames)
+      : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
+    sort(keys_.begin(), keys_.end());
+    streams_.resize(keys_.size(), 0);
+    for (size_t i = 0; i < keys_.size(); ++i) {
+      if (keys_[i].empty()) {
+        if (!has_stdin_) {
+          streams_[i] = &cin;  // Borrowed, never deleted.
+          has_stdin_ = true;
+        } else {
+          FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
+                     << "appear once in the input file list.";
+          error_ = true;
+          return;
+        }
+      } else {
+        streams_[i] = new ifstream(
+            keys_[i].c_str(), ifstream::in | ifstream::binary);
+      }
+    }
+    if (pos_ >= keys_.size()) return;
+    ReadFst();
+  }
+
+  // Rewinds to the first key. Not supported when stdin is one of the inputs.
+  void Reset() {
+    if (has_stdin_) {
+      FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
+      error_ = true;
+      return;
+    }
+    pos_ = 0;
+    ReadFst();
+  }
+
+  // Moves to the first entry whose key is >= 'key' and loads its FST.
+  // Returns true only if that entry's key equals 'key' and it was read
+  // without error. Not supported when stdin is one of the inputs.
+  bool Find(const string &key) {
+    if (has_stdin_) {
+      FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
+      error_ = true;
+      return false;
+    }
+    // keys_ was sorted by the constructor, so a binary search applies.
+    pos_ = lower_bound(keys_.begin(), keys_.end(), key) - keys_.begin();
+    ReadFst();
+    return !error_ && pos_ < keys_.size() && keys_[pos_] == key;
+  }
+
+  bool Done() const { return error_ || pos_ >= keys_.size(); }
+
+  void Next() {
+    ++pos_;
+    ReadFst();
+  }
+
+  // Only valid while !Done().
+  const string &GetKey() const {
+    return keys_[pos_];
+  }
+
+  // Only valid while !Done().
+  const Fst<A> &GetFst() const {
+    return *fst_;
+  }
+
+  FarType Type() const { return FAR_FST; }
+
+  bool Error() const { return error_; }
+
+  ~FstFarReader() {
+    delete fst_;
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      // cin is not owned by this object and must not be deleted.
+      if (streams_[i] != &cin) delete streams_[i];
+    }
+  }
+
+ private:
+  // Replaces the held FST with the one at pos_, or with nothing when pos_
+  // is past the last key. Sets the error flag if reading fails.
+  void ReadFst() {
+    // Null the pointer right after deleting so that an early return below
+    // cannot leave a dangling pointer for the destructor to delete again.
+    delete fst_;
+    fst_ = 0;
+    if (pos_ >= keys_.size()) return;
+    istream *strm = streams_[pos_];
+    // stdin is read once, from wherever it currently stands; seeking a
+    // non-seekable stream (e.g. a pipe) would set failbit and break the read.
+    if (strm != &cin) strm->seekg(0);
+    fst_ = Fst<A>::Read(*strm, FstReadOptions());
+    if (!fst_) {
+      FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
+      error_ = true;
+    }
+  }
+
+ private:
+  vector<string> keys_;        // Sorted filenames; "" denotes stdin.
+  vector<istream*> streams_;   // Parallel to keys_; owned except for &cin.
+  bool has_stdin_;             // True if one of the inputs is stdin.
+  size_t pos_;                 // Index of the current entry.
+  mutable Fst<A> *fst_;        // FST at pos_, or 0; owned.
+  mutable bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(FstFarReader);
+};
+
+// Single-file factory: picks the concrete reader from the file's contents.
+// An empty filename goes to the STList reader without any probing; a file
+// that is neither an STTable, an STList nor an FST yields a null pointer.
+template <class A>
+FarReader<A> *FarReader<A>::Open(const string &filename) {
+  FarReader<A> *result = 0;
+  if (filename.empty()) {
+    result = STListFarReader<A>::Open(filename);
+  } else if (IsSTTable(filename)) {
+    result = STTableFarReader<A>::Open(filename);
+  } else if (IsSTList(filename)) {
+    result = STListFarReader<A>::Open(filename);
+  } else if (IsFst(filename)) {
+    result = FstFarReader<A>::Open(filename);
+  }
+  return result;
+}
+
+
+// Multi-file factory: the first filename alone decides which concrete reader
+// receives the whole list. An empty list, or a first file that is neither an
+// STTable, an STList nor an FST, yields a null pointer.
+template <class A>
+FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
+  if (filenames.empty()) return 0;
+
+  const string &first = filenames[0];
+  if (first.empty()) return STListFarReader<A>::Open(filenames);
+  if (IsSTTable(first)) return STTableFarReader<A>::Open(filenames);
+  if (IsSTList(first)) return STListFarReader<A>::Open(filenames);
+  if (IsFst(first)) return FstFarReader<A>::Open(filenames);
+  return 0;
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_FAR_H__