summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/symbol-table.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/symbol-table.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/symbol-table.h537
1 files changed, 0 insertions, 537 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/symbol-table.h b/kaldi_io/src/tools/openfst/include/fst/symbol-table.h
deleted file mode 100644
index 6eb6c2d..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/symbol-table.h
+++ /dev/null
@@ -1,537 +0,0 @@
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Copyright 2005-2010 Google, Inc.
-// All Rights Reserved.
-//
-// Author : Johan Schalkwyk
-//
-// \file
-// Classes to provide symbol-to-integer and integer-to-symbol mappings.
-
-#ifndef FST_LIB_SYMBOL_TABLE_H__
-#define FST_LIB_SYMBOL_TABLE_H__
-
-#include <cstring>
-#include <string>
-#include <utility>
-using std::pair; using std::make_pair;
-#include <vector>
-using std::vector;
-
-
-#include <fst/compat.h>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-
-
-#include <map>
-
-DECLARE_bool(fst_compat_symbols);
-
-namespace fst {
-
-// WARNING: Reading via symbol table read options should
-// not be used. This is a temporary work around for
-// reading symbol ranges of previously stored symbol sets.
-struct SymbolTableReadOptions {
- SymbolTableReadOptions() { }
-
- SymbolTableReadOptions(vector<pair<int64, int64> > string_hash_ranges_,
- const string& source_)
- : string_hash_ranges(string_hash_ranges_),
- source(source_) { }
-
- vector<pair<int64, int64> > string_hash_ranges;
- string source;
-};
-
-struct SymbolTableTextOptions {
- SymbolTableTextOptions();
-
- bool allow_negative;
- string fst_field_separator;
-};
-
-class SymbolTableImpl {
- public:
- SymbolTableImpl(const string &name)
- : name_(name),
- available_key_(0),
- dense_key_limit_(0),
- check_sum_finalized_(false) {}
-
- explicit SymbolTableImpl(const SymbolTableImpl& impl)
- : name_(impl.name_),
- available_key_(0),
- dense_key_limit_(0),
- check_sum_finalized_(false) {
- for (size_t i = 0; i < impl.symbols_.size(); ++i) {
- AddSymbol(impl.symbols_[i], impl.Find(impl.symbols_[i]));
- }
- }
-
- ~SymbolTableImpl() {
- for (size_t i = 0; i < symbols_.size(); ++i)
- delete[] symbols_[i];
- }
-
- // TODO(johans): Add flag to specify whether the symbol
- // should be indexed as string or int or both.
- int64 AddSymbol(const string& symbol, int64 key);
-
- int64 AddSymbol(const string& symbol) {
- int64 key = Find(symbol);
- return (key == -1) ? AddSymbol(symbol, available_key_++) : key;
- }
-
- static SymbolTableImpl* ReadText(
- istream &strm, const string &name,
- const SymbolTableTextOptions &opts = SymbolTableTextOptions());
-
- static SymbolTableImpl* Read(istream &strm,
- const SymbolTableReadOptions& opts);
-
- bool Write(ostream &strm) const;
-
- //
- // Return the string associated with the key. If the key is out of
- // range (<0, >max), return an empty string.
- string Find(int64 key) const {
- if (key >=0 && key < dense_key_limit_)
- return string(symbols_[key]);
-
- map<int64, const char*>::const_iterator it =
- key_map_.find(key);
- if (it == key_map_.end()) {
- return "";
- }
- return string(it->second);
- }
-
- //
- // Return the key associated with the symbol. If the symbol
- // does not exists, return SymbolTable::kNoSymbol.
- int64 Find(const string& symbol) const {
- return Find(symbol.c_str());
- }
-
- //
- // Return the key associated with the symbol. If the symbol
- // does not exists, return SymbolTable::kNoSymbol.
- int64 Find(const char* symbol) const {
- map<const char *, int64, StrCmp>::const_iterator it =
- symbol_map_.find(symbol);
- if (it == symbol_map_.end()) {
- return -1;
- }
- return it->second;
- }
-
- int64 GetNthKey(ssize_t pos) const {
- if ((pos < 0) || (pos >= symbols_.size())) return -1;
- else return Find(symbols_[pos]);
- }
-
- const string& Name() const { return name_; }
-
- int IncrRefCount() const {
- return ref_count_.Incr();
- }
- int DecrRefCount() const {
- return ref_count_.Decr();
- }
- int RefCount() const {
- return ref_count_.count();
- }
-
- string CheckSum() const {
- MaybeRecomputeCheckSum();
- return check_sum_string_;
- }
-
- string LabeledCheckSum() const {
- MaybeRecomputeCheckSum();
- return labeled_check_sum_string_;
- }
-
- int64 AvailableKey() const {
- return available_key_;
- }
-
- size_t NumSymbols() const {
- return symbols_.size();
- }
-
- private:
- // Recomputes the checksums (both of them) if we've had changes since the last
- // computation (i.e., if check_sum_finalized_ is false).
- // Takes ~2.5 microseconds (dbg) or ~230 nanoseconds (opt) on a 2.67GHz Xeon
- // if the checksum is up-to-date (requiring no recomputation).
- void MaybeRecomputeCheckSum() const;
-
- struct StrCmp {
- bool operator()(const char *s1, const char *s2) const {
- return strcmp(s1, s2) < 0;
- }
- };
-
- string name_;
- int64 available_key_;
- int64 dense_key_limit_;
- vector<const char *> symbols_;
- map<int64, const char*> key_map_;
- map<const char *, int64, StrCmp> symbol_map_;
-
- mutable RefCounter ref_count_;
- mutable bool check_sum_finalized_;
- mutable string check_sum_string_;
- mutable string labeled_check_sum_string_;
- mutable Mutex check_sum_mutex_;
-};
-
-//
-// \class SymbolTable
-// \brief Symbol (string) to int and reverse mapping
-//
-// The SymbolTable implements the mappings of labels to strings and reverse.
-// SymbolTables are used to describe the alphabet of the input and output
-// labels for arcs in a Finite State Transducer.
-//
-// SymbolTables are reference counted and can therefore be shared across
-// multiple machines. For example a language model grammar G, with a
-// SymbolTable for the words in the language model can share this symbol
-// table with the lexical representation L o G.
-//
-class SymbolTable {
- public:
- static const int64 kNoSymbol = -1;
-
- // Construct symbol table with an unspecified name.
- SymbolTable() : impl_(new SymbolTableImpl("<unspecified>")) {}
-
- // Construct symbol table with a unique name.
- SymbolTable(const string& name) : impl_(new SymbolTableImpl(name)) {}
-
- // Create a reference counted copy.
- SymbolTable(const SymbolTable& table) : impl_(table.impl_) {
- impl_->IncrRefCount();
- }
-
- // Derefence implentation object. When reference count hits 0, delete
- // implementation.
- virtual ~SymbolTable() {
- if (!impl_->DecrRefCount()) delete impl_;
- }
-
- // Copys the implemenation from one symbol table to another.
- void operator=(const SymbolTable &st) {
- if (impl_ != st.impl_) {
- st.impl_->IncrRefCount();
- if (!impl_->DecrRefCount()) delete impl_;
- impl_ = st.impl_;
- }
- }
-
- // Read an ascii representation of the symbol table from an istream. Pass a
- // name to give the resulting SymbolTable.
- static SymbolTable* ReadText(
- istream &strm, const string& name,
- const SymbolTableTextOptions &opts = SymbolTableTextOptions()) {
- SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm, name, opts);
- if (!impl)
- return 0;
- else
- return new SymbolTable(impl);
- }
-
- // read an ascii representation of the symbol table
- static SymbolTable* ReadText(const string& filename,
- const SymbolTableTextOptions &opts = SymbolTableTextOptions()) {
- ifstream strm(filename.c_str(), ifstream::in);
- if (!strm) {
- LOG(ERROR) << "SymbolTable::ReadText: Can't open file " << filename;
- return 0;
- }
- return ReadText(strm, filename, opts);
- }
-
-
- // WARNING: Reading via symbol table read options should
- // not be used. This is a temporary work around.
- static SymbolTable* Read(istream &strm,
- const SymbolTableReadOptions& opts) {
- SymbolTableImpl* impl = SymbolTableImpl::Read(strm, opts);
- if (!impl)
- return 0;
- else
- return new SymbolTable(impl);
- }
-
- // read a binary dump of the symbol table from a stream
- static SymbolTable* Read(istream &strm, const string& source) {
- SymbolTableReadOptions opts;
- opts.source = source;
- return Read(strm, opts);
- }
-
- // read a binary dump of the symbol table
- static SymbolTable* Read(const string& filename) {
- ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
- if (!strm) {
- LOG(ERROR) << "SymbolTable::Read: Can't open file " << filename;
- return 0;
- }
- return Read(strm, filename);
- }
-
- //--------------------------------------------------------
- // Derivable Interface (final)
- //--------------------------------------------------------
- // create a reference counted copy
- virtual SymbolTable* Copy() const {
- return new SymbolTable(*this);
- }
-
- // Add a symbol with given key to table. A symbol table also
- // keeps track of the last available key (highest key value in
- // the symbol table).
- virtual int64 AddSymbol(const string& symbol, int64 key) {
- MutateCheck();
- return impl_->AddSymbol(symbol, key);
- }
-
- // Add a symbol to the table. The associated value key is automatically
- // assigned by the symbol table.
- virtual int64 AddSymbol(const string& symbol) {
- MutateCheck();
- return impl_->AddSymbol(symbol);
- }
-
- // Add another symbol table to this table. All key values will be offset
- // by the current available key (highest key value in the symbol table).
- // Note string symbols with the same key value with still have the same
- // key value after the symbol table has been merged, but a different
- // value. Adding symbol tables do not result in changes in the base table.
- virtual void AddTable(const SymbolTable& table);
-
- // return the name of the symbol table
- virtual const string& Name() const {
- return impl_->Name();
- }
-
- // Return the label-agnostic MD5 check-sum for this table. All new symbols
- // added to the table will result in an updated checksum.
- // DEPRECATED.
- virtual string CheckSum() const {
- return impl_->CheckSum();
- }
-
- // Same as CheckSum(), but this returns an label-dependent version.
- virtual string LabeledCheckSum() const {
- return impl_->LabeledCheckSum();
- }
-
- virtual bool Write(ostream &strm) const {
- return impl_->Write(strm);
- }
-
- bool Write(const string& filename) const {
- ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
- if (!strm) {
- LOG(ERROR) << "SymbolTable::Write: Can't open file " << filename;
- return false;
- }
- return Write(strm);
- }
-
- // Dump an ascii text representation of the symbol table via a stream
- virtual bool WriteText(
- ostream &strm,
- const SymbolTableTextOptions &opts = SymbolTableTextOptions()) const;
-
- // Dump an ascii text representation of the symbol table
- bool WriteText(const string& filename) const {
- ofstream strm(filename.c_str());
- if (!strm) {
- LOG(ERROR) << "SymbolTable::WriteText: Can't open file " << filename;
- return false;
- }
- return WriteText(strm);
- }
-
- // Return the string associated with the key. If the key is out of
- // range (<0, >max), log error and return an empty string.
- virtual string Find(int64 key) const {
- return impl_->Find(key);
- }
-
- // Return the key associated with the symbol. If the symbol
- // does not exists, log error and return SymbolTable::kNoSymbol
- virtual int64 Find(const string& symbol) const {
- return impl_->Find(symbol);
- }
-
- // Return the key associated with the symbol. If the symbol
- // does not exists, log error and return SymbolTable::kNoSymbol
- virtual int64 Find(const char* symbol) const {
- return impl_->Find(symbol);
- }
-
- // Return the current available key (i.e highest key number+1) in
- // the symbol table
- virtual int64 AvailableKey(void) const {
- return impl_->AvailableKey();
- }
-
- // Return the current number of symbols in table (not necessarily
- // equal to AvailableKey())
- virtual size_t NumSymbols(void) const {
- return impl_->NumSymbols();
- }
-
- virtual int64 GetNthKey(ssize_t pos) const {
- return impl_->GetNthKey(pos);
- }
-
- private:
- explicit SymbolTable(SymbolTableImpl* impl) : impl_(impl) {}
-
- void MutateCheck() {
- // Copy on write
- if (impl_->RefCount() > 1) {
- impl_->DecrRefCount();
- impl_ = new SymbolTableImpl(*impl_);
- }
- }
-
- const SymbolTableImpl* Impl() const {
- return impl_;
- }
-
- private:
- SymbolTableImpl* impl_;
-};
-
-
-//
-// \class SymbolTableIterator
-// \brief Iterator class for symbols in a symbol table
-class SymbolTableIterator {
- public:
- SymbolTableIterator(const SymbolTable& table)
- : table_(table),
- pos_(0),
- nsymbols_(table.NumSymbols()),
- key_(table.GetNthKey(0)) { }
-
- ~SymbolTableIterator() { }
-
- // is iterator done
- bool Done(void) {
- return (pos_ == nsymbols_);
- }
-
- // return the Value() of the current symbol (int64 key)
- int64 Value(void) {
- return key_;
- }
-
- // return the string of the current symbol
- string Symbol(void) {
- return table_.Find(key_);
- }
-
- // advance iterator forward
- void Next(void) {
- ++pos_;
- if (pos_ < nsymbols_) key_ = table_.GetNthKey(pos_);
- }
-
- // reset iterator
- void Reset(void) {
- pos_ = 0;
- key_ = table_.GetNthKey(0);
- }
-
- private:
- const SymbolTable& table_;
- ssize_t pos_;
- size_t nsymbols_;
- int64 key_;
-};
-
-
-// Tests compatibilty between two sets of symbol tables
-inline bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2,
- bool warning = true) {
- if (!FLAGS_fst_compat_symbols) {
- return true;
- } else if (!syms1 && !syms2) {
- return true;
- } else if (syms1 && !syms2) {
- if (warning)
- LOG(WARNING) <<
- "CompatSymbols: first symbol table present but second missing";
- return false;
- } else if (!syms1 && syms2) {
- if (warning)
- LOG(WARNING) <<
- "CompatSymbols: second symbol table present but first missing";
- return false;
- } else if (syms1->LabeledCheckSum() != syms2->LabeledCheckSum()) {
- if (warning)
- LOG(WARNING) << "CompatSymbols: Symbol table check sums do not match";
- return false;
- } else {
- return true;
- }
-}
-
-
-// Relabels a symbol table as specified by the input vector of pairs
-// (old label, new label). The new symbol table only retains symbols
-// for which a relabeling is *explicitely* specified.
-// TODO(allauzen): consider adding options to allow for some form
-// of implicit identity relabeling.
-template <class Label>
-SymbolTable *RelabelSymbolTable(const SymbolTable *table,
- const vector<pair<Label, Label> > &pairs) {
- SymbolTable *new_table = new SymbolTable(
- table->Name().empty() ? string() :
- (string("relabeled_") + table->Name()));
-
- for (size_t i = 0; i < pairs.size(); ++i)
- new_table->AddSymbol(table->Find(pairs[i].first), pairs[i].second);
-
- return new_table;
-}
-
-// Symbol Table Serialization
-inline void SymbolTableToString(const SymbolTable *table, string *result) {
- ostringstream ostrm;
- table->Write(ostrm);
- *result = ostrm.str();
-}
-
-inline SymbolTable *StringToSymbolTable(const string &s) {
- istringstream istrm(s);
- return SymbolTable::Read(istrm, SymbolTableReadOptions());
-}
-
-
-
-} // namespace fst
-
-#endif // FST_LIB_SYMBOL_TABLE_H__