summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/bi-table.h
diff options
context:
space:
mode:
authorDeterminant <[email protected]>2015-08-14 11:51:42 +0800
committerDeterminant <[email protected]>2015-08-14 11:51:42 +0800
commit96a32415ab43377cf1575bd3f4f2980f58028209 (patch)
tree30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/tools/openfst/include/fst/bi-table.h
parentc177a7549bd90670af4b29fa813ddea32cfe0f78 (diff)
add implementation for kaldi io (by ymz)
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/bi-table.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/bi-table.h532
1 files changed, 532 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/bi-table.h b/kaldi_io/src/tools/openfst/include/fst/bi-table.h
new file mode 100644
index 0000000..d220ce4
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/bi-table.h
@@ -0,0 +1,532 @@
+// bi-table.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Michael Riley)
+//
+// \file
+// Classes for representing a bijective mapping between an arbitrary entry
+// of type T and a signed integral ID.
+
+#ifndef FST_LIB_BI_TABLE_H__
+#define FST_LIB_BI_TABLE_H__
+
+#include <deque>
+using std::deque;
+#include <functional>
+#include <vector>
+using std::vector;
+
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+
+namespace fst {
+
+// BI TABLES - these determine a bijective mapping between an
+// arbitrary entry of type T and an signed integral ID of type I. The IDs are
+// allocated starting from 0 in order.
+//
+// template <class I, class T>
+// class BiTable {
+// public:
+//
+// // Required constructors.
+// BiTable();
+//
+// // Lookup integer ID from entry. If it doesn't exist and 'insert'
+// / is true, then add it. Otherwise return -1.
+// I FindId(const T &entry, bool insert = true);
+// // Lookup entry from integer ID.
+// const T &FindEntry(I) const;
+// // # of stored entries.
+// I Size() const;
+// };
+
+// An implementation using a hash map for the entry to ID mapping.
+// H is the hash function and E is the equality function.
+// If passed to the constructor, ownership is given to this class.
+
+template <class I, class T, class H, class E = std::equal_to<T> >
+class HashBiTable {
+ public:
+ // Reserves space for 'table_size' elements.
+ explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
+ : hash_func_(h),
+ hash_equal_(e),
+ entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
+ if (table_size)
+ id2entry_.reserve(table_size);
+ }
+
+ HashBiTable(const HashBiTable<I, T, H, E> &table)
+ : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
+ hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
+ entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
+ table.entry2id_.size(),
+ (hash_func_ ? *hash_func_ : H()),
+ (hash_equal_ ? *hash_equal_ : E())),
+ id2entry_(table.id2entry_) { }
+
+ ~HashBiTable() {
+ delete hash_func_;
+ delete hash_equal_;
+ }
+
+ I FindId(const T &entry, bool insert = true) {
+ I &id_ref = entry2id_[entry];
+ if (id_ref == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size();
+ } else {
+ return -1;
+ }
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ const T &FindEntry(I s) const {
+ return id2entry_[s];
+ }
+
+ I Size() const { return id2entry_.size(); }
+
+ private:
+ H *hash_func_;
+ E *hash_equal_;
+ unordered_map<T, I, H, E> entry2id_;
+ vector<T> id2entry_;
+
+ void operator=(const HashBiTable<I, T, H, E> &table); // disallow
+};
+
+
+// Enables alternative hash set representations below.
+// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
+typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
+
+// Default hash set is STL hash_set
+template<class K, class H, class E, HSType>
+struct HashSet : public unordered_set<K, H, E> {
+ HashSet(size_t n = 0, const H &h = H(), const E &e = E())
+ : unordered_set<K, H, E>(n, h, e) { }
+
+ void rehash(size_t n) { }
+};
+
+
+// An implementation using a hash set for the entry to ID mapping.
+// The hash set holds 'keys' which are either the ID or kCurrentKey.
+// These keys can be mapped to entrys either by looking up in the
+// entry vector or, if kCurrentKey, in current_entry_ member. The hash
+// and key equality functions map to entries first. H
+// is the hash function and E is the equality function. If passed to
+// the constructor, ownership is given to this class.
+template <class I, class T, class H,
+ class E = std::equal_to<T>, HSType HS = HS_DENSE>
+class CompactHashBiTable {
+ public:
+ friend class HashFunc;
+ friend class HashEqual;
+
+ // Reserves space for 'table_size' elements.
+ explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
+ : hash_func_(h),
+ hash_equal_(e),
+ compact_hash_func_(*this),
+ compact_hash_equal_(*this),
+ keys_(table_size, compact_hash_func_, compact_hash_equal_) {
+ if (table_size)
+ id2entry_.reserve(table_size);
+ }
+
+ CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
+ : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
+ hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
+ compact_hash_func_(*this),
+ compact_hash_equal_(*this),
+ keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
+ id2entry_(table.id2entry_) {
+ keys_.insert(table.keys_.begin(), table.keys_.end());
+ }
+
+ ~CompactHashBiTable() {
+ delete hash_func_;
+ delete hash_equal_;
+ }
+
+ I FindId(const T &entry, bool insert = true) {
+ current_entry_ = &entry;
+ typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
+ if (it == keys_.end()) { // T not found
+ if (insert) { // store and assign it a new ID
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return -1;
+ }
+ } else {
+ return *it;
+ }
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s]; }
+
+ I Size() const { return id2entry_.size(); }
+
+ // Clear content. With argument, erases last n IDs.
+ void Clear(ssize_t n = -1) {
+ if (n < 0 || n > id2entry_.size())
+ n = id2entry_.size();
+ while (n-- > 0) {
+ I key = id2entry_.size() - 1;
+ keys_.erase(key);
+ id2entry_.pop_back();
+ }
+ keys_.rehash(0);
+ }
+
+ private:
+ static const I kCurrentKey; // -1
+ static const I kEmptyKey; // -2
+ static const I kDeletedKey; // -3
+
+ class HashFunc {
+ public:
+ HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
+
+ size_t operator()(I k) const {
+ if (k >= kCurrentKey) {
+ return (*ht_->hash_func_)(ht_->Key2Entry(k));
+ } else {
+ return 0;
+ }
+ }
+
+ private:
+ const CompactHashBiTable *ht_;
+ };
+
+ class HashEqual {
+ public:
+ HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
+
+ bool operator()(I k1, I k2) const {
+ if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
+ return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
+ } else {
+ return k1 == k2;
+ }
+ }
+ private:
+ const CompactHashBiTable *ht_;
+ };
+
+ typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
+
+ const T &Key2Entry(I k) const {
+ if (k == kCurrentKey)
+ return *current_entry_;
+ else
+ return id2entry_[k];
+ }
+
+ H *hash_func_;
+ E *hash_equal_;
+ HashFunc compact_hash_func_;
+ HashEqual compact_hash_equal_;
+ KeyHashSet keys_;
+ vector<T> id2entry_;
+ const T *current_entry_;
+
+ void operator=(const CompactHashBiTable<I, T, H, E, HS> &table); // disallow
+};
+
+
+template <class I, class T, class H, class E, HSType HS>
+const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;
+
+template <class I, class T, class H, class E, HSType HS>
+const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;
+
+template <class I, class T, class H, class E, HSType HS>
+const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;
+
+
+// An implementation using a vector for the entry to ID mapping.
+// It is passed a function object FP that should fingerprint entries
+// uniquely to an integer that can used as a vector index. Normally,
+// VectorBiTable constructs the FP object. The user can instead
+// pass in this object; in that case, VectorBiTable takes its
+// ownership.
+template <class I, class T, class FP>
+class VectorBiTable {
+ public:
+ // Reserves space for 'table_size' elements.
+ explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
+ : fp_(fp ? fp : new FP()) {
+ if (table_size)
+ id2entry_.reserve(table_size);
+ }
+
+ VectorBiTable(const VectorBiTable<I, T, FP> &table)
+ : fp_(table.fp_ ? new FP(*table.fp_) : 0),
+ fp2id_(table.fp2id_),
+ id2entry_(table.id2entry_) { }
+
+ ~VectorBiTable() { delete fp_; }
+
+ I FindId(const T &entry, bool insert = true) {
+ ssize_t fp = (*fp_)(entry);
+ if (fp >= fp2id_.size())
+ fp2id_.resize(fp + 1);
+ I &id_ref = fp2id_[fp];
+ if (id_ref == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size();
+ } else {
+ return -1;
+ }
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s]; }
+
+ I Size() const { return id2entry_.size(); }
+
+ const FP &Fingerprint() const { return *fp_; }
+
+ private:
+ FP *fp_;
+ vector<I> fp2id_;
+ vector<T> id2entry_;
+
+ void operator=(const VectorBiTable<I, T, FP> &table); // disallow
+};
+
+
+// An implementation using a vector and a compact hash table. The
+// selecting functor S returns true for entries to be hashed in the
+// vector. The fingerprinting functor FP returns a unique fingerprint
+// for each entry to be hashed in the vector (these need to be
+// suitable for indexing in a vector). The hash functor H is used
+// when hashing entry into the compact hash table. If passed to the
+// constructor, ownership is given to this class.
+template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
+class VectorHashBiTable {
+ public:
+ friend class HashFunc;
+ friend class HashEqual;
+
+ explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0,
+ size_t vector_size = 0,
+ size_t entry_size = 0)
+ : selector_(s),
+ fp_(fp ? fp : new FP()),
+ h_(h ? h : new H()),
+ hash_func_(*this),
+ hash_equal_(*this),
+ keys_(0, hash_func_, hash_equal_) {
+ if (vector_size)
+ fp2id_.reserve(vector_size);
+ if (entry_size)
+ id2entry_.reserve(entry_size);
+ }
+
+ VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
+ : selector_(new S(table.s_)),
+ fp_(table.fp_ ? new FP(*table.fp_) : 0),
+ h_(table.h_ ? new H(*table.h_) : 0),
+ id2entry_(table.id2entry_),
+ fp2id_(table.fp2id_),
+ hash_func_(*this),
+ hash_equal_(*this),
+ keys_(table.keys_.size(), hash_func_, hash_equal_) {
+ keys_.insert(table.keys_.begin(), table.keys_.end());
+ }
+
+ ~VectorHashBiTable() {
+ delete selector_;
+ delete fp_;
+ delete h_;
+ }
+
+ I FindId(const T &entry, bool insert = true) {
+ if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true'
+ uint64 fp = (*fp_)(entry);
+ if (fp2id_.size() <= fp)
+ fp2id_.resize(fp + 1, 0);
+ if (fp2id_[fp] == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ fp2id_[fp] = id2entry_.size();
+ } else {
+ return -1;
+ }
+ }
+ return fp2id_[fp] - 1; // NB: assoc_value = ID + 1
+ } else { // Use the hash table otherwise.
+ current_entry_ = &entry;
+ typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
+ if (it == keys_.end()) {
+ if (insert) {
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return -1;
+ }
+ } else {
+ return *it;
+ }
+ }
+ }
+
+ const T &FindEntry(I s) const {
+ return id2entry_[s];
+ }
+
+ I Size() const { return id2entry_.size(); }
+
+ const S &Selector() const { return *selector_; }
+
+ const FP &Fingerprint() const { return *fp_; }
+
+ const H &Hash() const { return *h_; }
+
+ private:
+ static const I kCurrentKey; // -1
+ static const I kEmptyKey; // -2
+
+ class HashFunc {
+ public:
+ HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
+
+ size_t operator()(I k) const {
+ if (k >= kCurrentKey) {
+ return (*(ht_->h_))(ht_->Key2Entry(k));
+ } else {
+ return 0;
+ }
+ }
+ private:
+ const VectorHashBiTable *ht_;
+ };
+
+ class HashEqual {
+ public:
+ HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
+
+ bool operator()(I k1, I k2) const {
+ if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
+ return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
+ } else {
+ return k1 == k2;
+ }
+ }
+ private:
+ const VectorHashBiTable *ht_;
+ };
+
+ typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
+
+ const T &Key2Entry(I k) const {
+ if (k == kCurrentKey)
+ return *current_entry_;
+ else
+ return id2entry_[k];
+ }
+
+ S *selector_; // Returns true if entry hashed into vector
+ FP *fp_; // Fingerprint used when hashing entry into vector
+ H *h_; // Hash function used when hashing entry into hash_set
+
+ vector<T> id2entry_; // Maps state IDs to entry
+ vector<I> fp2id_; // Maps entry fingerprints to IDs
+
+ // Compact implementation of the hash table mapping entrys to
+ // state IDs using the hash function 'h_'
+ HashFunc hash_func_;
+ HashEqual hash_equal_;
+ KeyHashSet keys_;
+ const T *current_entry_;
+
+ // disallow
+ void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
+};
+
+template <class I, class T, class S, class FP, class H, HSType HS>
+const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;
+
+template <class I, class T, class S, class FP, class H, HSType HS>
+const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3;
+
+
+// An implementation using a hash map for the entry to ID
+// mapping. This version permits erasing of arbitrary states. The
+// entry T must have == defined and its default constructor must
+// produce a entry that will never be seen. F is the hash function.
+template <class I, class T, class F>
+class ErasableBiTable {
+ public:
+ ErasableBiTable() : first_(0) {}
+
+ I FindId(const T &entry, bool insert = true) {
+ I &id_ref = entry2id_[entry];
+ if (id_ref == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size() + first_;
+ } else {
+ return -1;
+ }
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s - first_]; }
+
+ I Size() const { return id2entry_.size(); }
+
+ void Erase(I s) {
+ T &entry = id2entry_[s - first_];
+ typename unordered_map<T, I, F>::iterator it =
+ entry2id_.find(entry);
+ entry2id_.erase(it);
+ id2entry_[s - first_] = empty_entry_;
+ while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
+ id2entry_.pop_front();
+ ++first_;
+ }
+ }
+
+ private:
+ unordered_map<T, I, F> entry2id_;
+ deque<T> id2entry_;
+ const T empty_entry_;
+ I first_; // I of first element in the deque;
+
+ // disallow
+ void operator=(const ErasableBiTable<I, T, F> &table); //disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_BI_TABLE_H__