// bi-table.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // Classes for representing a bijective mapping between an arbitrary entry // of type T and a signed integral ID. #ifndef FST_LIB_BI_TABLE_H__ #define FST_LIB_BI_TABLE_H__ #include using std::deque; #include #include using std::vector; #include using std::tr1::unordered_set; using std::tr1::unordered_multiset; namespace fst { // BI TABLES - these determine a bijective mapping between an // arbitrary entry of type T and an signed integral ID of type I. The IDs are // allocated starting from 0 in order. // // template // class BiTable { // public: // // // Required constructors. // BiTable(); // // // Lookup integer ID from entry. If it doesn't exist and 'insert' // / is true, then add it. Otherwise return -1. // I FindId(const T &entry, bool insert = true); // // Lookup entry from integer ID. // const T &FindEntry(I) const; // // # of stored entries. // I Size() const; // }; // An implementation using a hash map for the entry to ID mapping. // H is the hash function and E is the equality function. // If passed to the constructor, ownership is given to this class. template > class HashBiTable { public: // Reserves space for 'table_size' elements. explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) : hash_func_(h), hash_equal_(e), entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) { if (table_size) id2entry_.reserve(table_size); } HashBiTable(const HashBiTable &table) : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), entry2id_(table.entry2id_.begin(), table.entry2id_.end(), table.entry2id_.size(), (hash_func_ ? *hash_func_ : H()), (hash_equal_ ? *hash_equal_ : E())), id2entry_(table.id2entry_) { } ~HashBiTable() { delete hash_func_; delete hash_equal_; } I FindId(const T &entry, bool insert = true) { I &id_ref = entry2id_[entry]; if (id_ref == 0) { // T not found if (insert) { // store and assign it a new ID id2entry_.push_back(entry); id_ref = id2entry_.size(); } else { return -1; } } return id_ref - 1; // NB: id_ref = ID + 1 } const T &FindEntry(I s) const { return id2entry_[s]; } I Size() const { return id2entry_.size(); } private: H *hash_func_; E *hash_equal_; unordered_map entry2id_; vector id2entry_; void operator=(const HashBiTable &table); // disallow }; // Enables alternative hash set representations below. // typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; // Default hash set is STL hash_set template struct HashSet : public unordered_set { HashSet(size_t n = 0, const H &h = H(), const E &e = E()) : unordered_set(n, h, e) { } void rehash(size_t n) { } }; // An implementation using a hash set for the entry to ID mapping. // The hash set holds 'keys' which are either the ID or kCurrentKey. // These keys can be mapped to entrys either by looking up in the // entry vector or, if kCurrentKey, in current_entry_ member. The hash // and key equality functions map to entries first. H // is the hash function and E is the equality function. If passed to // the constructor, ownership is given to this class. template , HSType HS = HS_DENSE> class CompactHashBiTable { public: friend class HashFunc; friend class HashEqual; // Reserves space for 'table_size' elements. explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) : hash_func_(h), hash_equal_(e), compact_hash_func_(*this), compact_hash_equal_(*this), keys_(table_size, compact_hash_func_, compact_hash_equal_) { if (table_size) id2entry_.reserve(table_size); } CompactHashBiTable(const CompactHashBiTable &table) : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), compact_hash_func_(*this), compact_hash_equal_(*this), keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), id2entry_(table.id2entry_) { keys_.insert(table.keys_.begin(), table.keys_.end()); } ~CompactHashBiTable() { delete hash_func_; delete hash_equal_; } I FindId(const T &entry, bool insert = true) { current_entry_ = &entry; typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); if (it == keys_.end()) { // T not found if (insert) { // store and assign it a new ID I key = id2entry_.size(); id2entry_.push_back(entry); keys_.insert(key); return key; } else { return -1; } } else { return *it; } } const T &FindEntry(I s) const { return id2entry_[s]; } I Size() const { return id2entry_.size(); } // Clear content. With argument, erases last n IDs. void Clear(ssize_t n = -1) { if (n < 0 || n > id2entry_.size()) n = id2entry_.size(); while (n-- > 0) { I key = id2entry_.size() - 1; keys_.erase(key); id2entry_.pop_back(); } keys_.rehash(0); } private: static const I kCurrentKey; // -1 static const I kEmptyKey; // -2 static const I kDeletedKey; // -3 class HashFunc { public: HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} size_t operator()(I k) const { if (k >= kCurrentKey) { return (*ht_->hash_func_)(ht_->Key2Entry(k)); } else { return 0; } } private: const CompactHashBiTable *ht_; }; class HashEqual { public: HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} bool operator()(I k1, I k2) const { if (k1 >= kCurrentKey && k2 >= kCurrentKey) { return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); } else { return k1 == k2; } } private: const CompactHashBiTable *ht_; }; typedef HashSet KeyHashSet; const T &Key2Entry(I k) const { if (k == kCurrentKey) return *current_entry_; else return id2entry_[k]; } H *hash_func_; E *hash_equal_; HashFunc compact_hash_func_; HashEqual compact_hash_equal_; KeyHashSet keys_; vector id2entry_; const T *current_entry_; void operator=(const CompactHashBiTable &table); // disallow }; template const I CompactHashBiTable::kCurrentKey = -1; template const I CompactHashBiTable::kEmptyKey = -2; template const I CompactHashBiTable::kDeletedKey = -3; // An implementation using a vector for the entry to ID mapping. // It is passed a function object FP that should fingerprint entries // uniquely to an integer that can used as a vector index. Normally, // VectorBiTable constructs the FP object. The user can instead // pass in this object; in that case, VectorBiTable takes its // ownership. template class VectorBiTable { public: // Reserves space for 'table_size' elements. explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) : fp_(fp ? fp : new FP()) { if (table_size) id2entry_.reserve(table_size); } VectorBiTable(const VectorBiTable &table) : fp_(table.fp_ ? new FP(*table.fp_) : 0), fp2id_(table.fp2id_), id2entry_(table.id2entry_) { } ~VectorBiTable() { delete fp_; } I FindId(const T &entry, bool insert = true) { ssize_t fp = (*fp_)(entry); if (fp >= fp2id_.size()) fp2id_.resize(fp + 1); I &id_ref = fp2id_[fp]; if (id_ref == 0) { // T not found if (insert) { // store and assign it a new ID id2entry_.push_back(entry); id_ref = id2entry_.size(); } else { return -1; } } return id_ref - 1; // NB: id_ref = ID + 1 } const T &FindEntry(I s) const { return id2entry_[s]; } I Size() const { return id2entry_.size(); } const FP &Fingerprint() const { return *fp_; } private: FP *fp_; vector fp2id_; vector id2entry_; void operator=(const VectorBiTable &table); // disallow }; // An implementation using a vector and a compact hash table. The // selecting functor S returns true for entries to be hashed in the // vector. The fingerprinting functor FP returns a unique fingerprint // for each entry to be hashed in the vector (these need to be // suitable for indexing in a vector). The hash functor H is used // when hashing entry into the compact hash table. If passed to the // constructor, ownership is given to this class. template class VectorHashBiTable { public: friend class HashFunc; friend class HashEqual; explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0, size_t vector_size = 0, size_t entry_size = 0) : selector_(s), fp_(fp ? fp : new FP()), h_(h ? h : new H()), hash_func_(*this), hash_equal_(*this), keys_(0, hash_func_, hash_equal_) { if (vector_size) fp2id_.reserve(vector_size); if (entry_size) id2entry_.reserve(entry_size); } VectorHashBiTable(const VectorHashBiTable &table) : selector_(new S(table.s_)), fp_(table.fp_ ? new FP(*table.fp_) : 0), h_(table.h_ ? new H(*table.h_) : 0), id2entry_(table.id2entry_), fp2id_(table.fp2id_), hash_func_(*this), hash_equal_(*this), keys_(table.keys_.size(), hash_func_, hash_equal_) { keys_.insert(table.keys_.begin(), table.keys_.end()); } ~VectorHashBiTable() { delete selector_; delete fp_; delete h_; } I FindId(const T &entry, bool insert = true) { if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' uint64 fp = (*fp_)(entry); if (fp2id_.size() <= fp) fp2id_.resize(fp + 1, 0); if (fp2id_[fp] == 0) { // T not found if (insert) { // store and assign it a new ID id2entry_.push_back(entry); fp2id_[fp] = id2entry_.size(); } else { return -1; } } return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 } else { // Use the hash table otherwise. current_entry_ = &entry; typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); if (it == keys_.end()) { if (insert) { I key = id2entry_.size(); id2entry_.push_back(entry); keys_.insert(key); return key; } else { return -1; } } else { return *it; } } } const T &FindEntry(I s) const { return id2entry_[s]; } I Size() const { return id2entry_.size(); } const S &Selector() const { return *selector_; } const FP &Fingerprint() const { return *fp_; } const H &Hash() const { return *h_; } private: static const I kCurrentKey; // -1 static const I kEmptyKey; // -2 class HashFunc { public: HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} size_t operator()(I k) const { if (k >= kCurrentKey) { return (*(ht_->h_))(ht_->Key2Entry(k)); } else { return 0; } } private: const VectorHashBiTable *ht_; }; class HashEqual { public: HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} bool operator()(I k1, I k2) const { if (k1 >= kCurrentKey && k2 >= kCurrentKey) { return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); } else { return k1 == k2; } } private: const VectorHashBiTable *ht_; }; typedef HashSet KeyHashSet; const T &Key2Entry(I k) const { if (k == kCurrentKey) return *current_entry_; else return id2entry_[k]; } S *selector_; // Returns true if entry hashed into vector FP *fp_; // Fingerprint used when hashing entry into vector H *h_; // Hash function used when hashing entry into hash_set vector id2entry_; // Maps state IDs to entry vector fp2id_; // Maps entry fingerprints to IDs // Compact implementation of the hash table mapping entrys to // state IDs using the hash function 'h_' HashFunc hash_func_; HashEqual hash_equal_; KeyHashSet keys_; const T *current_entry_; // disallow void operator=(const VectorHashBiTable &table); }; template const I VectorHashBiTable::kCurrentKey = -1; template const I VectorHashBiTable::kEmptyKey = -3; // An implementation using a hash map for the entry to ID // mapping. This version permits erasing of arbitrary states. The // entry T must have == defined and its default constructor must // produce a entry that will never be seen. F is the hash function. template class ErasableBiTable { public: ErasableBiTable() : first_(0) {} I FindId(const T &entry, bool insert = true) { I &id_ref = entry2id_[entry]; if (id_ref == 0) { // T not found if (insert) { // store and assign it a new ID id2entry_.push_back(entry); id_ref = id2entry_.size() + first_; } else { return -1; } } return id_ref - 1; // NB: id_ref = ID + 1 } const T &FindEntry(I s) const { return id2entry_[s - first_]; } I Size() const { return id2entry_.size(); } void Erase(I s) { T &entry = id2entry_[s - first_]; typename unordered_map::iterator it = entry2id_.find(entry); entry2id_.erase(it); id2entry_[s - first_] = empty_entry_; while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { id2entry_.pop_front(); ++first_; } } private: unordered_map entry2id_; deque id2entry_; const T empty_entry_; I first_; // I of first element in the deque; // disallow void operator=(const ErasableBiTable &table); //disallow }; } // namespace fst #endif // FST_LIB_BI_TABLE_H__