From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/tools/openfst/include/fst/bi-table.h | 532 ++++++++++++++++++++++ 1 file changed, 532 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/bi-table.h (limited to 'kaldi_io/src/tools/openfst/include/fst/bi-table.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/bi-table.h b/kaldi_io/src/tools/openfst/include/fst/bi-table.h new file mode 100644 index 0000000..d220ce4 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/bi-table.h @@ -0,0 +1,532 @@ +// bi-table.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// Classes for representing a bijective mapping between an arbitrary entry +// of type T and a signed integral ID. + +#ifndef FST_LIB_BI_TABLE_H__ +#define FST_LIB_BI_TABLE_H__ + +#include +using std::deque; +#include +#include +using std::vector; + +#include +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; + +namespace fst { + +// BI TABLES - these determine a bijective mapping between an +// arbitrary entry of type T and an signed integral ID of type I. The IDs are +// allocated starting from 0 in order. +// +// template +// class BiTable { +// public: +// +// // Required constructors. +// BiTable(); +// +// // Lookup integer ID from entry. If it doesn't exist and 'insert' +// / is true, then add it. Otherwise return -1. +// I FindId(const T &entry, bool insert = true); +// // Lookup entry from integer ID. +// const T &FindEntry(I) const; +// // # of stored entries. +// I Size() const; +// }; + +// An implementation using a hash map for the entry to ID mapping. +// H is the hash function and E is the equality function. +// If passed to the constructor, ownership is given to this class. + +template > +class HashBiTable { + public: + // Reserves space for 'table_size' elements. + explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) + : hash_func_(h), + hash_equal_(e), + entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) { + if (table_size) + id2entry_.reserve(table_size); + } + + HashBiTable(const HashBiTable &table) + : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), + hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), + entry2id_(table.entry2id_.begin(), table.entry2id_.end(), + table.entry2id_.size(), + (hash_func_ ? *hash_func_ : H()), + (hash_equal_ ? *hash_equal_ : E())), + id2entry_(table.id2entry_) { } + + ~HashBiTable() { + delete hash_func_; + delete hash_equal_; + } + + I FindId(const T &entry, bool insert = true) { + I &id_ref = entry2id_[entry]; + if (id_ref == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + id_ref = id2entry_.size(); + } else { + return -1; + } + } + return id_ref - 1; // NB: id_ref = ID + 1 + } + + const T &FindEntry(I s) const { + return id2entry_[s]; + } + + I Size() const { return id2entry_.size(); } + + private: + H *hash_func_; + E *hash_equal_; + unordered_map entry2id_; + vector id2entry_; + + void operator=(const HashBiTable &table); // disallow +}; + + +// Enables alternative hash set representations below. +// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; +typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; + +// Default hash set is STL hash_set +template +struct HashSet : public unordered_set { + HashSet(size_t n = 0, const H &h = H(), const E &e = E()) + : unordered_set(n, h, e) { } + + void rehash(size_t n) { } +}; + + +// An implementation using a hash set for the entry to ID mapping. +// The hash set holds 'keys' which are either the ID or kCurrentKey. +// These keys can be mapped to entrys either by looking up in the +// entry vector or, if kCurrentKey, in current_entry_ member. The hash +// and key equality functions map to entries first. H +// is the hash function and E is the equality function. If passed to +// the constructor, ownership is given to this class. +template , HSType HS = HS_DENSE> +class CompactHashBiTable { + public: + friend class HashFunc; + friend class HashEqual; + + // Reserves space for 'table_size' elements. + explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) + : hash_func_(h), + hash_equal_(e), + compact_hash_func_(*this), + compact_hash_equal_(*this), + keys_(table_size, compact_hash_func_, compact_hash_equal_) { + if (table_size) + id2entry_.reserve(table_size); + } + + CompactHashBiTable(const CompactHashBiTable &table) + : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), + hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), + compact_hash_func_(*this), + compact_hash_equal_(*this), + keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), + id2entry_(table.id2entry_) { + keys_.insert(table.keys_.begin(), table.keys_.end()); + } + + ~CompactHashBiTable() { + delete hash_func_; + delete hash_equal_; + } + + I FindId(const T &entry, bool insert = true) { + current_entry_ = &entry; + typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); + if (it == keys_.end()) { // T not found + if (insert) { // store and assign it a new ID + I key = id2entry_.size(); + id2entry_.push_back(entry); + keys_.insert(key); + return key; + } else { + return -1; + } + } else { + return *it; + } + } + + const T &FindEntry(I s) const { return id2entry_[s]; } + + I Size() const { return id2entry_.size(); } + + // Clear content. With argument, erases last n IDs. + void Clear(ssize_t n = -1) { + if (n < 0 || n > id2entry_.size()) + n = id2entry_.size(); + while (n-- > 0) { + I key = id2entry_.size() - 1; + keys_.erase(key); + id2entry_.pop_back(); + } + keys_.rehash(0); + } + + private: + static const I kCurrentKey; // -1 + static const I kEmptyKey; // -2 + static const I kDeletedKey; // -3 + + class HashFunc { + public: + HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} + + size_t operator()(I k) const { + if (k >= kCurrentKey) { + return (*ht_->hash_func_)(ht_->Key2Entry(k)); + } else { + return 0; + } + } + + private: + const CompactHashBiTable *ht_; + }; + + class HashEqual { + public: + HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} + + bool operator()(I k1, I k2) const { + if (k1 >= kCurrentKey && k2 >= kCurrentKey) { + return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); + } else { + return k1 == k2; + } + } + private: + const CompactHashBiTable *ht_; + }; + + typedef HashSet KeyHashSet; + + const T &Key2Entry(I k) const { + if (k == kCurrentKey) + return *current_entry_; + else + return id2entry_[k]; + } + + H *hash_func_; + E *hash_equal_; + HashFunc compact_hash_func_; + HashEqual compact_hash_equal_; + KeyHashSet keys_; + vector id2entry_; + const T *current_entry_; + + void operator=(const CompactHashBiTable &table); // disallow +}; + + +template +const I CompactHashBiTable::kCurrentKey = -1; + +template +const I CompactHashBiTable::kEmptyKey = -2; + +template +const I CompactHashBiTable::kDeletedKey = -3; + + +// An implementation using a vector for the entry to ID mapping. +// It is passed a function object FP that should fingerprint entries +// uniquely to an integer that can used as a vector index. Normally, +// VectorBiTable constructs the FP object. The user can instead +// pass in this object; in that case, VectorBiTable takes its +// ownership. +template +class VectorBiTable { + public: + // Reserves space for 'table_size' elements. + explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) + : fp_(fp ? fp : new FP()) { + if (table_size) + id2entry_.reserve(table_size); + } + + VectorBiTable(const VectorBiTable &table) + : fp_(table.fp_ ? new FP(*table.fp_) : 0), + fp2id_(table.fp2id_), + id2entry_(table.id2entry_) { } + + ~VectorBiTable() { delete fp_; } + + I FindId(const T &entry, bool insert = true) { + ssize_t fp = (*fp_)(entry); + if (fp >= fp2id_.size()) + fp2id_.resize(fp + 1); + I &id_ref = fp2id_[fp]; + if (id_ref == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + id_ref = id2entry_.size(); + } else { + return -1; + } + } + return id_ref - 1; // NB: id_ref = ID + 1 + } + + const T &FindEntry(I s) const { return id2entry_[s]; } + + I Size() const { return id2entry_.size(); } + + const FP &Fingerprint() const { return *fp_; } + + private: + FP *fp_; + vector fp2id_; + vector id2entry_; + + void operator=(const VectorBiTable &table); // disallow +}; + + +// An implementation using a vector and a compact hash table. The +// selecting functor S returns true for entries to be hashed in the +// vector. The fingerprinting functor FP returns a unique fingerprint +// for each entry to be hashed in the vector (these need to be +// suitable for indexing in a vector). The hash functor H is used +// when hashing entry into the compact hash table. If passed to the +// constructor, ownership is given to this class. +template +class VectorHashBiTable { + public: + friend class HashFunc; + friend class HashEqual; + + explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0, + size_t vector_size = 0, + size_t entry_size = 0) + : selector_(s), + fp_(fp ? fp : new FP()), + h_(h ? h : new H()), + hash_func_(*this), + hash_equal_(*this), + keys_(0, hash_func_, hash_equal_) { + if (vector_size) + fp2id_.reserve(vector_size); + if (entry_size) + id2entry_.reserve(entry_size); + } + + VectorHashBiTable(const VectorHashBiTable &table) + : selector_(new S(table.s_)), + fp_(table.fp_ ? new FP(*table.fp_) : 0), + h_(table.h_ ? new H(*table.h_) : 0), + id2entry_(table.id2entry_), + fp2id_(table.fp2id_), + hash_func_(*this), + hash_equal_(*this), + keys_(table.keys_.size(), hash_func_, hash_equal_) { + keys_.insert(table.keys_.begin(), table.keys_.end()); + } + + ~VectorHashBiTable() { + delete selector_; + delete fp_; + delete h_; + } + + I FindId(const T &entry, bool insert = true) { + if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' + uint64 fp = (*fp_)(entry); + if (fp2id_.size() <= fp) + fp2id_.resize(fp + 1, 0); + if (fp2id_[fp] == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + fp2id_[fp] = id2entry_.size(); + } else { + return -1; + } + } + return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 + } else { // Use the hash table otherwise. + current_entry_ = &entry; + typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); + if (it == keys_.end()) { + if (insert) { + I key = id2entry_.size(); + id2entry_.push_back(entry); + keys_.insert(key); + return key; + } else { + return -1; + } + } else { + return *it; + } + } + } + + const T &FindEntry(I s) const { + return id2entry_[s]; + } + + I Size() const { return id2entry_.size(); } + + const S &Selector() const { return *selector_; } + + const FP &Fingerprint() const { return *fp_; } + + const H &Hash() const { return *h_; } + + private: + static const I kCurrentKey; // -1 + static const I kEmptyKey; // -2 + + class HashFunc { + public: + HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} + + size_t operator()(I k) const { + if (k >= kCurrentKey) { + return (*(ht_->h_))(ht_->Key2Entry(k)); + } else { + return 0; + } + } + private: + const VectorHashBiTable *ht_; + }; + + class HashEqual { + public: + HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} + + bool operator()(I k1, I k2) const { + if (k1 >= kCurrentKey && k2 >= kCurrentKey) { + return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); + } else { + return k1 == k2; + } + } + private: + const VectorHashBiTable *ht_; + }; + + typedef HashSet KeyHashSet; + + const T &Key2Entry(I k) const { + if (k == kCurrentKey) + return *current_entry_; + else + return id2entry_[k]; + } + + S *selector_; // Returns true if entry hashed into vector + FP *fp_; // Fingerprint used when hashing entry into vector + H *h_; // Hash function used when hashing entry into hash_set + + vector id2entry_; // Maps state IDs to entry + vector fp2id_; // Maps entry fingerprints to IDs + + // Compact implementation of the hash table mapping entrys to + // state IDs using the hash function 'h_' + HashFunc hash_func_; + HashEqual hash_equal_; + KeyHashSet keys_; + const T *current_entry_; + + // disallow + void operator=(const VectorHashBiTable &table); +}; + +template +const I VectorHashBiTable::kCurrentKey = -1; + +template +const I VectorHashBiTable::kEmptyKey = -3; + + +// An implementation using a hash map for the entry to ID +// mapping. This version permits erasing of arbitrary states. The +// entry T must have == defined and its default constructor must +// produce a entry that will never be seen. F is the hash function. +template +class ErasableBiTable { + public: + ErasableBiTable() : first_(0) {} + + I FindId(const T &entry, bool insert = true) { + I &id_ref = entry2id_[entry]; + if (id_ref == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + id_ref = id2entry_.size() + first_; + } else { + return -1; + } + } + return id_ref - 1; // NB: id_ref = ID + 1 + } + + const T &FindEntry(I s) const { return id2entry_[s - first_]; } + + I Size() const { return id2entry_.size(); } + + void Erase(I s) { + T &entry = id2entry_[s - first_]; + typename unordered_map::iterator it = + entry2id_.find(entry); + entry2id_.erase(it); + id2entry_[s - first_] = empty_entry_; + while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { + id2entry_.pop_front(); + ++first_; + } + } + + private: + unordered_map entry2id_; + deque id2entry_; + const T empty_entry_; + I first_; // I of first element in the deque; + + // disallow + void operator=(const ErasableBiTable &table); //disallow +}; + +} // namespace fst + +#endif // FST_LIB_BI_TABLE_H__ -- cgit v1.2.3-70-g09d2