summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h183
1 files changed, 0 insertions, 183 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h
deleted file mode 100644
index f5a5ba7..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h
+++ /dev/null
@@ -1,183 +0,0 @@
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Copyright 2005-2010 Google, Inc.
-// Author: sorenj@google.com (Jeffrey Sorensen)
-
-#ifndef FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
-#define FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
-
-#include <vector>
-using std::vector;
-
-#include <fst/compat.h>
-
-// This class is a bitstring storage class with an index that allows
-// seeking to the Nth set or clear bit in time O(Log(N)) where N is
-// the length of the bit vector. In addition, it allows counting set or
-// clear bits over ranges in constant time.
-//
-// This is accomplished by maintaining an "secondary" index of limited
-// size in bits that maintains a running count of the number of bits set
-// in each block of bitmap data. A block is defined as the number of
-// uint64 values that can fit in the secondary index before an overflow
-// occurs.
-//
-// To handle overflows, a "primary" index containing a running count of
-// bits set in each block is created using the type uint64.
-
-namespace fst {
-
-class BitmapIndex {
- public:
- static size_t StorageSize(size_t size) {
- return ((size + kStorageBlockMask) >> kStorageLogBitSize);
- }
-
- BitmapIndex() : bits_(NULL), size_(0) { }
-
- bool Get(size_t index) const {
- return (bits_[index >> kStorageLogBitSize] &
- (kOne << (index & kStorageBlockMask))) != 0;
- }
-
- static void Set(uint64* bits, size_t index) {
- bits[index >> kStorageLogBitSize] |= (kOne << (index & kStorageBlockMask));
- }
-
- static void Clear(uint64* bits, size_t index) {
- bits[index >> kStorageLogBitSize] &= ~(kOne << (index & kStorageBlockMask));
- }
-
- size_t Bits() const {
- return size_;
- }
-
- size_t ArraySize() const {
- return StorageSize(size_);
- }
-
- // Returns the number of one bits in the bitmap
- size_t GetOnesCount() const {
- return primary_index_[primary_index_size() - 1];
- }
-
- // Returns the number of one bits in positions 0 to limit - 1.
- // REQUIRES: limit <= Bits()
- size_t Rank1(size_t end) const;
-
- // Returns the number of one bits in the range start to end - 1.
- // REQUIRES: limit <= Bits()
- size_t GetOnesCountInRange(size_t start, size_t end) const {
- return Rank1(end) - Rank1(start);
- }
-
- // Returns the number of zero bits in positions 0 to limit - 1.
- // REQUIRES: limit <= Bits()
- size_t Rank0(size_t end) const {
- return end - Rank1(end);
- }
-
- // Returns the number of zero bits in the range start to end - 1.
- // REQUIRES: limit <= Bits()
- size_t GetZeroesCountInRange(size_t start, size_t end) const {
- return end - start - GetOnesCountInRange(start, end);
- }
-
- // Return true if any bit between begin inclusive and end exclusive
- // is set. 0 <= begin <= end <= Bits() is required.
- //
- bool TestRange(size_t start, size_t end) const {
- return Rank1(end) > Rank1(start);
- }
-
- // Returns the offset to the nth set bit (zero based)
- // or Bits() if index >= number of ones
- size_t Select1(size_t bit_index) const;
-
- // Returns the offset to the nth clear bit (zero based)
- // or Bits() if index > number of
- size_t Select0(size_t bit_index) const;
-
- // Rebuilds from index for the associated Bitmap, should be called
- // whenever changes have been made to the Bitmap or else behavior
- // of the indexed bitmap methods will be undefined.
- void BuildIndex(const uint64 *bits, size_t size);
-
- // the secondary index accumulates counts until it can possibly overflow
- // this constant computes the number of uint64 units that can fit into
- // units the size of uint16.
- static const uint64 kOne = 1;
- static const uint32 kStorageBitSize = 64;
- static const uint32 kStorageLogBitSize = 6;
- static const uint32 kSecondaryBlockSize = ((1 << 16) - 1)
- >> kStorageLogBitSize;
-
- private:
- static const uint32 kStorageBlockMask = kStorageBitSize - 1;
-
- // returns, from the index, the count of ones up to array_index
- size_t get_index_ones_count(size_t array_index) const;
-
- // because the indexes, both primary and secondary, contain a running
- // count of the population of one bits contained in [0,i), there is
- // no reason to have an element in the zeroth position as this value would
- // necessarily be zero. (The bits are indexed in a zero based way.) Thus
- // we don't store the 0th element in either index. Both of the following
- // functions, if greater than 0, must be decremented by one before retreiving
- // the value from the corresponding array.
- // returns the 1 + the block that contains the bitindex in question
- // the inverted version works the same but looks for zeros using an inverted
- // view of the index
- size_t find_primary_block(size_t bit_index) const;
-
- size_t find_inverted_primary_block(size_t bit_index) const;
-
- // similarly, the secondary index (which resets its count to zero at
- // the end of every kSecondaryBlockSize entries) does not store the element
- // at 0. Note that the rem_bit_index parameter is the number of bits
- // within the secondary block, after the bits accounted for by the primary
- // block have been removed (i.e. the remaining bits) And, because we
- // reset to zero with each new block, there is no need to store those
- // actual zeros.
- // returns 1 + the secondary block that contains the bitindex in question
- size_t find_secondary_block(size_t block, size_t rem_bit_index) const;
-
- size_t find_inverted_secondary_block(size_t block, size_t rem_bit_index)
- const;
-
- // We create a primary index based upon the number of secondary index
- // blocks. The primary index uses fields wide enough to accomodate any
- // index of the bitarray so cannot overflow
- // The primary index is the actual running
- // count of one bits set for all blocks (and, thus, all uint64s).
- size_t primary_index_size() const {
- return (ArraySize() + kSecondaryBlockSize - 1) / kSecondaryBlockSize;
- }
-
- const uint64* bits_;
- size_t size_;
-
- // The primary index contains the running popcount of all blocks
- // which means the nth value contains the popcounts of
- // [0,n*kSecondaryBlockSize], however, the 0th element is omitted.
- vector<uint32> primary_index_;
- // The secondary index contains the running popcount of the associated
- // bitmap. It is the same length (in units of uint16) as the
- // bitmap's map is in units of uint64s.
- vector<uint16> secondary_index_;
-};
-
-} // end namespace fst
-
-#endif // FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_