summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi/util/hash-list.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/kaldi/util/hash-list.h')
-rw-r--r--kaldi_io/src/kaldi/util/hash-list.h140
1 files changed, 140 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/util/hash-list.h b/kaldi_io/src/kaldi/util/hash-list.h
new file mode 100644
index 0000000..4524759
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/hash-list.h
@@ -0,0 +1,140 @@
+// util/hash-list.h
+
+// Copyright 2009-2011 Microsoft Corporation
+// 2013 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_HASH_LIST_H_
+#define KALDI_UTIL_HASH_LIST_H_
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <limits>
+#include <cassert>
+#include "util/stl-utils.h"
+
+
+/* This header provides utilities for a structure that's used in a decoder (but
+ is quite generic in nature so we implement and test it separately).
+ Basically it's a singly-linked list, but implemented in such a way that we
+ can quickly search for elements in the list. We give it a slightly richer
+ interface than just a hash and a list. The idea is that we want to separate
+ the hash part and the list part: basically, in the decoder, we want to have a
+ single hash for the current frame and the next frame, because by the time we
+ need to access the hash for the next frame we no longer need the hash for the
+ previous frame. So we have an operation that clears the hash but leaves the
+ list structure intact. We also control memory management inside this object,
+ to avoid repeated new's/deletes.
+
+ See hash-list-test.cc for an example of how to use this object.
+*/
+
+
+namespace kaldi {
+
+template<class I, class T> class HashList {
+
+ public:
+ struct Elem {
+ I key;
+ T val;
+ Elem *tail;
+ };
+
+ /// Constructor takes no arguments. Call SetSize to inform it of the likely size.
+ HashList();
+
+ /// Clears the hash and gives the head of the current list to the user;
+ /// ownership is transferred to the user (the user must call Delete()
+ /// for each element in the list, at his/her leisure).
+ Elem *Clear();
+
+ /// Gives the head of the current list to the user. Ownership retained in the
+ /// class. Caution: in December 2013 the return type was changed to const Elem*
+ /// and this function was made const. You may need to change some types of
+ /// local Elem* variables to const if this produces compilation errors.
+ const Elem *GetList() const;
+
+ /// Think of this like delete(). It is to be called for each Elem in turn
+ /// after you "obtained ownership" by doing Clear(). This is not the opposite of
+ /// Insert, it is the opposite of New. It's really a memory operation.
+ inline void Delete(Elem *e);
+
+ /// This should probably not be needed to be called directly by the user. Think of it as opposite
+ /// to Delete();
+ inline Elem *New();
+
+ /// Find tries to find this element in the current list using the hashtable.
+ /// It returns NULL if not present. The Elem it returns is not owned by the user,
+ /// it is part of the internal list owned by this object, but the user is
+ /// free to modify the "val" element.
+ inline Elem *Find(I key);
+
+ /// Insert inserts a new element into the hashtable/stored list. By calling this,
+ /// the user asserts that it is not already present (e.g. Find was called and
+ /// returned NULL). With current code, calling this if an element already exists will
+ /// result in duplicate elements in the structure, and Find() will find the
+ /// first one that was added. [but we don't guarantee this behavior].
+ inline void Insert(I key, T val);
+
+ /// Insert inserts another element with same key into the hashtable/stored list.
+ /// By calling this, the user asserts that one element with that key is already present.
+ /// We insert it that way, that all elements with the same key follow each other.
+ /// Find() will return the first one of the elements with the same key.
+ inline void InsertMore(I key, T val);
+
+ /// SetSize tells the object how many hash buckets to allocate (should typically be
+ /// at least twice the number of objects we expect to go in the structure, for fastest
+ /// performance). It must be called while the hash is empty (e.g. after Clear() or
+ /// after initializing the object, but before adding anything to the hash.
+ void SetSize(size_t sz);
+
+ /// Returns current number of hash buckets.
+ inline size_t Size() { return hash_size_; }
+
+ ~HashList();
+ private:
+
+ struct HashBucket {
+ size_t prev_bucket; // index to next bucket (-1 if list tail). Note: list of buckets
+ // goes in opposite direction to list of Elems.
+ Elem *last_elem; // pointer to last element in this bucket (NULL if empty)
+ inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {}
+ };
+
+ Elem *list_head_; // head of currently stored list.
+ size_t bucket_list_tail_; // tail of list of active hash buckets.
+
+ size_t hash_size_; // number of hash buckets.
+
+ std::vector<HashBucket> buckets_;
+
+ Elem *freed_head_; // head of list of currently freed elements. [ready for allocation]
+
+ std::vector<Elem*> allocated_; // list of allocated blocks.
+
+ static const size_t allocate_block_size_ = 1024; // Number of Elements to allocate in one block. Must be
+ // largish so storing allocated_ doesn't become a problem.
+};
+
+
+} // end namespace kaldi
+
+#include "hash-list-inl.h"
+
+#endif