1 files changed, 140 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/util/hash-list.h b/kaldi_io/src/kaldi/util/hash-list.h
new file mode 100644
index 0000000..4524759
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/hash-list.h
@@ -0,0 +1,140 @@
+// util/hash-list.h
+
+// Copyright 2009-2011   Microsoft Corporation
+//                2013   Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_HASH_LIST_H_
+#define KALDI_UTIL_HASH_LIST_H_
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <limits>
+#include <cassert>
+#include "util/stl-utils.h"
+
+
+/* This header provides utilities for a structure that's used in a decoder (but
+   is quite generic in nature so we implement and test it separately).
+   Basically it's a singly-linked list, but implemented in such a way that we
+   can quickly search for elements in the list.  We give it a slightly richer
+   interface than just a hash and a list.  The idea is that we want to separate
+   the hash part and the list part: basically, in the decoder, we want to have a
+   single hash for the current frame and the next frame, because by the time we
+   need to access the hash for the next frame we no longer need the hash for the
+   previous frame.  So we have an operation that clears the hash but leaves the
+   list structure intact.  We also control memory management inside this object,
+   to avoid repeated new's/deletes.
+
+   See hash-list-test.cc for an example of how to use this object.
+*/
+
+
+namespace kaldi {
+
+template<class I, class T> class HashList {
+
+ public:
+  struct Elem {
+    I key;
+    T val;
+    Elem *tail;
+  };
+
+  /// Constructor takes no arguments.  Call SetSize to inform it of the likely size.
+  HashList();
+
+  /// Clears the hash and gives the head of the current list to the user;
+  /// ownership is transferred to the user (the user must call Delete()
+  /// for each element in the list, at his/her leisure).
+  Elem *Clear();
+
+  /// Gives the head of the current list to the user.  Ownership retained in the
+  /// class.  Caution: in December 2013 the return type was changed to const Elem*
+  /// and this function was made const.  You may need to change some types of
+  /// local Elem* variables to const if this produces compilation errors.
+  const Elem *GetList() const;
+
+  /// Think of this like delete().  It is to be called for each Elem in turn
+  /// after you "obtained ownership" by doing Clear().  This is not the opposite of
+  /// Insert, it is the opposite of New.  It's really a memory operation.
+  inline void Delete(Elem *e);
+
+  /// This should probably not be needed to be called directly by the user.  Think of it as opposite
+  /// to Delete();
+  inline Elem *New();
+
+  /// Find tries to find this element in the current list using the hashtable.
+  /// It returns NULL if not present.  The Elem it returns is not owned by the user,
+  /// it is part of the internal list owned by this object, but the user is
+  /// free to modify the "val" element.
+  inline Elem *Find(I key);
+  
+  /// Insert inserts a new element into the hashtable/stored list.  By calling this,
+  /// the user asserts that it is not already present (e.g. Find was called and
+  /// returned NULL).  With current code, calling this if an element already exists will
+  /// result in duplicate elements in the structure, and Find() will find the
+  /// first one that was added.  [but we don't guarantee this behavior].
+  inline void Insert(I key, T val);
+
+  /// Insert inserts another element with same key into the hashtable/stored list.
+  /// By calling this, the user asserts that one element with that key is already present.
+  /// We insert it that way, that all elements with the same key follow each other.
+  /// Find() will return the first one of the elements with the same key.
+  inline void InsertMore(I key, T val);
+
+  /// SetSize tells the object how many hash buckets to allocate (should typically be
+  /// at least twice the number of objects we expect to go in the structure, for fastest
+  /// performance).  It must be called while the hash is empty (e.g. after Clear() or
+  /// after initializing the object, but before adding anything to the hash.
+  void SetSize(size_t sz);
+
+  /// Returns current number of hash buckets.
+  inline size_t Size() { return hash_size_; }
+
+  ~HashList();
+ private:
+
+  struct HashBucket {
+    size_t prev_bucket;  // index to next bucket (-1 if list tail).  Note: list of buckets
+    // goes in opposite direction to list of Elems.
+    Elem *last_elem;  // pointer to last element in this bucket (NULL if empty)
+    inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {}
+  };
+
+  Elem *list_head_;  // head of currently stored list.
+  size_t bucket_list_tail_;  // tail of list of active hash buckets.
+
+  size_t hash_size_;  // number of hash buckets.
+
+  std::vector<HashBucket> buckets_;
+
+  Elem *freed_head_;  // head of list of currently freed elements. [ready for allocation]
+
+  std::vector<Elem*> allocated_;  // list of allocated blocks.
+
+  static const size_t allocate_block_size_ = 1024;  // Number of Elements to allocate in one block.  Must be
+  // largish so storing allocated_ doesn't become a problem.
+};
+
+
+} // end namespace kaldi
+
+#include "hash-list-inl.h"
+
+#endif