From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/kaldi/util/hash-list.h | 140 ++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 kaldi_io/src/kaldi/util/hash-list.h (limited to 'kaldi_io/src/kaldi/util/hash-list.h') diff --git a/kaldi_io/src/kaldi/util/hash-list.h b/kaldi_io/src/kaldi/util/hash-list.h new file mode 100644 index 0000000..4524759 --- /dev/null +++ b/kaldi_io/src/kaldi/util/hash-list.h @@ -0,0 +1,140 @@ +// util/hash-list.h + +// Copyright 2009-2011 Microsoft Corporation +// 2013 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#ifndef KALDI_UTIL_HASH_LIST_H_ +#define KALDI_UTIL_HASH_LIST_H_ +#include +#include +#include +#include +#include +#include "util/stl-utils.h" + + +/* This header provides utilities for a structure that's used in a decoder (but + is quite generic in nature so we implement and test it separately). + Basically it's a singly-linked list, but implemented in such a way that we + can quickly search for elements in the list. We give it a slightly richer + interface than just a hash and a list. The idea is that we want to separate + the hash part and the list part: basically, in the decoder, we want to have a + single hash for the current frame and the next frame, because by the time we + need to access the hash for the next frame we no longer need the hash for the + previous frame. So we have an operation that clears the hash but leaves the + list structure intact. We also control memory management inside this object, + to avoid repeated new's/deletes. + + See hash-list-test.cc for an example of how to use this object. +*/ + + +namespace kaldi { + +template class HashList { + + public: + struct Elem { + I key; + T val; + Elem *tail; + }; + + /// Constructor takes no arguments. Call SetSize to inform it of the likely size. + HashList(); + + /// Clears the hash and gives the head of the current list to the user; + /// ownership is transferred to the user (the user must call Delete() + /// for each element in the list, at his/her leisure). + Elem *Clear(); + + /// Gives the head of the current list to the user. Ownership retained in the + /// class. Caution: in December 2013 the return type was changed to const Elem* + /// and this function was made const. You may need to change some types of + /// local Elem* variables to const if this produces compilation errors. + const Elem *GetList() const; + + /// Think of this like delete(). It is to be called for each Elem in turn + /// after you "obtained ownership" by doing Clear(). This is not the opposite of + /// Insert, it is the opposite of New. It's really a memory operation. + inline void Delete(Elem *e); + + /// This should probably not be needed to be called directly by the user. Think of it as opposite + /// to Delete(); + inline Elem *New(); + + /// Find tries to find this element in the current list using the hashtable. + /// It returns NULL if not present. The Elem it returns is not owned by the user, + /// it is part of the internal list owned by this object, but the user is + /// free to modify the "val" element. + inline Elem *Find(I key); + + /// Insert inserts a new element into the hashtable/stored list. By calling this, + /// the user asserts that it is not already present (e.g. Find was called and + /// returned NULL). With current code, calling this if an element already exists will + /// result in duplicate elements in the structure, and Find() will find the + /// first one that was added. [but we don't guarantee this behavior]. + inline void Insert(I key, T val); + + /// Insert inserts another element with same key into the hashtable/stored list. + /// By calling this, the user asserts that one element with that key is already present. + /// We insert it that way, that all elements with the same key follow each other. + /// Find() will return the first one of the elements with the same key. + inline void InsertMore(I key, T val); + + /// SetSize tells the object how many hash buckets to allocate (should typically be + /// at least twice the number of objects we expect to go in the structure, for fastest + /// performance). It must be called while the hash is empty (e.g. after Clear() or + /// after initializing the object, but before adding anything to the hash. + void SetSize(size_t sz); + + /// Returns current number of hash buckets. + inline size_t Size() { return hash_size_; } + + ~HashList(); + private: + + struct HashBucket { + size_t prev_bucket; // index to next bucket (-1 if list tail). Note: list of buckets + // goes in opposite direction to list of Elems. + Elem *last_elem; // pointer to last element in this bucket (NULL if empty) + inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {} + }; + + Elem *list_head_; // head of currently stored list. + size_t bucket_list_tail_; // tail of list of active hash buckets. + + size_t hash_size_; // number of hash buckets. + + std::vector buckets_; + + Elem *freed_head_; // head of list of currently freed elements. [ready for allocation] + + std::vector allocated_; // list of allocated blocks. + + static const size_t allocate_block_size_ = 1024; // Number of Elements to allocate in one block. Must be + // largish so storing allocated_ doesn't become a problem. +}; + + +} // end namespace kaldi + +#include "hash-list-inl.h" + +#endif -- cgit v1.2.3-70-g09d2