From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/tools/openfst/include/fst/partition.h | 305 +++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/partition.h (limited to 'kaldi_io/src/tools/openfst/include/fst/partition.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/partition.h b/kaldi_io/src/tools/openfst/include/fst/partition.h new file mode 100644 index 0000000..40b849a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/partition.h @@ -0,0 +1,305 @@ +// partition.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: johans@google.com (Johan Schalkwyk) +// +// \file Functions and classes to create a partition of states +// + +#ifndef FST_LIB_PARTITION_H__ +#define FST_LIB_PARTITION_H__ + +#include +using std::vector; +#include + + +#include + + + +namespace fst { + +template class PartitionIterator; + +// \class Partition +// \brief Defines a partitioning of states. Typically used to represent +// equivalence classes for Fst operations like minimization. +// +template +class Partition { + friend class PartitionIterator; + + struct Element { + Element() : value(0), next(0), prev(0) {} + Element(T v) : value(v), next(0), prev(0) {} + + T value; + Element* next; + Element* prev; + }; + + public: + Partition(bool allow_repeated_split): + allow_repeated_split_(allow_repeated_split) {} + + Partition(bool allow_repeated_split, T num_states): + allow_repeated_split_(allow_repeated_split) { + Initialize(num_states); + } + + ~Partition() { + for (size_t i = 0; i < elements_.size(); ++i) + delete elements_[i]; + } + + // Create an empty partition for num_states. At initialization time + // all elements are not assigned to a class (i.e class_index = -1). + // Initialize just creates num_states of elements. All element + // operations are then done by simply disconnecting the element from + // it current class and placing it at the head of the next class. + void Initialize(size_t num_states) { + for (size_t i = 0; i < elements_.size(); ++i) + delete elements_[i]; + elements_.clear(); + classes_.clear(); + class_index_.clear(); + + elements_.resize(num_states); + class_index_.resize(num_states, -1); + class_size_.reserve(num_states); + for (size_t i = 0; i < num_states; ++i) + elements_[i] = new Element(i); + num_states_ = num_states; + } + + // Add a class, resize classes_ and class_size_ resource by 1. + size_t AddClass() { + size_t num_classes = classes_.size(); + classes_.resize(num_classes + 1, 0); + class_size_.resize(num_classes + 1, 0); + class_split_.resize(num_classes + 1, 0); + split_size_.resize(num_classes + 1, 0); + return num_classes; + } + + void AllocateClasses(T num_classes) { + size_t n = classes_.size() + num_classes; + classes_.resize(n, 0); + class_size_.resize(n, 0); + class_split_.resize(n, 0); + split_size_.resize(n, 0); + } + + // Add element_id to class_id. The Add method is used to initialize + // partition. Once elements have been added to a class, you need to + // use the Move() method move an element from once class to another. + void Add(T element_id, T class_id) { + Element* element = elements_[element_id]; + + if (classes_[class_id]) + classes_[class_id]->prev = element; + element->next = classes_[class_id]; + element->prev = 0; + classes_[class_id] = element; + + class_index_[element_id] = class_id; + class_size_[class_id]++; + } + + // Move and element_id to class_id. Disconnects (removes) element + // from it current class and + void Move(T element_id, T class_id) { + T old_class_id = class_index_[element_id]; + + Element* element = elements_[element_id]; + if (element->next) element->next->prev = element->prev; + if (element->prev) element->prev->next = element->next; + else classes_[old_class_id] = element->next; + + Add(element_id, class_id); + class_size_[old_class_id]--; + } + + // split class on the element_id + void SplitOn(T element_id) { + T class_id = class_index_[element_id]; + if (class_size_[class_id] == 1) return; + + // first time class is split + if (split_size_[class_id] == 0) { + visited_classes_.push_back(class_id); + class_split_[class_id] = classes_[class_id]; + } + // increment size of split (set of element at head of chain) + split_size_[class_id]++; + + // update split point + if (class_split_[class_id] != 0 + && class_split_[class_id] == elements_[element_id]) + class_split_[class_id] = elements_[element_id]->next; + + // move to head of chain in same class + Move(element_id, class_id); + } + + // Finalize class_id, split if required, and update class_splits, + // class indices of the newly created class. Returns the new_class id + // or -1 if no new class was created. + T SplitRefine(T class_id) { + + Element* split_el = class_split_[class_id]; + // only split if necessary + //if (class_size_[class_id] == split_size_[class_id]) { + if(split_el == NULL) { // we split on everything... + split_size_[class_id] = 0; + return -1; + } else { + T new_class = AddClass(); + + if(allow_repeated_split_) { // split_size_ is possibly + // inaccurate, so work it out exactly. + size_t split_count; Element *e; + for(split_count=0,e=classes_[class_id]; + e != split_el; split_count++, e=e->next); + split_size_[class_id] = split_count; + } + size_t remainder = class_size_[class_id] - split_size_[class_id]; + if (remainder < split_size_[class_id]) { // add smaller + classes_[new_class] = split_el; + split_el->prev->next = 0; + split_el->prev = 0; + class_size_[class_id] = split_size_[class_id]; + class_size_[new_class] = remainder; + } else { + classes_[new_class] = classes_[class_id]; + class_size_[class_id] = remainder; + class_size_[new_class] = split_size_[class_id]; + split_el->prev->next = 0; + split_el->prev = 0; + classes_[class_id] = split_el; + } + + // update class index for element in new class + for (Element* el = classes_[new_class]; el; el = el->next) + class_index_[el->value] = new_class; + + class_split_[class_id] = 0; + split_size_[class_id] = 0; + + return new_class; + } + } + + // Once all states have been processed for a particular class C, we + // can finalize the split. FinalizeSplit() will update each block in the + // partition, create new once and update the queue of active classes + // that require further refinement. + template + void FinalizeSplit(Queue* L) { + for (size_t i = 0; i < visited_classes_.size(); ++i) { + T new_class = SplitRefine(visited_classes_[i]); + if (new_class != -1 && L) + L->Enqueue(new_class); + } + visited_classes_.clear(); + } + + + const T class_id(T element_id) const { + return class_index_[element_id]; + } + + const vector& class_sizes() const { + return class_size_; + } + + const size_t class_size(T class_id) const { + return class_size_[class_id]; + } + + const T num_classes() const { + return classes_.size(); + } + + + private: + int num_states_; + + // container of all elements (owner of ptrs) + vector elements_; + + // linked list of elements belonging to class + vector classes_; + + // pointer to split point for each class + vector class_split_; + + // class index of element + vector class_index_; + + // class sizes + vector class_size_; + + // size of split for each class + // in the nondeterministic case, split_size_ is actually an upper + // bound on the size of split for each class. + vector split_size_; + + // set of visited classes to be used in split refine + vector visited_classes_; + + // true if input fst was deterministic: we can make + // certain assumptions in this case that speed up the algorithm. + bool allow_repeated_split_; +}; + + +// iterate over members of a class in a partition +template +class PartitionIterator { + typedef typename Partition::Element Element; + public: + PartitionIterator(const Partition& partition, T class_id) + : p_(partition), + element_(p_.classes_[class_id]), + class_id_(class_id) {} + + bool Done() { + return (element_ == 0); + } + + const T Value() { + return (element_->value); + } + + void Next() { + element_ = element_->next; + } + + void Reset() { + element_ = p_.classes_[class_id_]; + } + + private: + const Partition& p_; + + const Element* element_; + + T class_id_; +}; +} // namespace fst + +#endif // FST_LIB_PARTITION_H__ -- cgit v1.2.3