summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/partition.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/partition.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/partition.h305
1 files changed, 305 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/partition.h b/kaldi_io/src/tools/openfst/include/fst/partition.h
new file mode 100644
index 0000000..40b849a
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/partition.h
@@ -0,0 +1,305 @@
+// partition.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file Functions and classes to create a partition of states
+//
+
+#ifndef FST_LIB_PARTITION_H__
+#define FST_LIB_PARTITION_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+
+#include <fst/queue.h>
+
+
+
+namespace fst {
+
+template <typename T> class PartitionIterator;
+
+// \class Partition
+// \brief Defines a partitioning of states. Typically used to represent
+// equivalence classes for Fst operations like minimization.
+//
+template <typename T>
+class Partition {
+ friend class PartitionIterator<T>;
+
+ struct Element {
+ Element() : value(0), next(0), prev(0) {}
+ Element(T v) : value(v), next(0), prev(0) {}
+
+ T value;
+ Element* next;
+ Element* prev;
+ };
+
+ public:
+ Partition(bool allow_repeated_split):
+ allow_repeated_split_(allow_repeated_split) {}
+
+ Partition(bool allow_repeated_split, T num_states):
+ allow_repeated_split_(allow_repeated_split) {
+ Initialize(num_states);
+ }
+
+ ~Partition() {
+ for (size_t i = 0; i < elements_.size(); ++i)
+ delete elements_[i];
+ }
+
+ // Create an empty partition for num_states. At initialization time
+ // all elements are not assigned to a class (i.e class_index = -1).
+ // Initialize just creates num_states of elements. All element
+ // operations are then done by simply disconnecting the element from
+ // it current class and placing it at the head of the next class.
+ void Initialize(size_t num_states) {
+ for (size_t i = 0; i < elements_.size(); ++i)
+ delete elements_[i];
+ elements_.clear();
+ classes_.clear();
+ class_index_.clear();
+
+ elements_.resize(num_states);
+ class_index_.resize(num_states, -1);
+ class_size_.reserve(num_states);
+ for (size_t i = 0; i < num_states; ++i)
+ elements_[i] = new Element(i);
+ num_states_ = num_states;
+ }
+
+ // Add a class, resize classes_ and class_size_ resource by 1.
+ size_t AddClass() {
+ size_t num_classes = classes_.size();
+ classes_.resize(num_classes + 1, 0);
+ class_size_.resize(num_classes + 1, 0);
+ class_split_.resize(num_classes + 1, 0);
+ split_size_.resize(num_classes + 1, 0);
+ return num_classes;
+ }
+
+ void AllocateClasses(T num_classes) {
+ size_t n = classes_.size() + num_classes;
+ classes_.resize(n, 0);
+ class_size_.resize(n, 0);
+ class_split_.resize(n, 0);
+ split_size_.resize(n, 0);
+ }
+
+ // Add element_id to class_id. The Add method is used to initialize
+ // partition. Once elements have been added to a class, you need to
+ // use the Move() method move an element from once class to another.
+ void Add(T element_id, T class_id) {
+ Element* element = elements_[element_id];
+
+ if (classes_[class_id])
+ classes_[class_id]->prev = element;
+ element->next = classes_[class_id];
+ element->prev = 0;
+ classes_[class_id] = element;
+
+ class_index_[element_id] = class_id;
+ class_size_[class_id]++;
+ }
+
+ // Move and element_id to class_id. Disconnects (removes) element
+ // from it current class and
+ void Move(T element_id, T class_id) {
+ T old_class_id = class_index_[element_id];
+
+ Element* element = elements_[element_id];
+ if (element->next) element->next->prev = element->prev;
+ if (element->prev) element->prev->next = element->next;
+ else classes_[old_class_id] = element->next;
+
+ Add(element_id, class_id);
+ class_size_[old_class_id]--;
+ }
+
+ // split class on the element_id
+ void SplitOn(T element_id) {
+ T class_id = class_index_[element_id];
+ if (class_size_[class_id] == 1) return;
+
+ // first time class is split
+ if (split_size_[class_id] == 0) {
+ visited_classes_.push_back(class_id);
+ class_split_[class_id] = classes_[class_id];
+ }
+ // increment size of split (set of element at head of chain)
+ split_size_[class_id]++;
+
+ // update split point
+ if (class_split_[class_id] != 0
+ && class_split_[class_id] == elements_[element_id])
+ class_split_[class_id] = elements_[element_id]->next;
+
+ // move to head of chain in same class
+ Move(element_id, class_id);
+ }
+
+ // Finalize class_id, split if required, and update class_splits,
+ // class indices of the newly created class. Returns the new_class id
+ // or -1 if no new class was created.
+ T SplitRefine(T class_id) {
+
+ Element* split_el = class_split_[class_id];
+ // only split if necessary
+ //if (class_size_[class_id] == split_size_[class_id]) {
+ if(split_el == NULL) { // we split on everything...
+ split_size_[class_id] = 0;
+ return -1;
+ } else {
+ T new_class = AddClass();
+
+ if(allow_repeated_split_) { // split_size_ is possibly
+ // inaccurate, so work it out exactly.
+ size_t split_count; Element *e;
+ for(split_count=0,e=classes_[class_id];
+ e != split_el; split_count++, e=e->next);
+ split_size_[class_id] = split_count;
+ }
+ size_t remainder = class_size_[class_id] - split_size_[class_id];
+ if (remainder < split_size_[class_id]) { // add smaller
+ classes_[new_class] = split_el;
+ split_el->prev->next = 0;
+ split_el->prev = 0;
+ class_size_[class_id] = split_size_[class_id];
+ class_size_[new_class] = remainder;
+ } else {
+ classes_[new_class] = classes_[class_id];
+ class_size_[class_id] = remainder;
+ class_size_[new_class] = split_size_[class_id];
+ split_el->prev->next = 0;
+ split_el->prev = 0;
+ classes_[class_id] = split_el;
+ }
+
+ // update class index for element in new class
+ for (Element* el = classes_[new_class]; el; el = el->next)
+ class_index_[el->value] = new_class;
+
+ class_split_[class_id] = 0;
+ split_size_[class_id] = 0;
+
+ return new_class;
+ }
+ }
+
+ // Once all states have been processed for a particular class C, we
+ // can finalize the split. FinalizeSplit() will update each block in the
+ // partition, create new once and update the queue of active classes
+ // that require further refinement.
+ template <class Queue>
+ void FinalizeSplit(Queue* L) {
+ for (size_t i = 0; i < visited_classes_.size(); ++i) {
+ T new_class = SplitRefine(visited_classes_[i]);
+ if (new_class != -1 && L)
+ L->Enqueue(new_class);
+ }
+ visited_classes_.clear();
+ }
+
+
+ const T class_id(T element_id) const {
+ return class_index_[element_id];
+ }
+
+ const vector<T>& class_sizes() const {
+ return class_size_;
+ }
+
+ const size_t class_size(T class_id) const {
+ return class_size_[class_id];
+ }
+
+ const T num_classes() const {
+ return classes_.size();
+ }
+
+
+ private:
+ int num_states_;
+
+ // container of all elements (owner of ptrs)
+ vector<Element*> elements_;
+
+ // linked list of elements belonging to class
+ vector<Element*> classes_;
+
+ // pointer to split point for each class
+ vector<Element*> class_split_;
+
+ // class index of element
+ vector<T> class_index_;
+
+ // class sizes
+ vector<T> class_size_;
+
+ // size of split for each class
+ // in the nondeterministic case, split_size_ is actually an upper
+ // bound on the size of split for each class.
+ vector<T> split_size_;
+
+ // set of visited classes to be used in split refine
+ vector<T> visited_classes_;
+
+ // true if input fst was deterministic: we can make
+ // certain assumptions in this case that speed up the algorithm.
+ bool allow_repeated_split_;
+};
+
+
+// iterate over members of a class in a partition
+template <typename T>
+class PartitionIterator {
+ typedef typename Partition<T>::Element Element;
+ public:
+ PartitionIterator(const Partition<T>& partition, T class_id)
+ : p_(partition),
+ element_(p_.classes_[class_id]),
+ class_id_(class_id) {}
+
+ bool Done() {
+ return (element_ == 0);
+ }
+
+ const T Value() {
+ return (element_->value);
+ }
+
+ void Next() {
+ element_ = element_->next;
+ }
+
+ void Reset() {
+ element_ = p_.classes_[class_id_];
+ }
+
+ private:
+ const Partition<T>& p_;
+
+ const Element* element_;
+
+ T class_id_;
+};
+} // namespace fst
+
+#endif // FST_LIB_PARTITION_H__