summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi/tree/event-map.h
blob: 07fcc2b33ab5f7dbe19a8d6b98a51a80e708785a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
// tree/event-map.h

// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_TREE_EVENT_MAP_H_
#define KALDI_TREE_EVENT_MAP_H_

#include <vector>
#include <map>
#include <algorithm>
#include "base/kaldi-common.h"
#include "util/stl-utils.h"
#include "util/const-integer-set.h"

namespace kaldi {

/// \defgroup event_map_group Event maps
/// \ingroup tree_group
/// See \ref tree_internals for overview, and specifically \ref treei_event_map.


// Note RE negative values: some of this code will not work if things of type
// EventValueType are negative.  In particular, TableEventMap can't be used if
// things of EventValueType are negative, and additionally TableEventMap won't
// be efficient if things of EventValueType take on extremely large values.  The
// EventKeyType can be negative though.

/// Things of type EventKeyType can take any value.  The code does not assume they are contiguous.
/// So values like -1, 1000000 and the like are acceptable.
typedef int32 EventKeyType;

/// Given current code, things of type EventValueType should generally be nonnegative and in a
/// reasonably small range (e.g. not one million), as we sometimes construct vectors of the size:
/// [largest value we saw for this key].  This deficiency may be fixed in future [would require
/// modifying TableEventMap]
typedef int32 EventValueType;

/// As far as the event-map code itself is concerned, things of type EventAnswerType may take
/// any value except kNoAnswer (== -1).  However, some specific uses of EventMap (e.g. in
/// build-tree-utils.h) assume these quantities are nonnegative.
typedef int32 EventAnswerType;

typedef std::vector<std::pair<EventKeyType, EventValueType> > EventType;
// It is required to be sorted and have unique keys-- i.e. functions assume this when called
// with this type.

inline std::pair<EventKeyType, EventValueType> MakeEventPair (EventKeyType k, EventValueType v) {  
  return std::pair<EventKeyType, EventValueType>(k, v);
}

void WriteEventType(std::ostream &os, bool binary, const EventType &vec);
void ReadEventType(std::istream &is, bool binary, EventType *vec);

std::string EventTypeToString(const EventType &evec);  // so we can print events out in error messages.

struct EventMapVectorHash {  // Hashing object for EventMapVector.  Works for both pointers and references.
  // Not used in event-map.{h, cc}
  size_t operator () (const EventType &vec);
  size_t operator () (const EventType *ptr) { return (*this)(*ptr); }
};
struct EventMapVectorEqual {  // Equality object for EventType pointers-- test equality of underlying vector.
  // Not used in event-map.{h, cc}
  size_t operator () (const EventType *p1, const EventType *p2) { return (*p1 == *p2); }
};


/// A class that is capable of representing a generic mapping from
/// EventType (which is a vector of (key, value) pairs) to
/// EventAnswerType which is just an integer.  See \ref tree_internals
/// for overview.
class EventMap {
 public:
  static void Check(const EventType &event);  // will crash if not sorted and unique on key.
  static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans);

  // Maps events to the answer type. input must be sorted.
  virtual bool Map(const EventType &event, EventAnswerType *ans) const = 0;

  // MultiMap maps a partially specified set of events to the set of answers it might
  // map to.  It appends these to "ans".  "ans" is
  // **not guaranteed unique at output** if the
  // tree contains duplicate answers at leaves -- you should sort & uniq afterwards.
  // e.g.: SortAndUniq(ans).
  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const = 0;

  // GetChildren() returns the EventMaps that are immediate children of this
  // EventMap (if they exist), by putting them in *out.  Useful for
  // determining the structure of the event map.
  virtual void GetChildren(std::vector<EventMap*> *out) const = 0;

  // This Copy() does a deep copy of the event map.
  // If new_leaves is nonempty when it reaches a leaf with value l s.t. new_leaves[l] != NULL,
  // it replaces it with a copy of that EventMap.  This makes it possible to extend and modify
  // It's the way we do splits of trees, and clustering of trees.  Think about this carefully, because
  // the EventMap structure does not support modification of an existing tree.  Do not be tempted
  // to do this differently, because other kinds of mechanisms would get very messy and unextensible.
  // Copy() is the only mechanism to modify a tree.  It's similar to a kind of function composition.
  // Copy() does not take ownership of the pointers in new_leaves (it uses the Copy() function of those
  // EventMaps).
  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const = 0;
  
  EventMap *Copy() const { std::vector<EventMap*> new_leaves; return Copy(new_leaves); }

  // The function MapValues() is intended to be used to map phone-sets between
  // different integer representations.  For all the keys in the set
  // "keys_to_map", it will map the corresponding values using the map
  // "value_map".  Note: these values are the values in the key->value pairs of
  // the EventMap, which really correspond to phones in the usual case; they are
  // not the "answers" of the EventMap which correspond to clustered states.  In
  // case multiple values are mapped to the same value, it will try to deal with
  // it gracefully where it can, but will crash if, for example, this would
  // cause problems with the TableEventMap.  It will also crash if any values
  // used for keys in "keys_to_map" are not mapped by "value_map".  This
  // function is not currently used.
  virtual EventMap *MapValues(
      const unordered_set<EventKeyType> &keys_to_map,
      const unordered_map<EventValueType,EventValueType> &value_map) const = 0;

  // The function Prune() is like Copy(), except it removes parts of the tree
  // that return only -1 (it will return NULL if this EventMap returns only -1).
  // This is a mechanism to remove parts of the tree-- you would first use the
  // Copy() function with a vector of EventMap*, and for the parts you don't
  // want, you'd put a ConstantEventMap with -1; you'd then call
  // Prune() on the result.  This function is not currently used.
  virtual EventMap *Prune() const = 0;
  
  virtual EventAnswerType MaxResult() const {  // child classes may override this for efficiency; here is basic version.
    // returns -1 if nothing found.
    std::vector<EventAnswerType> tmp; EventType empty_event;
    MultiMap(empty_event, &tmp);
    if (tmp.empty()) {
      KALDI_WARN << "EventMap::MaxResult(), empty result";
      return std::numeric_limits<EventAnswerType>::min();
    }
    else { return * std::max_element(tmp.begin(), tmp.end()); }
  }

  /// Write to stream.
  virtual void Write(std::ostream &os, bool binary) = 0;

  virtual ~EventMap() {}

  /// a Write function that takes care of NULL pointers.
  static void Write(std::ostream &os, bool binary, EventMap *emap);
  /// a Read function that reads an arbitrary EventMap; also
  /// works for NULL pointers.
  static EventMap *Read(std::istream &is, bool binary);
};


class ConstantEventMap: public EventMap {
 public:
  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
    *ans = answer_;
    return true;
  }

  virtual void MultiMap(const EventType &,
                        std::vector<EventAnswerType> *ans) const {
     ans->push_back(answer_);
  }

  virtual void GetChildren(std::vector<EventMap*> *out) const { out->clear(); }

  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
    if (answer_ < 0 || answer_ >= (EventAnswerType)new_leaves.size() ||
        new_leaves[answer_] == NULL)
      return new ConstantEventMap(answer_);
    else return new_leaves[answer_]->Copy();
  }

  virtual EventMap *MapValues(
      const unordered_set<EventKeyType> &keys_to_map,
      const unordered_map<EventValueType,EventValueType> &value_map) const {
    return new ConstantEventMap(answer_);
  }

  virtual EventMap *Prune() const {
    return (answer_ == -1 ? NULL : new ConstantEventMap(answer_));
  }
  
  explicit ConstantEventMap(EventAnswerType answer): answer_(answer) { }
  
  virtual void Write(std::ostream &os, bool binary);
  static ConstantEventMap *Read(std::istream &is, bool binary);
 private:
  EventAnswerType answer_;
  KALDI_DISALLOW_COPY_AND_ASSIGN(ConstantEventMap);
};

class TableEventMap: public EventMap {
 public:

  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
    EventValueType tmp;   *ans = -1;  // means no answer
    if (Lookup(event, key_, &tmp) && tmp >= 0
       && tmp < (EventValueType)table_.size() && table_[tmp] != NULL) {
      return table_[tmp]->Map(event, ans);
    }
    return false;
  }

  virtual void GetChildren(std::vector<EventMap*> *out) const {
    out->clear();
    for (size_t i = 0; i<table_.size(); i++)
      if (table_[i] != NULL) out->push_back(table_[i]);
  }

  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const {
    EventValueType tmp;
    if (Lookup(event, key_, &tmp)) {
      if (tmp >= 0 && tmp < (EventValueType)table_.size() && table_[tmp] != NULL)
        return table_[tmp]->MultiMap(event, ans);
      // else no answers.
    } else {  // all answers are possible if no such key.
      for (size_t i = 0;i < table_.size();i++)
        if (table_[i] != NULL) table_[i]->MultiMap(event, ans);  // append.
    }
  }

  virtual EventMap *Prune() const;
  
  virtual EventMap *MapValues(
      const unordered_set<EventKeyType> &keys_to_map,
      const unordered_map<EventValueType,EventValueType> &value_map)