From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/tools/openfst/include/fst/const-fst.h | 497 +++++++++++++++++++++ 1 file changed, 497 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/const-fst.h (limited to 'kaldi_io/src/tools/openfst/include/fst/const-fst.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/const-fst.h b/kaldi_io/src/tools/openfst/include/fst/const-fst.h new file mode 100644 index 0000000..e6e85af --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/const-fst.h @@ -0,0 +1,497 @@ +// const-fst.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// Simple concrete immutable FST whose states and arcs are each stored +// in single arrays. + +#ifndef FST_LIB_CONST_FST_H__ +#define FST_LIB_CONST_FST_H__ + +#include +#include +using std::vector; + +#include +#include // For optional argument declarations +#include +#include +#include + + +namespace fst { + +template class ConstFst; +template void Cast(const F &, G *); + +// States and arcs each implemented by single arrays, templated on the +// Arc definition. The unsigned type U is used to represent indices into +// the arc array. +template +class ConstFstImpl : public FstImpl { + public: + using FstImpl::SetInputSymbols; + using FstImpl::SetOutputSymbols; + using FstImpl::SetType; + using FstImpl::SetProperties; + using FstImpl::Properties; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef U Unsigned; + + ConstFstImpl() + : states_region_(0), arcs_region_(0), states_(0), arcs_(0), nstates_(0), + narcs_(0), start_(kNoStateId) { + string type = "const"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + SetType(type); + SetProperties(kNullProperties | kStaticProperties); + } + + explicit ConstFstImpl(const Fst &fst); + + ~ConstFstImpl() { + delete arcs_region_; + delete states_region_; + } + + StateId Start() const { return start_; } + + Weight Final(StateId s) const { return states_[s].final; } + + StateId NumStates() const { return nstates_; } + + size_t NumArcs(StateId s) const { return states_[s].narcs; } + + size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; } + + size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; } + + static ConstFstImpl *Read(istream &strm, const FstReadOptions &opts); + + A *Arcs(StateId s) { return arcs_ + states_[s].pos; } + + // Provide information needed for generic state iterator + void InitStateIterator(StateIteratorData *data) const { + data->base = 0; + data->nstates = nstates_; + } + + // Provide information needed for the generic arc iterator + void InitArcIterator(StateId s, ArcIteratorData *data) const { + data->base = 0; + data->arcs = arcs_ + states_[s].pos; + data->narcs = states_[s].narcs; + data->ref_count = 0; + } + + private: + friend class ConstFst; // Allow finding narcs_, nstates_ during Write + + // States implemented by array *states_ below, arcs by (single) *arcs_. + struct State { + Weight final; // Final weight + Unsigned pos; // Start of state's arcs in *arcs_ + Unsigned narcs; // Number of arcs (per state) + Unsigned niepsilons; // # of input epsilons + Unsigned noepsilons; // # of output epsilons + State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} + }; + + // Properties always true of this Fst class + static const uint64 kStaticProperties = kExpanded; + // Current unaligned file format version. The unaligned version was added and + // made the default since the aligned version does not work on pipes. + static const int kFileVersion = 2; + // Current aligned file format version + static const int kAlignedFileVersion = 1; + // Minimum file format version supported + static const int kMinFileVersion = 1; + + MappedFile *states_region_; // Mapped file for states + MappedFile *arcs_region_; // Mapped file for arcs + State *states_; // States represenation + A *arcs_; // Arcs representation + StateId nstates_; // Number of states + size_t narcs_; // Number of arcs (per FST) + StateId start_; // Initial state + + DISALLOW_COPY_AND_ASSIGN(ConstFstImpl); +}; + +template +const uint64 ConstFstImpl::kStaticProperties; +template +const int ConstFstImpl::kFileVersion; +template +const int ConstFstImpl::kAlignedFileVersion; +template +const int ConstFstImpl::kMinFileVersion; + + +template +ConstFstImpl::ConstFstImpl(const Fst &fst) : nstates_(0), narcs_(0) { + string type = "const"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(sizeof(U) * 8, &size); + type += size; + } + SetType(type); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + start_ = fst.Start(); + + // Count # of states and arcs. + for (StateIterator< Fst > siter(fst); + !siter.Done(); + siter.Next()) { + ++nstates_; + StateId s = siter.Value(); + for (ArcIterator< Fst > aiter(fst, s); + !aiter.Done(); + aiter.Next()) + ++narcs_; + } + states_region_ = MappedFile::Allocate(nstates_ * sizeof(*states_)); + arcs_region_ = MappedFile::Allocate(narcs_ * sizeof(*arcs_)); + states_ = reinterpret_cast(states_region_->mutable_data()); + arcs_ = reinterpret_cast(arcs_region_->mutable_data()); + size_t pos = 0; + for (StateId s = 0; s < nstates_; ++s) { + states_[s].final = fst.Final(s); + states_[s].pos = pos; + states_[s].narcs = 0; + states_[s].niepsilons = 0; + states_[s].noepsilons = 0; + for (ArcIterator< Fst > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const A &arc = aiter.Value(); + ++states_[s].narcs; + if (arc.ilabel == 0) + ++states_[s].niepsilons; + if (arc.olabel == 0) + ++states_[s].noepsilons; + arcs_[pos++] = arc; + } + } + SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties); +} + + +template +ConstFstImpl *ConstFstImpl::Read(istream &strm, + const FstReadOptions &opts) { + ConstFstImpl *impl = new ConstFstImpl; + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { + delete impl; + return 0; + } + impl->start_ = hdr.Start(); + impl->nstates_ = hdr.NumStates(); + impl->narcs_ = hdr.NumArcs(); + + // Ensures compatibility + if (hdr.Version() == kAlignedFileVersion) + hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); + + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; + delete impl; + return 0; + } + + size_t b = impl->nstates_ * sizeof(typename ConstFstImpl::State); + impl->states_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || impl->states_region_ == NULL) { + LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; + delete impl; + return 0; + } + impl->states_ = reinterpret_cast( + impl->states_region_->mutable_data()); + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; + delete impl; + return 0; + } + + b = impl->narcs_ * sizeof(A); + impl->arcs_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || impl->arcs_region_ == NULL) { + LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; + delete impl; + return 0; + } + impl->arcs_ = reinterpret_cast(impl->arcs_region_->mutable_data()); + return impl; +} + +// Simple concrete immutable FST. This class attaches interface to +// implementation and handles reference counting, delegating most +// methods to ImplToExpandedFst. The unsigned type U is used to +// represent indices into the arc array (uint32 by default, declared +// in fst-decl.h). +template +class ConstFst : public ImplToExpandedFst< ConstFstImpl > { + public: + friend class StateIterator< ConstFst >; + friend class ArcIterator< ConstFst >; + template void friend Cast(const F &, G *); + + typedef A Arc; + typedef typename A::StateId StateId; + typedef ConstFstImpl Impl; + typedef U Unsigned; + + ConstFst() : ImplToExpandedFst(new Impl()) {} + + explicit ConstFst(const Fst &fst) + : ImplToExpandedFst(new Impl(fst)) {} + + ConstFst(const ConstFst &fst) : ImplToExpandedFst(fst) {} + + // Get a copy of this ConstFst. See Fst<>::Copy() for further doc. + virtual ConstFst *Copy(bool safe = false) const { + return new ConstFst(*this); + } + + // Read a ConstFst from an input stream; return NULL on error + static ConstFst *Read(istream &strm, const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? new ConstFst(impl) : 0; + } + + // Read a ConstFst from a file; return NULL on error + // Empty filename reads from standard input + static ConstFst *Read(const string &filename) { + Impl* impl = ImplToExpandedFst::Read(filename); + return impl ? new ConstFst(impl) : 0; + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return WriteFst(*this, strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst::WriteFile(filename); + } + + template + static bool WriteFst(const F &fst, ostream &strm, + const FstWriteOptions &opts); + + virtual void InitStateIterator(StateIteratorData *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + explicit ConstFst(Impl *impl) : ImplToExpandedFst(impl) {} + + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst >::GetImpl(); } + + void SetImpl(Impl *impl, bool own_impl = true) { + ImplToFst< Impl, ExpandedFst >::SetImpl(impl, own_impl); + } + + // Use overloading to extract the type of the argument. + static Impl* GetImplIfConstFst(const ConstFst &const_fst) { + return const_fst.GetImpl(); + } + + // Note that this does not give privileged treatment to subtypes of ConstFst. + template + static Impl* GetImplIfConstFst(const NonConstFst& fst) { + return NULL; + } + + void operator=(const ConstFst &fst); // disallow +}; + +// Writes Fst in Const format, potentially with a pass over the machine +// before writing to compute number of states and arcs. +// +template +template +bool ConstFst::WriteFst(const F &fst, ostream &strm, + const FstWriteOptions &opts) { + int file_version = opts.align ? ConstFstImpl::kAlignedFileVersion : + ConstFstImpl::kFileVersion; + size_t num_arcs = -1, num_states = -1; + size_t start_offset = 0; + bool update_header = true; + if (Impl* impl = GetImplIfConstFst(fst)) { + num_arcs = impl->narcs_; + num_states = impl->nstates_; + update_header = false; + } else if ((start_offset = strm.tellp()) == -1) { + // precompute values needed for header when we cannot seek to rewrite it. + num_arcs = 0; + num_states = 0; + for (StateIterator siter(fst); !siter.Done(); siter.Next()) { + num_arcs += fst.NumArcs(siter.Value()); + ++num_states; + } + update_header = false; + } + FstHeader hdr; + hdr.SetStart(fst.Start()); + hdr.SetNumStates(num_states); + hdr.SetNumArcs(num_arcs); + string type = "const"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + uint64 properties = fst.Properties(kCopyProperties, true) | + ConstFstImpl::kStaticProperties; + FstImpl::WriteFstHeader(fst, strm, opts, file_version, type, properties, + &hdr); + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "Could not align file during write after header"; + return false; + } + size_t pos = 0, states = 0; + typename ConstFstImpl::State state; + for (StateIterator siter(fst); !siter.Done(); siter.Next()) { + state.final = fst.Final(siter.Value()); + state.pos = pos; + state.narcs = fst.NumArcs(siter.Value()); + state.niepsilons = fst.NumInputEpsilons(siter.Value()); + state.noepsilons = fst.NumOutputEpsilons(siter.Value()); + strm.write(reinterpret_cast(&state), sizeof(state)); + pos += state.narcs; + ++states; + } + hdr.SetNumStates(states); + hdr.SetNumArcs(pos); + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "Could not align file during write after writing states"; + } + for (StateIterator siter(fst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + for (ArcIterator aiter(fst, s); !aiter.Done(); aiter.Next()) { + const A &arc = aiter.Value(); + strm.write(reinterpret_cast(&arc), sizeof(arc)); + } + } + strm.flush(); + if (!strm) { + LOG(ERROR) << "ConstFst Write write failed: " << opts.source; + return false; + } + if (update_header) { + return FstImpl::UpdateFstHeader(fst, strm, opts, file_version, type, + properties, &hdr, start_offset); + } else { + if (hdr.NumStates() != num_states) { + LOG(ERROR) << "Inconsistent number of states observed during write"; + return false; + } + if (hdr.NumArcs() != num_arcs) { + LOG(ERROR) << "Inconsistent number of arcs observed during write"; + return false; + } + } + return true; +} + +// Specialization for ConstFst; see generic version in fst.h +// for sample usage (but use the ConstFst type!). This version +// should inline. +template +class StateIterator< ConstFst > { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const ConstFst &fst) + : nstates_(fst.GetImpl()->NumStates()), s_(0) {} + + bool Done() const { return s_ >= nstates_; } + + StateId Value() const { return s_; } + + void Next() { ++s_; } + + void Reset() { s_ = 0; } + + private: + StateId nstates_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + + +// Specialization for ConstFst; see generic version in fst.h +// for sample usage (but use the ConstFst type!). This version +// should inline. +template +class ArcIterator< ConstFst > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const ConstFst &fst, StateId s) + : arcs_(fst.GetImpl()->Arcs(s)), + narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {} + + bool Done() const { return i_ >= narcs_; } + + const A& Value() const { return arcs_[i_]; } + + void Next() { ++i_; } + + size_t Position() const { return i_; } + + void Reset() { i_ = 0; } + + void Seek(size_t a) { i_ = a; } + + uint32 Flags() const { + return kArcValueFlags; + } + + void SetFlags(uint32 f, uint32 m) {} + + private: + const A *arcs_; + size_t narcs_; + size_t i_; + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// A useful alias when using StdArc. +typedef ConstFst StdConstFst; + +} // namespace fst + +#endif // FST_LIB_CONST_FST_H__ -- cgit v1.2.3