// far.h
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Finite-State Transducer (FST) archive classes.
//
#ifndef FST_EXTENSIONS_FAR_FAR_H__
#define FST_EXTENSIONS_FAR_FAR_H__
#include <fst/extensions/far/stlist.h>
#include <fst/extensions/far/sttable.h>
#include <fst/fst.h>
#include <fst/vector-fst.h>
namespace fst {
// Granularity at which input is turned into archive entries.
// NOTE(review): presumably FET_LINE means one entry per input line and
// FET_FILE one entry per input file -- confirm against the code that
// consumes this enum (not visible in this header).
enum FarEntryType {
  FET_LINE = 0,
  FET_FILE = 1
};
// Tokenization scheme selector.
// NOTE(review): the names suggest symbol-table tokens, raw bytes and UTF-8
// code points respectively -- confirm against the code that consumes this
// enum (not visible in this header).
enum FarTokenType {
  FTT_SYMBOL = 0,
  FTT_BYTE = 1,
  FTT_UTF8 = 2
};
// Returns true iff `filename` can be opened and IsFstHeader() accepts the
// stream; returns false if the file cannot be opened.
inline bool IsFst(const string &filename) {
  // Binary mode keeps the platform from applying text-mode translation
  // (e.g. newline conversion) to the bytes handed to IsFstHeader().
  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
  if (!strm) return false;
  return IsFstHeader(strm, filename);
}
// FST archive header class.
//
// Probes a file with IsSTTable(), IsSTList() and IsFst() (in that order) to
// determine which container format it uses, then records that format's name
// together with the arc type reported by the file's header.
class FarHeader {
 public:
  // Archive format recorded by the last successful Read(): one of "stlist",
  // "sttable" or "fst".
  const string &FarType() const { return fartype_; }

  // Arc type recorded by the last successful Read(); "unknown" if the header
  // that was read carried an empty arc type.
  const string &ArcType() const { return arctype_; }

  // Identifies the archive stored in `filename` and fills in FarType() and
  // ArcType(). Returns true on success and false if the file matches none of
  // the known formats (in which case the stored values are left untouched).
  //
  // An empty filename denotes standard input, whose header cannot be read;
  // it is assumed to be an STList of StdArc FSTs.
  //
  // NOTE(review): the results of ReadSTTableHeader(), ReadSTListHeader() and
  // FstHeader::Read() are ignored, as in the original code; a failed header
  // read therefore still reports success with whatever `fsthdr` contains.
  bool Read(const string &filename) {
    FstHeader fsthdr;
    if (filename.empty()) {
      // Header reading unsupported on stdin. Assumes STList and StdArc.
      fartype_ = "stlist";
      arctype_ = "standard";
      return true;
    } else if (IsSTTable(filename)) {  // Check if STTable
      ReadSTTableHeader(filename, &fsthdr);
      fartype_ = "sttable";
    } else if (IsSTList(filename)) {  // Check if STList
      ReadSTListHeader(filename, &fsthdr);
      // Was "sttable": a copy-paste slip that mislabelled STList archives.
      fartype_ = "stlist";
    } else if (IsFst(filename)) {  // Check if Fst
      // Binary mode: avoid text-mode translation of the header bytes.
      ifstream istrm(filename.c_str(), ifstream::in | ifstream::binary);
      fsthdr.Read(istrm, filename);
      fartype_ = "fst";
    } else {
      return false;
    }
    // Common to every branch that actually read a header.
    arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
    return true;
  }

 private:
  string fartype_;  // Container format name; see FarType().
  string arctype_;  // Arc type name; see ArcType().
};
// Identifies the container format of an FST archive.
//   FAR_DEFAULT - default argument of FarWriter::Create().
//                 NOTE(review): presumably lets Create() pick the format;
//                 its definition is not visible here -- confirm.
//   FAR_STTABLE - reported by STTableFarWriter::Type().
//   FAR_STLIST  - reported by STListFarWriter::Type().
//   FAR_FST     - NOTE(review): presumably an archive consisting of a single
//                 plain FST file -- confirm.
enum FarType {
  FAR_DEFAULT = 0,
  FAR_STTABLE = 1,
  FAR_STLIST = 2,
  FAR_FST = 3
};
// This class creates an archive of FSTs.
//
// Abstract interface: concrete writers derive from it and implement Add(),
// Type() and Error(). Instances are obtained through Create().
template <class A>
class FarWriter {
public:
// Arc type of the FSTs stored in the archive.
typedef A Arc;
// Creates a new (empty) FST archive; returns NULL on error.
// `type` selects the archive format and defaults to FAR_DEFAULT.
static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
// Adds an FST to the end of an archive. Keys must be non-empty and
// in lexicographic order. FSTs must have a suitable write method.
virtual void Add(const string &key, const Fst<A> &fst) = 0;
// Returns the archive format produced by this writer.
virtual FarType Type() const = 0;
// NOTE(review): presumably returns true once the writer has encountered an
// error -- the exact conditions are defined by the implementations.
virtual bool Error() const = 0;
// Virtual so that writers can be deleted through a FarWriter pointer.
virtual ~FarWriter() {}
protected:
// Only derived classes may construct the base.
FarWriter() {}
private:
// Macro defined elsewhere; by its name it disables copy construction and
// assignment for this class.
DISALLOW_COPY_AND_ASSIGN(FarWriter);
};
// This class iterates through an existing archive of FSTs.
//
// Abstract interface: concrete readers derive from it and implement the
// iteration methods below. Instances are obtained through Open().
template <class A>
class FarReader {
public:
// Arc type of the FSTs stored in the archive.
typedef A Arc;
// Opens an existing FST archive in a single file; returns NULL on error.
// Sets current position to the beginning of the archive.
static FarReader *Open(const string &filename);
// Opens an existing FST archive in multiple files; returns NULL on error.
// Sets current position to the beginning of the archive.
static FarReader *Open(const vector<string> &filenames);
// Resets current position to beginning of archive.
virtual void Reset() = 0;
// Sets current position to first entry >= key. Returns true if a match.
virtual bool Find(const string &key) = 0;
// Current position at end of archive?
virtual bool Done() const = 0;
// Move current position to next FST.
virtual void Next() = 0;
// Returns key at the current position. This reference is invalidated if
// the current position in the archive is changed.
virtual const string &GetKey() const = 0;
// Returns FST at the current position. This reference is invalidated if
// the current position in the archive is changed.
virtual const Fst<A> &GetFst() const = 0;
// Returns the archive format handled by this reader.
virtual FarType Type() const = 0;
// NOTE(review): presumably returns true once the reader has encountered an
// error -- the exact conditions are defined by the implementations.
virtual bool Error() const = 0;
// Virtual so that readers can be deleted through a FarReader pointer.
virtual ~FarReader() {}
protected:
// Only derived classes may construct the base.
FarReader() {}
private:
// Macro defined elsewhere; by its name it disables copy construction and
// assignment for this class.
DISALLOW_COPY_AND_ASSIGN(FarReader);
};
template <class A>
class FstWriter {
public:
void operator()(ostream &strm, const Fst<A> &fst) const {
fst.Write(strm, FstWriteOptions());
}
};
// FarWriter implementation that delegates storage to an STTableWriter,
// serializing each FST with FstWriter.
template <class A>
class STTableFarWriter : public FarWriter<A> {
 public:
  typedef A Arc;

  // Creates a writer for `filename`. Returns a null pointer if the underlying
  // STTableWriter factory yields none, matching the "returns NULL on error"
  // contract documented on FarWriter::Create(). Previously a null inner
  // writer was wrapped anyway and dereferenced on the first Add()/Error().
  static STTableFarWriter *Create(const string &filename) {
    typedef STTableWriter<Fst<A>, FstWriter<A> > Writer;
    Writer *writer = Writer::Create(filename);
    if (!writer) return 0;
    return new STTableFarWriter(writer);
  }

  // Appends `fst` under `key` by forwarding to the underlying writer.
  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }

  // This writer always produces FAR_STTABLE archives.
  FarType Type() const { return FAR_STTABLE; }

  // Forwards the error state of the underlying writer.
  bool Error() const { return writer_->Error(); }

  // Destroys the underlying writer, which this object owns.
  ~STTableFarWriter() { delete writer_; }

 private:
  // Takes ownership of `writer`; only reachable through Create().
  explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
      : writer_(writer) {}

  STTableWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned; deleted in dtor.
  DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
};
// FarWriter implementation that delegates storage to an STListWriter,
// serializing each FST with FstWriter.
template <class A>
class STListFarWriter : public FarWriter<A> {
 public:
  typedef A Arc;

  // Creates a writer for `filename`. Returns a null pointer if the underlying
  // STListWriter factory yields none, matching the "returns NULL on error"
  // contract documented on FarWriter::Create() and the behaviour of
  // STTableFarWriter::Create(). Without the check, a null inner writer would
  // be dereferenced on the first Add()/Error().
  static STListFarWriter *Create(const string &filename) {
    typedef STListWriter<Fst<A>, FstWriter<A> > Writer;
    Writer *writer = Writer::Create(filename);
    if (!writer) return 0;
    return new STListFarWriter(writer);
  }

  // Appends `fst` under `key` by forwarding to the underlying writer.
  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }

  // This writer always produces FAR_STLIST archives.
  FarType Type() const { return FAR_STLIST; }

  // Forwards the error state of the underlying writer.
  bool Error() const { return writer_->Error(); }

  // Destroys the underlying writer, which this object owns.
  ~STListFarWriter() { delete writer_; }

 private:
  // Takes ownership of `writer`; only reachable through Create().
  explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
      : writer_(writer) {}

  STListWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned; deleted in dtor.
  DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
};
template <class A>
class FstFarWriter : public FarWriter<A> {
public:
typedef A Arc;
explicit FstFarWriter(const string &filename)
: filename_(filename), error_(false),