summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/icu.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/icu.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/icu.h116
1 files changed, 0 insertions, 116 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/icu.h b/kaldi_io/src/tools/openfst/include/fst/icu.h
deleted file mode 100644
index 3947716..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/icu.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// icu.h
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Copyright 2005-2010 Google, Inc.
-// Author: sorenj@google.com (Jeffrey Sorensen)
-// roubert@google.com (Fredrik Roubert)
-//
-// This library implements an unrestricted Thompson/Pike UTF-8 parser and
-// serializer. UTF-8 is a restricted subset of this byte stream encoding. See
-// http://en.wikipedia.org/wiki/UTF-8 for a good description of the encoding
-// details.
-
-#ifndef FST_LIB_ICU_H_
-#define FST_LIB_ICU_H_
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-
-namespace fst {
-
-// Decodes the Thompson/Pike (unrestricted UTF-8) byte sequence held in str and
-// appends one label per decoded character to *labels. *labels is not cleared
-// first. A lead byte may announce up to five continuation bytes, so decoded
-// values of up to 31 bits are accepted.
-//
-// Returns true on success. Returns false, after logging the reason, when a
-// continuation byte appears where a lead byte is expected, when the lead byte
-// is 0xfe or 0xff, or when a multi-byte sequence is truncated or contains a
-// byte that is not a continuation byte. Labels decoded before the error stay
-// appended to *labels.
-template <class Label>
-bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
-  const char *data = str.data();
-  const size_t length = str.size();
-  // The index is a size_t: an int index would be compared against the
-  // unsigned length and would overflow on inputs longer than INT_MAX bytes.
-  for (size_t i = 0; i < length; /* no update */) {
-    const int c = data[i++] & 0xff;
-    if ((c & 0x80) == 0) {
-      // Single-byte character: the byte is the label.
-      labels->push_back(c);
-      continue;
-    }
-    if ((c & 0xc0) == 0x80) {
-      LOG(ERROR) << "UTF8StringToLabels: continuation byte as lead byte";
-      return false;
-    }
-    if (c >= 0xfe) {
-      // 0xfe and 0xff are not lead bytes. Without this check they would be
-      // decoded exactly like 0xfc, aliasing distinct inputs to one label.
-      LOG(ERROR) << "UTF8StringToLabels: invalid lead byte: " << c;
-      return false;
-    }
-    // Number of continuation bytes announced by the lead byte (1 to 5).
-    int count = (c >= 0xc0) + (c >= 0xe0) + (c >= 0xf0) + (c >= 0xf8) +
-                (c >= 0xfc);
-    // The lead byte carries its payload in its low (6 - count) bits.
-    int code = c & ((1 << (6 - count)) - 1);
-    for (; count != 0; --count) {
-      if (i == length) {
-        LOG(ERROR) << "UTF8StringToLabels: truncated utf-8 byte sequence";
-        return false;
-      }
-      const char cb = data[i++];
-      if ((cb & 0xc0) != 0x80) {
-        LOG(ERROR) << "UTF8StringToLabels: missing/invalid continuation byte";
-        return false;
-      }
-      // Each continuation byte contributes its low six bits.
-      code = (code << 6) | (cb & 0x3f);
-    }
-    if (code < 0) {
-      // This should not be able to happen.
-      LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
-      return false;
-    }
-    labels->push_back(code);
-  }
-  return true;
-}
-
-// Encodes every label in labels as a Thompson/Pike (unrestricted UTF-8) byte
-// sequence of one to six bytes and stores the concatenated bytes in *str,
-// replacing its previous contents. Each label is first converted to int32_t.
-//
-// Returns true on success. Returns false, after logging, as soon as a label
-// converts to a negative value; *str is left unmodified in that case.
-template <class Label>
-bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
-  // Bytes are appended to a plain string; a stream is not needed to
-  // concatenate single characters.
-  string encoded;
-  encoded.reserve(labels.size());  // Every label yields at least one byte.
-  for (size_t i = 0; i < labels.size(); ++i) {
-    const int32_t code = labels[i];
-    if (code < 0) {
-      LOG(ERROR) << "LabelsToUTF8String: Invalid character found: " << code;
-      return false;
-    }
-    if (code < 0x80) {
-      // Single-byte character: the label is the byte.
-      encoded.push_back(static_cast<char>(code));
-      continue;
-    }
-    // Select the number of continuation bytes and the tag bits of the lead
-    // byte from the magnitude of the value.
-    int trail;
-    int lead_tag;
-    if (code < 0x800) {
-      trail = 1;
-      lead_tag = 0xc0;
-    } else if (code < 0x10000) {
-      trail = 2;
-      lead_tag = 0xe0;
-    } else if (code < 0x200000) {
-      trail = 3;
-      lead_tag = 0xf0;
-    } else if (code < 0x4000000) {
-      trail = 4;
-      lead_tag = 0xf8;
-    } else {
-      trail = 5;
-      lead_tag = 0xfc;
-    }
-    // Lead byte: tag bits plus the bits above all continuation payloads.
-    encoded.push_back(static_cast<char>((code >> (6 * trail)) | lead_tag));
-    // Continuation bytes, most significant six-bit group first.
-    for (int shift = 6 * (trail - 1); shift >= 0; shift -= 6) {
-      encoded.push_back(static_cast<char>(((code >> shift) & 0x3f) | 0x80));
-    }
-  }
-  str->swap(encoded);
-  return true;
-}
-
-} // namespace fst
-
-#endif // FST_LIB_ICU_H_