diff options
author | Determinant <[email protected]> | 2015-05-29 23:06:58 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2015-05-29 23:06:58 +0800 |
commit | 74b9f7cb88cd21cfac3c2e50c8efb802485df0c5 (patch) | |
tree | bd6e583088a086144acc2d8af3eaca59691194ff /tnet_io/KaldiLib/Tokenizer.cc |
init
Diffstat (limited to 'tnet_io/KaldiLib/Tokenizer.cc')
-rw-r--r-- | tnet_io/KaldiLib/Tokenizer.cc | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/tnet_io/KaldiLib/Tokenizer.cc b/tnet_io/KaldiLib/Tokenizer.cc new file mode 100644 index 0000000..0c49050 --- /dev/null +++ b/tnet_io/KaldiLib/Tokenizer.cc @@ -0,0 +1,53 @@ +#include "Tokenizer.h" +#include "string.h" + +namespace TNet +{ + //**************************************************************************** + //**************************************************************************** + void + Tokenizer:: + AddString(const char* pString) + { + // copy into string struct, which is more convenient + std::string aux_string(pString); + std::string aux_record; + std::string::size_type cur_pos = 0; + std::string::size_type old_pos = 0; + std::string::size_type search_start = 0; + + // make sure we have enough space + aux_record.reserve(aux_string.length()); + + // find all of separators and make a list of tokens + while(old_pos < std::string::npos) { + // find the next separator + cur_pos = aux_string.find_first_of(mSeparator, search_start); + + // if backslash is in front of separator, ignore this separator + if (cur_pos != 0 && cur_pos != std::string::npos && + pString[cur_pos - 1] == '\\') { + search_start = cur_pos + 1; + continue; + } + + // we don't want to have empty records + if (!(cur_pos == old_pos && mSkipEmpty)) { + // extract token + aux_record.insert(0, pString+old_pos, cur_pos==std::string::npos ? strlen(pString+old_pos) : cur_pos - old_pos); + // insert to list + this->push_back(aux_record); + + // we don't need the contents of the token + aux_record.erase(); + } + + // update old position so that it points behind the separator + old_pos = cur_pos < std::string::npos ? cur_pos + 1 : cur_pos; + search_start = old_pos; + } + } + + +} // namespace TNet + |