diff options
author | Determinant <[email protected]> | 2015-06-25 12:56:45 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2015-06-25 12:56:45 +0800 |
commit | a74183ddb4ab8383bfe214b3745eb8a0a99ee47a (patch) | |
tree | d5e69cf8c4c2db2e3a4722778352fc3c95953bb2 /tnet_io/KaldiLib/StkMatch.cc | |
parent | b6301089cde20f4c825c7f5deaf179082aad63da (diff) |
let HTK I/O implementation be a single package
Diffstat (limited to 'tnet_io/KaldiLib/StkMatch.cc')
-rw-r--r-- | tnet_io/KaldiLib/StkMatch.cc | 582 |
1 files changed, 0 insertions, 582 deletions
diff --git a/tnet_io/KaldiLib/StkMatch.cc b/tnet_io/KaldiLib/StkMatch.cc deleted file mode 100644 index 4ff4b18..0000000 --- a/tnet_io/KaldiLib/StkMatch.cc +++ /dev/null @@ -1,582 +0,0 @@ -/* - EPSHeader - - File: filmatch.c - Author: J. Kercheval - Created: Thu, 03/14/1991 22:22:01 -*/ - -/* - EPSRevision History - O. Glembek Thu, 03/11/2005 01:58:00 Added Mask extraction support (char % does this) - J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain - J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them) - J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche() - J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code - J. Kercheval Sun, 03/10/1991 20:37:11 beef up main() - J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain - J. Kercheval Thu, 03/14/1991 22:22:25 remove '\' for DOS file parsing - J. Kercheval Thu, 03/28/1991 20:58:27 include filmatch.h -*/ - -/* - Wildcard Pattern Matching -*/ - - -#include "StkMatch.h" -#include "Common.h" - -namespace TNet -{ - //#define TEST - static int matche_after_star (register const char *pattern, register const char *text, register char *s); - // following function is not defined or used. - // static int fast_match_after_star (register const char *pattern, register const char *text); - - /*---------------------------------------------------------------------------- - * - * Return true if PATTERN has any special wildcard characters - * - ----------------------------------------------------------------------------*/ - - bool is_pattern (const char *p) - { - while ( *p ) { - switch ( *p++ ) { - case '?': - case '*': - case '%': - case '[': - return true; - } - } - return false; - } - - - /*---------------------------------------------------------------------------- - * - * Return true if PATTERN has is a well formed regular expression according - * to the above syntax - * - * error_type is a return code based on the type of pattern error. Zero is - * returned in error_type if the pattern is a valid one. error_type return - * values are as follows: - * - * PATTERN_VALID - pattern is well formed - * PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-]) - * PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g ) - * PATTERN_EMPTY - [..] construct is empty (ie []) - * - ----------------------------------------------------------------------------*/ - - bool is_valid_pattern (const char *p, int *error_type) - { - - /* init error_type */ - *error_type = PATTERN_VALID; - - /* loop through pattern to EOS */ - while ( *p ) - { - /* determine pattern type */ - switch ( *p ) - { - /* the [..] construct must be well formed */ - case '[': - { - p++; - - /* if the next character is ']' then bad pattern */ - if ( *p == ']' ) { - *error_type = PATTERN_EMPTY; - return false; - } - - /* if end of pattern here then bad pattern */ - if ( !*p ) - { - *error_type = PATTERN_CLOSE; - return false; - } - - /* loop to end of [..] construct */ - while ( *p != ']' ) - { - /* check for literal escape */ - if ( *p == '\\' ) - { - p++; - - /* if end of pattern here then bad pattern */ - if ( !*p++ ) { - *error_type = PATTERN_ESC; - return false; - } - } - else - p++; - - /* if end of pattern here then bad pattern */ - if ( !*p ) - { - *error_type = PATTERN_CLOSE; - return false; - } - - /* if this a range */ - if ( *p == '-' ) - { - /* we must have an end of range */ - if ( !*++p || *p == ']' ) - { - *error_type = PATTERN_RANGE; - return false; - } - else - { - - /* check for literal escape */ - if ( *p == '\\' ) - p++; - - /* if end of pattern here then bad pattern */ - if ( !*p++ ) - { - *error_type = PATTERN_ESC; - return false; - } - } - } - } - break; - } //case '[': - - - /* all other characters are valid pattern elements */ - case '*': - case '?': - case '%': - default: - p++; /* "normal" character */ - break; - } // switch ( *p ) - } // while ( *p ) - - return true; - } //bool is_valid_pattern (const char *p, int *error_type) - - - /*---------------------------------------------------------------------------- - * - * Match the pattern PATTERN against the string TEXT; - * - * returns MATCH_VALID if pattern matches, or an errorcode as follows - * otherwise: - * - * MATCH_PATTERN - bad pattern - * MATCH_RANGE - match failure on [..] construct - * MATCH_ABORT - premature end of text string - * MATCH_END - premature end of pattern string - * MATCH_VALID - valid match - * - * - * A match means the entire string TEXT is used up in matching. - * - * In the pattern string: - * `*' matches any sequence of characters (zero or more) - * `?' matches any character - * `%' matches any character and stores it in the s string - * [SET] matches any character in the specified set, - * [!SET] or [^SET] matches any character not in the specified set. - * \ is allowed within a set to escape a character like ']' or '-' - * - * A set is composed of characters or ranges; a range looks like - * character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the - * minimal set of characters allowed in the [..] pattern construct. - * Other characters are allowed (ie. 8 bit characters) if your system - * will support them. - * - * To suppress the special syntactic significance of any of `[]*?%!^-\', - * within a [..] construct and match the character exactly, precede it - * with a `\'. - * - ----------------------------------------------------------------------------*/ - - int matche ( register const char *p, register const char *t, register char *s ) - { - register char range_start, range_end; /* start and end in range */ - - bool invert; /* is this [..] or [!..] */ - bool member_match; /* have I matched the [..] construct? */ - bool loop; /* should I terminate? */ - - for ( ; *p; p++, t++ ) { - - /* if this is the end of the text then this is the end of the match */ - if (!*t) { - return ( *p == '*' && *++p == '\0' ) ? MATCH_VALID : MATCH_ABORT; - } - - /* determine and react to pattern type */ - switch ( *p ) { - - /* single any character match */ - case '?': - break; - - /* single any character match, with extraction*/ - case '%': { - *s++ = *t; - *s = '\0'; - break; - } - - /* multiple any character match */ - case '*': - return matche_after_star (p, t, s); - - /* [..] construct, single member/exclusion character match */ - case '[': { - /* move to beginning of range */ - p++; - - /* check if this is a member match or exclusion match */ - invert = false; - if ( *p == '!' || *p == '^') { - invert = true; - p++; - } - - /* if closing bracket here or at range start then we have a - malformed pattern */ - if ( *p == ']' ) { - return MATCH_PATTERN; - } - - member_match = false; - loop = true; - - while ( loop ) { - - /* if end of construct then loop is done */ - if (*p == ']') { - loop = false; - continue; - } - - /* matching a '!', '^', '-', '\' or a ']' */ - if ( *p == '\\' ) { - range_start = range_end = *++p; - } - else { - range_start = range_end = *p; - } - - /* if end of pattern then bad pattern (Missing ']') */ - if (!*p) - return MATCH_PATTERN; - - /* check for range bar */ - if (*++p == '-') { - - /* get the range end */ - range_end = *++p; - - /* if end of pattern or construct then bad pattern */ - if (range_end == '\0' || range_end == ']') - return MATCH_PATTERN; - - /* special character range end */ - if (range_end == '\\') { - range_end = *++p; - - /* if end of text then we have a bad pattern */ - if (!range_end) - return MATCH_PATTERN; - } - - /* move just beyond this range */ - p++; - } - - /* if the text character is in range then match found. - make sure the range letters have the proper - relationship to one another before comparison */ - if ( range_start < range_end ) { - if (*t >= range_start && *t <= range_end) { - member_match = true; - loop = false; - } - } - else { - if (*t >= range_end && *t <= range_start) { - member_match = true; - loop = false; - } - } - } - - /* if there was a match in an exclusion set then no match */ - /* if there was no match in a member set then no match */ - if ((invert && member_match) || - !(invert || member_match)) - return MATCH_RANGE; - - /* if this is not an exclusion then skip the rest of the [...] - construct that already matched. */ - if (member_match) { - while (*p != ']') { - - /* bad pattern (Missing ']') */ - if (!*p) - return MATCH_PATTERN; - - /* skip exact match */ - if (*p == '\\') { - p++; - - /* if end of text then we have a bad pattern */ - if (!*p) - return MATCH_PATTERN; - } - - /* move to next pattern char */ - p++; - } - } - - break; - } // case ']' - - /* must match this character exactly */ - default: - if (*p != *t) - return MATCH_LITERAL; - } - } - - //*s = '\0'; - /* if end of text not reached then the pattern fails */ - if ( *t ) - return MATCH_END; - else - return MATCH_VALID; - } - - - /*---------------------------------------------------------------------------- - * - * recursively call matche() with final segment of PATTERN and of TEXT. - * - ----------------------------------------------------------------------------*/ - - static int matche_after_star (register const char *p, register const char *t, register char *s) - { - register int match = 0; - register char nextp; - - /* pass over existing ? and * in pattern */ - while ( *p == '?' || *p == '%' || *p == '*' ) { - - /* take one char for each ? and + */ - if ( *p == '?') { - - /* if end of text then no match */ - if ( !*t++ ) { - return MATCH_ABORT; - } - } - - if ( *p == '%') { - *s++ = *t; - *s = '\0'; - /* if end of text then no match */ - if ( !*t++ ) { - return MATCH_ABORT; - } - } - - /* move to next char in pattern */ - p++; - } - - /* if end of pattern we have matched regardless of text left */ - if ( !*p ) { - return MATCH_VALID; - } - - /* get the next character to match which must be a literal or '[' */ - nextp = *p; - - /* Continue until we run out of text or definite result seen */ - do { - - /* a precondition for matching is that the next character - in the pattern match the next character in the text or that - the next pattern char is the beginning of a range. Increment - text pointer as we go here */ - if ( nextp == *t || nextp == '[' ) { - match = matche(p, t, s); - } - - /* if the end of text is reached then no match */ - if ( !*t++ ) match = MATCH_ABORT; - - } while ( match != MATCH_VALID && - match != MATCH_ABORT && - match != MATCH_PATTERN); - - /* return result */ - return match; - } - - - /*---------------------------------------------------------------------------- - * - * match() is a shell to matche() to return only bool values. - * - ----------------------------------------------------------------------------*/ - - bool match(const char *p, const char *t, char *s) - { - int error_type; - error_type = matche(p,t,s); - return (error_type != MATCH_VALID ) ? false : true; - } - - - //*************************************************************************** - //*************************************************************************** - bool - ProcessMask(const std::string & rString, - const std::string & rWildcard, - std::string & rSubstr) - { - char * substr; - int percent_count = 0; - int ret ; - size_t pos = 0; - - // let's find how many % to allocate enough space for the return substring - while ((pos = rWildcard.find('%', pos)) != rWildcard.npos) - { - percent_count++; - pos++; - } - - // allocate space for the substring - substr = new char[percent_count + 1]; - substr[percent_count] = 0; - substr[0] = '\0'; - - // optionally prepend '*/' to wildcard - std::string wildcard(rWildcard); - if(wildcard[0] != '*') { - wildcard = "*/" + wildcard; - } - - //optionally prepend '/' to string - std::string string1(rString); - if(string1[0] != '/') { - string1 = "/" + string1; - } - - // parse the string - if (0 != (ret = match(wildcard.c_str(), string1.c_str(), substr))) - { - rSubstr = substr; - } - delete[] substr; - return ret; - } // ProcessMask -} - - -#ifdef TEST - -/* -* This test main expects as first arg the pattern and as second arg -* the match string. Output is yaeh or nay on match. If nay on -* match then the error code is parsed and written. -*/ - -#include <stdio.h> - -int main(int argc, char *argv[]) -{ - int error; - int is_valid_error; - - char * tmp = argv[0]; - int i = 0; - for (; *tmp; tmp++) - if (*tmp=='%') i++; - - char s[i+1]; - - - if (argc != 3) { - printf("Usage: MATCH Pattern Text\n"); - } - else { - printf("Pattern: %s\n", argv[1]); - printf("Text : %s\n", argv[2]); - - if (!is_pattern(argv[1])) { - printf(" First Argument Is Not A Pattern\n"); - } - else { - match(argv[1],argv[2], s) ? printf("true") : printf("false"); - error = matche(argv[1],argv[2], s); - is_valid_pattern(argv[1],&is_valid_error); - - switch ( error ) { - case MATCH_VALID: - printf(" Match Successful"); - if (is_valid_error != PATTERN_VALID) - printf(" -- is_valid_pattern() is complaining\n"); - else - printf("\n"); - printf("%s\n", s); - - break; - case MATCH_RANGE: - printf(" Match Failed on [..]\n"); - break; - case MATCH_ABORT: - printf(" Match Failed on Early Text Termination\n"); - break; - case MATCH_END: - printf(" Match Failed on Early Pattern Termination\n"); - break; - case MATCH_PATTERN: - switch ( is_valid_error ) { - case PATTERN_VALID: - printf(" Internal Disagreement On Pattern\n"); - break; - case PATTERN_RANGE: - printf(" No End of Range in [..] Construct\n"); - break; - case PATTERN_CLOSE: - printf(" [..] Construct is Open\n"); - break; - case PATTERN_EMPTY: - printf(" [..] Construct is Empty\n"); - break; - default: - printf(" Internal Error in is_valid_pattern()\n"); - } - break; - default: - printf(" Internal Error in matche()\n"); - break; - } - } - - } - return(0); -} - -#endif |