/*
 EPSHeader

   File: filmatch.c
   Author: J. Kercheval
   Created: Thu, 03/14/1991  22:22:01
*/
/*
 EPSRevision History

   O. Glembek    Thu, 03/11/2005  01:58:00  Added Mask extraction support (char % does this)
   J. Kercheval  Wed, 02/20/1991  22:29:01  Released to Public Domain
   J. Kercheval  Fri, 02/22/1991  15:29:01  fix '\' bugs (two :( of them)
   J. Kercheval  Sun, 03/10/1991  19:31:29  add error return to matche()
   J. Kercheval  Sun, 03/10/1991  20:11:11  add is_valid_pattern code
   J. Kercheval  Sun, 03/10/1991  20:37:11  beef up main()
   J. Kercheval  Tue, 03/12/1991  22:25:10  Released as V1.1 to Public Domain
   J. Kercheval  Thu, 03/14/1991  22:22:25  remove '\' for DOS file parsing
   J. Kercheval  Thu, 03/28/1991  20:58:27  include filmatch.h
*/

/*
   Wildcard Pattern Matching
*/

#include "StkMatch.h"
#include "Common.h"

#include <string>

namespace TNet
{
  //#define TEST

  /* Note: the `register' storage class was dropped from all definitions in
     this file. It is not part of a function's type, so existing declarations
     still match, and the keyword is ill-formed from C++17 onwards. */
  static int matche_after_star (const char *pattern, const char *text, char *s);

  // following function is not defined or used.
  // static int fast_match_after_star (const char *pattern, const char *text);


  /*----------------------------------------------------------------------------
   *
   * Return true if PATTERN has any special wildcard characters
   * ('?', '*', '%' or '[').
   *
   ----------------------------------------------------------------------------*/
  bool
  is_pattern (const char *p)
  {
    while ( *p )
    {
      switch ( *p++ )
      {
        case '?':
        case '*':
        case '%':
        case '[':
          return true;
      }
    }
    return false;
  }


  /*----------------------------------------------------------------------------
   *
   * Return true if PATTERN is a well formed regular expression according
   * to the syntax described in the comment of matche() below.
   *
   * error_type is a return code based on the type of pattern error.
   * PATTERN_VALID is stored in error_type if the pattern is a valid one.
   * error_type return values are as follows:
   *
   *   PATTERN_VALID - pattern is well formed
   *   PATTERN_ESC   - literal escape at the end of the pattern (ie "[a\")
   *   PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-])
   *   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
   *   PATTERN_EMPTY - [..] construct is empty (ie [])
   *
   ----------------------------------------------------------------------------*/
  bool
  is_valid_pattern (const char *p, int *error_type)
  {
    /* init error_type */
    *error_type = PATTERN_VALID;

    /* loop through pattern to EOS */
    while ( *p )
    {
      /* determine pattern type */
      switch ( *p )
      {
        /* the [..] construct must be well formed */
        case '[':
        {
          p++;

          /* if the next character is ']' then bad pattern */
          if ( *p == ']' )
          {
            *error_type = PATTERN_EMPTY;
            return false;
          }

          /* if end of pattern here then bad pattern */
          if ( !*p )
          {
            *error_type = PATTERN_CLOSE;
            return false;
          }

          /* loop to end of [..] construct */
          while ( *p != ']' )
          {
            /* check for literal escape */
            if ( *p == '\\' )
            {
              p++;

              /* if end of pattern here then bad pattern */
              if ( !*p++ )
              {
                *error_type = PATTERN_ESC;
                return false;
              }
            }
            else
            {
              p++;
            }

            /* if end of pattern here then bad pattern */
            if ( !*p )
            {
              *error_type = PATTERN_CLOSE;
              return false;
            }

            /* if this a range */
            if ( *p == '-' )
            {
              /* we must have an end of range */
              if ( !*++p || *p == ']' )
              {
                *error_type = PATTERN_RANGE;
                return false;
              }

              /* check for literal escape */
              if ( *p == '\\' )
                p++;

              /* if end of pattern here then bad pattern */
              if ( !*p++ )
              {
                *error_type = PATTERN_ESC;
                return false;
              }

              /* The range end may have been the last character of the
                 pattern (ie "[a-b"). Without this check the enclosing loop
                 would step over the terminating NUL and read past the end
                 of the string. */
              if ( !*p )
              {
                *error_type = PATTERN_CLOSE;
                return false;
              }
            }
          }
          /* p now rests on the closing ']'; the next pass of the outer loop
             consumes it as a normal character */
          break;
        } // case '['

        /* all other characters are valid pattern elements */
        case '*':
        case '?':
        case '%':
        default:
          p++;                    /* "normal" character */
          break;
      } // switch ( *p )
    } // while ( *p )

    return true;
  } // bool is_valid_pattern (const char *p, int *error_type)


  /*----------------------------------------------------------------------------
   *
   * Match the pattern PATTERN against the string TEXT;
   *
   * returns MATCH_VALID if pattern matches, or an errorcode as follows
   * otherwise:
   *
   *   MATCH_PATTERN - bad pattern
   *   MATCH_LITERAL - match failure on a literal character
   *   MATCH_RANGE   - match failure on [..] construct
   *   MATCH_ABORT   - premature end of text string
   *   MATCH_END     - premature end of pattern string
   *   MATCH_VALID   - valid match
   *
   *
   * A match means the entire string TEXT is used up in matching.
   *
   * In the pattern string:
   *   `*' matches any sequence of characters (zero or more)
   *   `?' matches any character
   *   `%' matches any character and stores it in the s string
   *   [SET] matches any character in the specified set,
   *   [!SET] or [^SET] matches any character not in the specified set.
   *   \ is allowed within a set to escape a character like ']' or '-'
   *
   * A set is composed of characters or ranges; a range looks like
   * character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the
   * minimal set of characters allowed in the [..] pattern construct.
   * Other characters are allowed (ie. 8 bit characters) if your system
   * will support them.
   *
   * To suppress the special syntactic significance of any of `[]*?%!^-\',
   * within a [..] construct and match the character exactly, precede it
   * with a `\'.
   *
   * Every `%' that is matched appends one character to s and re-terminates
   * it, so s must provide room for one character per `%' in the pattern
   * plus the terminating NUL.
   *
   ----------------------------------------------------------------------------*/
  int
  matche (const char *p, const char *t, char *s)
  {
    for ( ; *p; p++, t++ )
    {
      /* if this is the end of the text then this is the end of the match */
      if ( !*t )
      {
        return ( *p == '*' && *++p == '\0' ) ? MATCH_VALID : MATCH_ABORT;
      }

      /* determine and react to pattern type */
      switch ( *p )
      {
        /* single any character match */
        case '?':
          break;

        /* single any character match, with extraction */
        case '%':
        {
          *s++ = *t;
          *s   = '\0';
          break;
        }

        /* multiple any character match */
        case '*':
          return matche_after_star (p, t, s);

        /* [..] construct, single member/exclusion character match */
        case '[':
        {
          char range_start, range_end;   /* start and end in range */
          bool invert       = false;     /* is this [..] or [!..] */
          bool member_match = false;     /* have I matched the [..] construct? */
          bool loop         = true;      /* should I keep scanning the set? */

          /* move to beginning of range */
          p++;

          /* check if this is a member match or exclusion match */
          if ( *p == '!' || *p == '^' )
          {
            invert = true;
            p++;
          }

          /* if closing bracket here or at range start then we have a
             malformed pattern */
          if ( *p == ']' )
          {
            return MATCH_PATTERN;
          }

          while ( loop )
          {
            /* if end of construct then loop is done */
            if ( *p == ']' )
            {
              loop = false;
              continue;
            }

            /* matching a '!', '^', '-', '\' or a ']' */
            if ( *p == '\\' )
            {
              range_start = range_end = *++p;
            }
            else
            {
              range_start = range_end = *p;
            }

            /* if end of pattern then bad pattern (Missing ']') */
            if ( !*p )
              return MATCH_PATTERN;

            /* check for range bar */
            if ( *++p == '-' )
            {
              /* get the range end */
              range_end = *++p;

              /* if end of pattern or construct then bad pattern */
              if ( range_end == '\0' || range_end == ']' )
                return MATCH_PATTERN;

              /* special character range end */
              if ( range_end == '\\' )
              {
                range_end = *++p;

                /* if end of text then we have a bad pattern */
                if ( !range_end )
                  return MATCH_PATTERN;
              }

              /* move just beyond this range */
              p++;
            }

            /* if the text character is in range then match found.
               make sure the range letters have the proper
               relationship to one another before comparison */
            if ( range_start < range_end )
            {
              if ( *t >= range_start && *t <= range_end )
              {
                member_match = true;
                loop = false;
              }
            }
            else
            {
              if ( *t >= range_end && *t <= range_start )
              {
                member_match = true;
                loop = false;
              }
            }
          }

          /* if there was a match in an exclusion set then no match */
          /* if there was no match in a member set then no match */
          if ( (invert && member_match) || !(invert || member_match) )
            return MATCH_RANGE;

          /* if this is not an exclusion then skip the rest of the [...]
             construct that already matched. */
          if ( member_match )
          {
            while ( *p != ']' )
            {
              /* bad pattern (Missing ']') */
              if ( !*p )
                return MATCH_PATTERN;

              /* skip exact match */
              if ( *p == '\\' )
              {
                p++;

                /* if end of text then we have a bad pattern */
                if ( !*p )
                  return MATCH_PATTERN;
              }

              /* move to next pattern char */
              p++;
            }
          }

          break;
        } // case '['

        /* must match this character exactly */
        default:
          if ( *p != *t )
            return MATCH_LITERAL;
      }
    }

    //*s = '\0';

    /* if end of text not reached then the pattern fails */
    if ( *t )
      return MATCH_END;
    else
      return MATCH_VALID;
  }


  /*----------------------------------------------------------------------------
   *
   * recursively call matche() with final segment of PATTERN and of TEXT.
   *
   * Entered with p on a '*'. Returns MATCH_VALID, MATCH_ABORT or
   * MATCH_PATTERN.
   *
   ----------------------------------------------------------------------------*/
  static int
  matche_after_star (const char *p, const char *t, char *s)
  {
    int  match = 0;
    char nextp;

    /* pass over existing ?, % and * in pattern */
    while ( *p == '?' || *p == '%' || *p == '*' )
    {
      /* take one char for each ? and % */
      if ( *p == '?' )
      {
        /* if end of text then no match */
        if ( !*t++ )
        {
          return MATCH_ABORT;
        }
      }

      if ( *p == '%' )
      {
        /* extract the consumed character */
        *s++ = *t;
        *s   = '\0';

        /* if end of text then no match */
        if ( !*t++ )
        {
          return MATCH_ABORT;
        }
      }

      /* move to next char in pattern */
      p++;
    }

    /* if end of pattern we have matched regardless of text left */
    if ( !*p )
    {
      return MATCH_VALID;
    }

    /* get the next character to match which must be a literal or '[' */
    nextp = *p;

    /* Continue until we run out of text or definite result seen */
    do
    {
      /* a precondition for matching is that the next character
         in the pattern match the next character in the text or that
         the next pattern char is the beginning of a range. Increment
         text pointer as we go here */
      if ( nextp == *t || nextp == '[' )
      {
        match = matche (p, t, s);
      }

      /* if the end of text is reached then no match */
      if ( !*t++ )
        match = MATCH_ABORT;

    } while ( match != MATCH_VALID &&
              match != MATCH_ABORT &&
              match != MATCH_PATTERN );

    /* return result */
    return match;
  }


  /*----------------------------------------------------------------------------
   *
   * match() is a shell to matche() to return only bool values.
   *
   ----------------------------------------------------------------------------*/
  bool
  match (const char *p, const char *t, char *s)
  {
    return matche (p, t, s) == MATCH_VALID;
  }


  //***************************************************************************
  //***************************************************************************
  /**
   * Matches rString against rWildcard and extracts the characters that were
   * matched by '%' in the wildcard.
   *
   * Before matching, "*" followed by "/" is prepended to the wildcard unless
   * it already starts with '*', and "/" is prepended to the string unless it
   * already starts with '/'.
   *
   * @param rString    text to be matched
   * @param rWildcard  pattern in the syntax accepted by matche()
   * @param rSubstr    receives the extracted characters on a successful
   *                   match; left untouched otherwise
   * @return true when the (adjusted) string matches the (adjusted) wildcard
   */
  bool
  ProcessMask (const std::string & rString,
               const std::string & rWildcard,
               std::string &       rSubstr)
  {
    // let's find how many % there are, to allocate enough space for the
    // returned substring
    size_t percent_count = 0;
    for (size_t pos = rWildcard.find('%');
         pos != std::string::npos;
         pos = rWildcard.find('%', pos + 1))
    {
      percent_count++;
    }

    // One slot per '%' plus the terminating NUL. A std::string owns the
    // buffer so that nothing leaks if any of the operations below throws.
    std::string extracted(percent_count + 1, '\0');

    // optionally prepend '*/' to wildcard
    std::string wildcard(rWildcard);
    if (wildcard[0] != '*')
    {
      wildcard = "*/" + wildcard;
    }

    // optionally prepend '/' to string
    std::string string1(rString);
    if (string1[0] != '/')
    {
      string1 = "/" + string1;
    }

    // parse the string
    const bool matched = match(wildcard.c_str(), string1.c_str(), &extracted[0]);
    if (matched)
    {
      // c_str() cuts the result at the first NUL written by the matcher
      rSubstr = extracted.c_str();
    }

    return matched;
  } // ProcessMask
}


#ifdef TEST

/*
 * This test main expects as first arg the pattern and as second arg
 * the match string. Output is yaeh or nay on match. If nay on
 * match then the error code is parsed and written.
 */

#include <stdio.h>

using namespace TNet;

int
main (int argc, char *argv[])
{
  int error;
  int is_valid_error;

  if (argc != 3)
  {
    printf("Usage: MATCH Pattern Text\n");
    return (0);
  }

  /* size the extraction buffer from the pattern: one slot per '%' plus the
     terminating NUL */
  size_t percent_count = 0;
  for (const char *tmp = argv[1]; *tmp; tmp++)
  {
    if (*tmp == '%')
      percent_count++;
  }
  std::string buffer(percent_count + 1, '\0');
  char *s = &buffer[0];

  printf("Pattern: %s\n", argv[1]);
  printf("Text : %s\n", argv[2]);

  if (!is_pattern(argv[1]))
  {
    printf(" First Argument Is Not A Pattern\n");
  }
  else
  {
    match(argv[1], argv[2], s) ? printf("true") : printf("false");
    error = matche(argv[1], argv[2], s);
    is_valid_pattern(argv[1], &is_valid_error);

    switch ( error )
    {
      case MATCH_VALID:
        printf(" Match Successful");
        if (is_valid_error != PATTERN_VALID)
          printf(" -- is_valid_pattern() is complaining\n");
        else
          printf("\n");
        printf("%s\n", s);
        break;

      case MATCH_LITERAL:
        printf(" Match Failed on Literal Mismatch\n");
        break;

      case MATCH_RANGE:
        printf(" Match Failed on [..]\n");
        break;

      case MATCH_ABORT:
        printf(" Match Failed on Early Text Termination\n");
        break;

      case MATCH_END:
        printf(" Match Failed on Early Pattern Termination\n");
        break;

      case MATCH_PATTERN:
        switch ( is_valid_error )
        {
          case PATTERN_VALID:
            printf(" Internal Disagreement On Pattern\n");
            break;

          case PATTERN_ESC:
            printf(" Literal Escape at End of Pattern\n");
            break;

          case PATTERN_RANGE:
            printf(" No End of Range in [..] Construct\n");
            break;

          case PATTERN_CLOSE:
            printf(" [..] Construct is Open\n");
            break;

          case PATTERN_EMPTY:
            printf(" [..] Construct is Empty\n");
            break;

          default:
            printf(" Internal Error in is_valid_pattern()\n");
        }
        break;

      default:
        printf(" Internal Error in matche()\n");
        break;
    }
  }

  return (0);
}

#endif