/*
EPSHeader
File: filmatch.c
Author: J. Kercheval
Created: Thu, 03/14/1991 22:22:01
*/
/*
EPSRevision History
O. Glembek Thu, 03/11/2005 01:58:00 Added Mask extraction support (char % does this)
J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain
J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them)
J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche()
J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code
J. Kercheval Sun, 03/10/1991 20:37:11 beef up main()
J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain
J. Kercheval Thu, 03/14/1991 22:22:25 remove '\' for DOS file parsing
J. Kercheval Thu, 03/28/1991 20:58:27 include filmatch.h
*/
/*
Wildcard Pattern Matching
*/
#include "StkMatch.h"
#include "Common.h"
namespace TNet
{
//#define TEST
// Forward declaration: matche() delegates to this helper as soon as it meets
// a '*'. ('register' dropped: the specifier was removed in C++17 and never
// was part of the function type.)
static int matche_after_star (const char *pattern, const char *text, char *s);
// following function is not defined or used.
// static int fast_match_after_star (register const char *pattern, register const char *text);
/*----------------------------------------------------------------------------
*
* Return true if PATTERN contains at least one wildcard metacharacter,
* i.e. any of '?', '*', '%' or '['. A string without any of them can only
* ever match itself.
*
----------------------------------------------------------------------------*/
bool is_pattern (const char *p)
{
    /* scan up to the terminating NUL, bailing out on the first metacharacter */
    for ( const char *scan = p; *scan != '\0'; ++scan ) {
        const char ch = *scan;
        if ( ch == '?' || ch == '*' || ch == '%' || ch == '[' )
            return true;
    }
    /* nothing special found */
    return false;
}
/*----------------------------------------------------------------------------
*
* Return true if PATTERN is a well formed wildcard expression according
* to the syntax understood by matche().
*
* Only [..] constructs can be malformed; '*', '?', '%' and literal
* characters (a stray ']' included) are always accepted.
*
* error_type is a return code based on the type of pattern error. It is
* always written (it must not be NULL). error_type return values are as
* follows:
*
* PATTERN_VALID - pattern is well formed
* PATTERN_ESC - '\' inside a [..] construct is the last character of the
* pattern
* PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-])
* PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
* PATTERN_EMPTY - [..] construct is empty (ie [] or [!])
*
----------------------------------------------------------------------------*/
bool is_valid_pattern (const char *p, int *error_type)
{
    /* assume the pattern is fine until a [..] construct proves otherwise */
    *error_type = PATTERN_VALID;

    /* loop through pattern to EOS */
    while ( *p )
    {
        /* everything except '[' is a valid pattern element on its own */
        if ( *p != '[' )
        {
            p++;
            continue;
        }

        /* step inside the [..] construct */
        p++;

        /* skip the negation prefix exactly like matche() does, so that both
           functions agree on inputs such as "[!]" and "[!-]" */
        if ( *p == '!' || *p == '^' )
            p++;

        /* if the next character is ']' then the set has no members */
        if ( *p == ']' )
        {
            *error_type = PATTERN_EMPTY;
            return false;
        }

        /* loop to end of [..] construct */
        while ( *p != ']' )
        {
            /* pattern ended before the closing bracket. Testing this at the
               top of every iteration also covers a range whose end is the
               very last character (ie "[a-z"), which used to step past the
               terminating NUL */
            if ( !*p )
            {
                *error_type = PATTERN_CLOSE;
                return false;
            }

            /* consume one member, honouring a literal escape */
            if ( *p == '\\' )
            {
                p++;
                /* an escape must be followed by the escaped character */
                if ( !*p++ )
                {
                    *error_type = PATTERN_ESC;
                    return false;
                }
            }
            else
                p++;

            /* a '-' right after the member makes it the start of a range */
            if ( *p == '-' )
            {
                /* we must have an end of range */
                if ( !*++p || *p == ']' )
                {
                    *error_type = PATTERN_RANGE;
                    return false;
                }

                /* the range end may itself be escaped */
                if ( *p == '\\' )
                    p++;

                /* consume the range end; a dangling escape is an error */
                if ( !*p++ )
                {
                    *error_type = PATTERN_ESC;
                    return false;
                }
            }
        }
        /* p rests on the closing ']' here; the outer loop consumes it as an
           ordinary character on its next iteration */
    } // while ( *p )
    return true;
} //bool is_valid_pattern (const char *p, int *error_type)
/*----------------------------------------------------------------------------
*
* Match the pattern PATTERN (p) against the string TEXT (t);
*
* returns MATCH_VALID if pattern matches, or an errorcode as follows
* otherwise:
*
* MATCH_PATTERN - bad pattern
* MATCH_LITERAL - match failure on a literal character
* MATCH_RANGE - match failure on [..] construct
* MATCH_ABORT - premature end of text string
* MATCH_END - premature end of pattern string
* MATCH_VALID - valid match
*
*
* A match means the entire string TEXT is used up in matching.
*
* In the pattern string:
* `*' matches any sequence of characters (zero or more)
* `?' matches any character
* `%' matches any character and stores it in the s string
* [SET] matches any character in the specified set,
* [!SET] or [^SET] matches any character not in the specified set.
* \ is allowed within a set to escape a character like ']' or '-'
*
* A set is composed of characters or ranges; a range looks like
* character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the
* minimal set of characters allowed in the [..] pattern construct.
* Other characters are allowed (ie. 8 bit characters) if your system
* will support them.
*
* To suppress the special syntactic significance of any of `[]*?%!^-\',
* within a [..] construct and match the character exactly, precede it
* with a `\'.
*
* s is the extraction buffer: every `%' that consumes a text character
* appends that character to s and re-terminates s with '\0'. The caller
* must therefore supply room for one character per `%' in the pattern
* plus the terminator. s is not touched by patterns without `%'.
*
----------------------------------------------------------------------------*/
int matche ( const char *p, const char *t, char *s )
{
    for ( ; *p; p++, t++ ) {
        /* if this is the end of the text then this is the end of the match:
           only '*'s may remain in the pattern, each matching the empty
           string. Skipping all of them (not just one) makes "a**" accept
           "a", consistent with it accepting "ab" */
        if ( !*t ) {
            while ( *p == '*' )
                p++;
            return ( *p == '\0' ) ? MATCH_VALID : MATCH_ABORT;
        }

        /* determine and react to pattern type */
        switch ( *p ) {
        /* single any character match */
        case '?':
            break;

        /* single any character match, with extraction */
        case '%':
            *s++ = *t;
            *s = '\0'; /* keep s a valid C string after every capture */
            break;

        /* multiple any character match */
        case '*':
            return matche_after_star (p, t, s);

        /* [..] construct, single member/exclusion character match */
        case '[': {
            bool invert = false;       /* is this [..] or [!..] */
            bool member_match = false; /* have I matched the [..] construct? */

            /* move to beginning of range */
            p++;

            /* check if this is a member match or exclusion match */
            if ( *p == '!' || *p == '^' ) {
                invert = true;
                p++;
            }

            /* if closing bracket here or at range start then we have a
               malformed pattern */
            if ( *p == ']' ) {
                return MATCH_PATTERN;
            }

            /* examine set members one by one until the text character is
               found in one of them or the closing bracket is reached */
            while ( !member_match && *p != ']' ) {
                char range_start, range_end; /* start and end in range */

                /* matching a '!', '^', '-', '\' or a ']' */
                if ( *p == '\\' ) {
                    range_start = range_end = *++p;
                }
                else {
                    range_start = range_end = *p;
                }

                /* if end of pattern then bad pattern (Missing ']') */
                if ( !*p )
                    return MATCH_PATTERN;

                /* check for range bar */
                if ( *++p == '-' ) {
                    /* get the range end */
                    range_end = *++p;

                    /* if end of pattern or construct then bad pattern */
                    if ( range_end == '\0' || range_end == ']' )
                        return MATCH_PATTERN;

                    /* special character range end */
                    if ( range_end == '\\' ) {
                        range_end = *++p;
                        /* if end of pattern then we have a bad pattern */
                        if ( !range_end )
                            return MATCH_PATTERN;
                    }

                    /* move just beyond this range */
                    p++;
                }

                /* if the text character is in range then match found.
                   make sure the range letters have the proper
                   relationship to one another before comparison */
                if ( range_start < range_end ) {
                    if ( *t >= range_start && *t <= range_end )
                        member_match = true;
                }
                else {
                    if ( *t >= range_end && *t <= range_start )
                        member_match = true;
                }
            }

            /* a hit in an exclusion set, or a miss in a member set, both
               mean the construct rejects the text character */
            if ( invert == member_match )
                return MATCH_RANGE;

            /* if this is not an exclusion then skip the rest of the [...]
               construct that already matched. */
            if ( member_match ) {
                while ( *p != ']' ) {
                    /* bad pattern (Missing ']') */
                    if ( !*p )
                        return MATCH_PATTERN;

                    /* skip exact match */
                    if ( *p == '\\' ) {
                        p++;
                        /* if end of pattern then we have a bad pattern */
                        if ( !*p )
                            return MATCH_PATTERN;
                    }

                    /* move to next pattern char */
                    p++;
                }
            }
            /* p rests on ']' here; the for-increment steps over it */
            break;
        } // case '['

        /* must match this character exactly */
        default:
            if ( *p != *t )
                return MATCH_LITERAL;
        }
    }

    /* if end of text not reached then the pattern fails */
    if ( *t )
        return MATCH_END;
    else
        return MATCH_VALID;
}
/*----------------------------------------------------------------------------
*
* Handle a '*' in PATTERN: p points at the '*', t at the text not yet
* consumed and s at the next free slot of the '%' extraction buffer.
*
* The run of '*', '?' and '%' following the star is absorbed first (each
* '?' and '%' eats one text character, '%' also storing it in s), then
* matche() is called recursively with the final segment of PATTERN at
* successive positions of TEXT until a definite result is seen.
*
* Returns MATCH_VALID, MATCH_ABORT or MATCH_PATTERN.
*
----------------------------------------------------------------------------*/
static int matche_after_star (const char *p, const char *t, char *s)
{
    int match = 0;

    /* pass over existing ?, % and * in pattern */
    while ( *p == '?' || *p == '%' || *p == '*' ) {
        /* take one char for each ? and % */
        if ( *p == '?' ) {
            /* if end of text then no match */
            if ( !*t++ ) {
                return MATCH_ABORT;
            }
        }
        if ( *p == '%' ) {
            /* store the character first; at end of text this merely writes
               the terminator before aborting */
            *s++ = *t;
            *s = '\0';
            /* if end of text then no match */
            if ( !*t++ ) {
                return MATCH_ABORT;
            }
        }
        /* move to next char in pattern */
        p++;
    }

    /* if end of pattern we have matched regardless of text left */
    if ( !*p ) {
        return MATCH_VALID;
    }

    /* get the next character to match which must be a literal or '[' */
    const char nextp = *p;

    /* Continue until we run out of text or definite result seen */
    do {
        /* a precondition for matching is that the next character
           in the pattern match the next character in the text or that
           the next pattern char is the beginning of a range. Every
           attempt restarts from the same s, so captures written by a
           failed attempt are overwritten by the next one. */
        if ( nextp == *t || nextp == '[' ) {
            match = matche(p, t, s);
        }

        /* if the end of text is reached then no match; otherwise slide
           one text character further for the next attempt */
        if ( !*t++ ) match = MATCH_ABORT;
    } while ( match != MATCH_VALID &&
              match != MATCH_ABORT &&
              match != MATCH_PATTERN );

    /* return result */
    return match;
}
/*----------------------------------------------------------------------------
*
* match() is a thin boolean front end to matche(): it reports true only
* when matche() yields MATCH_VALID and folds every failure code into
* false. p, t and s are forwarded unchanged.
*
----------------------------------------------------------------------------*/
bool match(const char *p, const char *t, char *s)
{
    return matche(p, t, s) == MATCH_VALID;
}
//***************************************************************************
//***************************************************************************
bool
ProcessMask(const std::string & rString,
const std::string & rWildcard,
std::string & rSubstr)
{
char * substr;
int percent_count = 0;
int ret ;
size_t pos = 0;
// let's find how many % to allocate enough space for the return substring
while ((pos = rWildcard.find('%', pos)) != rWildcard.npos)
{
percent_count++;
pos++;
}
// allocate space for the substring
substr = new char[percent_count + 1];
substr[percent_count] = 0;
substr[0] = '\0';
// optionally prepend '*/' to wildcard
std::string wildcard(rWildcard);
if(wildcard[0] != '*') {
wildcard = "*/" + wildcard;
}
//optionally prepend '/' to string
std::string string1(rString);
if(string1[0] != '/') {
string1 = "/" + string1;
}
// parse the string
if (0 != (ret = match(wildcard.c_str(), string1.c_str(), substr)))
{
rSubstr = substr;
}
delete[] substr;
return ret;
} // ProcessMask
}
#ifdef TEST
/*
* This test main expects as first arg the pattern and as second arg
* the match string. Output is yea or nay on match. If nay on
* match then the error code is parsed and written.
*/
#include <stdio.h>
/*
* Test driver: argv[1] is the pattern, argv[2] the text. Prints whether the
* text matches, a description of the failure otherwise and, on success, the
* characters captured by '%'. Always returns 0.
*
* NOTE(review): the matcher functions are defined inside namespace TNet
* above while this main() lives outside of it; unless StkMatch.h makes them
* visible at global scope a `using namespace TNet;` is needed here -- confirm.
*/
int main(int argc, char *argv[])
{
    int error;
    int is_valid_error;

    /* validate the argument count before touching argv[1] / argv[2] */
    if (argc != 3) {
        printf("Usage: MATCH Pattern Text\n");
        return(0);
    }

    /* size the capture buffer from the PATTERN (it used to be counted on
       argv[0], the program name): one slot per '%' plus the terminator */
    size_t percent_count = 0;
    for (const char *tmp = argv[1]; *tmp; tmp++)
        if (*tmp=='%') percent_count++;

    /* zero-filled, so printing it is safe even when nothing is captured */
    std::string captured(percent_count + 1, '\0');
    char *s = &captured[0];

    printf("Pattern: %s\n", argv[1]);
    printf("Text : %s\n", argv[2]);
    if (!is_pattern(argv[1])) {
        printf(" First Argument Is Not A Pattern\n");
        return(0);
    }

    match(argv[1],argv[2], s) ? printf("true") : printf("false");
    error = matche(argv[1],argv[2], s);
    is_valid_pattern(argv[1],&is_valid_error);

    /* NOTE(review): matche() can also return MATCH_LITERAL and
       is_valid_pattern() can report PATTERN_ESC; neither has a case below,
       so both end up in the "Internal Error" defaults */
    switch ( error ) {
    case MATCH_VALID:
        printf(" Match Successful");
        if (is_valid_error != PATTERN_VALID)
            printf(" -- is_valid_pattern() is complaining\n");
        else
            printf("\n");
        printf("%s\n", s);
        break;
    case MATCH_RANGE:
        printf(" Match Failed on [..]\n");
        break;
    case MATCH_ABORT:
        printf(" Match Failed on Early Text Termination\n");
        break;
    case MATCH_END:
        printf(" Match Failed on Early Pattern Termination\n");
        break;
    case MATCH_PATTERN:
        switch ( is_valid_error ) {
        case PATTERN_VALID:
            printf(" Internal Disagreement On Pattern\n");
            break;
        case PATTERN_RANGE:
            printf(" No End of Range in [..] Construct\n");
            break;
        case PATTERN_CLOSE:
            printf(" [..] Construct is Open\n");
            break;
        case PATTERN_EMPTY:
            printf(" [..] Construct is Empty\n");
            break;
        default:
            printf(" Internal Error in is_valid_pattern()\n");
        }
        break;
    default:
        printf(" Internal Error in matche()\n");
        break;
    }
    return(0);
}
#endif