diff options
| author | jacqueline <me@jacqueline.id.au> | 2024-06-27 16:12:18 +1000 |
|---|---|---|
| committer | jacqueline <me@jacqueline.id.au> | 2024-06-27 16:12:18 +1000 |
| commit | 1036f1b00efe2bbd2467cbfa3a4a97ab7f56591e (patch) | |
| tree | cc3aa8683daa30e36f3777699e32aaf93e211f95 /lib/console/argtable3/arg_rex.c | |
| parent | 1daa1e9b0fe737ea5dac99f42fdf3c217873443f (diff) | |
| download | tangara-fw-1036f1b00efe2bbd2467cbfa3a4a97ab7f56591e.tar.gz | |
fork esp-idf's dev console, in order to add some hooks
Diffstat (limited to 'lib/console/argtable3/arg_rex.c')
| -rw-r--r-- | lib/console/argtable3/arg_rex.c | 1014 |
1 files changed, 1014 insertions, 0 deletions
diff --git a/lib/console/argtable3/arg_rex.c b/lib/console/argtable3/arg_rex.c new file mode 100644 index 00000000..8a7aa181 --- /dev/null +++ b/lib/console/argtable3/arg_rex.c @@ -0,0 +1,1014 @@ +/* + * SPDX-FileCopyrightText: 1998-2001,2003-2011,2013 Stewart Heitmann + * + * SPDX-License-Identifier: BSD-3-Clause + */ +/******************************************************************************* + * arg_rex: Implements the regex command-line option + * + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * <sheitmann@users.sourceforge.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include "argtable3.h" + +#ifndef ARG_AMALGAMATION +#include "argtable3_private.h" +#endif + +#include <stdlib.h> +#include <string.h> + +#ifndef _TREX_H_ +#define _TREX_H_ + +/* + * This module uses the T-Rex regular expression library to implement the regex + * logic. Here is the copyright notice of the library: + * + * Copyright (C) 2003-2006 Alberto Demichelis + * + * This software is provided 'as-is', without any express + * or implied warranty. In no event will the authors be held + * liable for any damages arising from the use of this software. + * + * Permission is granted to anyone to use this software for + * any purpose, including commercial applications, and to alter + * it and redistribute it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; + * you must not claim that you wrote the original software. + * If you use this software in a product, an acknowledgment + * in the product documentation would be appreciated but + * is not required. + * + * 2. Altered source versions must be plainly marked as such, + * and must not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any + * source distribution. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define TRexChar char +#define MAX_CHAR 0xFF +#define _TREXC(c) (c) +#define trex_strlen strlen +#define trex_printf printf + +#ifndef TREX_API +#define TREX_API extern +#endif + +#define TRex_True 1 +#define TRex_False 0 + +#define TREX_ICASE ARG_REX_ICASE + +typedef unsigned int TRexBool; +typedef struct TRex TRex; + +typedef struct { + const TRexChar* begin; + int len; +} TRexMatch; + +#if defined(__clang__) +TREX_API TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags) __attribute__((optnone)); +#elif defined(__GNUC__) +TREX_API TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags) __attribute__((optimize(0))); +#else +TREX_API TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags); +#endif +TREX_API void trex_free(TRex* exp); +TREX_API TRexBool trex_match(TRex* exp, const TRexChar* text); +TREX_API TRexBool trex_search(TRex* exp, const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end); +TREX_API TRexBool +trex_searchrange(TRex* exp, const TRexChar* text_begin, const TRexChar* text_end, const TRexChar** out_begin, const TRexChar** out_end); +TREX_API int trex_getsubexpcount(TRex* exp); +TREX_API TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch* subexp); + +#ifdef __cplusplus +} +#endif + +#endif + +struct privhdr { + const char* pattern; + int flags; +}; + +static void arg_rex_resetfn(struct arg_rex* parent) { + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + +static int arg_rex_scanfn(struct arg_rex* parent, const char* argval) { + int errorcode = 0; + const TRexChar* error = NULL; + TRex* rex = NULL; + TRexBool is_match = TRex_False; + + if (parent->count == parent->hdr.maxcount) { + /* maximum number of arguments exceeded */ + errorcode = ARG_ERR_MAXCOUNT; + } else if (!argval) { + /* a valid argument with no argument value was given. */ + /* This happens when an optional argument value was invoked. */ + /* leave parent argument value unaltered but still count the argument. */ + parent->count++; + } else { + struct privhdr* priv = (struct privhdr*)parent->hdr.priv; + + /* test the current argument value for a match with the regular expression */ + /* if a match is detected, record the argument value in the arg_rex struct */ + + rex = trex_compile(priv->pattern, &error, priv->flags); + is_match = trex_match(rex, argval); + if (!is_match) + errorcode = ARG_ERR_REGNOMATCH; + else + parent->sval[parent->count++] = argval; + + trex_free(rex); + } + + ARG_TRACE(("%s:scanfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + +static int arg_rex_checkfn(struct arg_rex* parent) { + int errorcode = (parent->count < parent->hdr.mincount) ? ARG_ERR_MINCOUNT : 0; +#if 0 + struct privhdr *priv = (struct privhdr*)parent->hdr.priv; + + /* free the regex "program" we constructed in resetfn */ + regfree(&(priv->regex)); + + /*printf("%s:checkfn(%p) returns %d\n",__FILE__,parent,errorcode);*/ +#endif + return errorcode; +} + +static void arg_rex_errorfn(struct arg_rex* parent, arg_dstr_t ds, int errorcode, const char* argval, const char* progname) { + const char* shortopts = parent->hdr.shortopts; + const char* longopts = parent->hdr.longopts; + const char* datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? argval : ""; + + arg_dstr_catf(ds, "%s: ", progname); + switch (errorcode) { + case ARG_ERR_MINCOUNT: + arg_dstr_cat(ds, "missing option "); + arg_print_option_ds(ds, shortopts, longopts, datatype, "\n"); + break; + + case ARG_ERR_MAXCOUNT: + arg_dstr_cat(ds, "excess option "); + arg_print_option_ds(ds, shortopts, longopts, argval, "\n"); + break; + + case ARG_ERR_REGNOMATCH: + arg_dstr_cat(ds, "illegal value "); + arg_print_option_ds(ds, shortopts, longopts, argval, "\n"); + break; + + default: { + #if 0 + char errbuff[256]; + regerror(errorcode, NULL, errbuff, sizeof(errbuff)); + printf("%s\n", errbuff); + #endif + } break; + } +} + +struct arg_rex* arg_rex0(const char* shortopts, const char* longopts, const char* pattern, const char* datatype, int flags, const char* glossary) { + return arg_rexn(shortopts, longopts, pattern, datatype, 0, 1, flags, glossary); +} + +struct arg_rex* arg_rex1(const char* shortopts, const char* longopts, const char* pattern, const char* datatype, int flags, const char* glossary) { + return arg_rexn(shortopts, longopts, pattern, datatype, 1, 1, flags, glossary); +} + +struct arg_rex* arg_rexn(const char* shortopts, + const char* longopts, + const char* pattern, + const char* datatype, + int mincount, + int maxcount, + int flags, + const char* glossary) { + size_t nbytes; + struct arg_rex* result; + struct privhdr* priv; + int i; + const TRexChar* error = NULL; + TRex* rex = NULL; + + if (!pattern) { + printf("argtable: ERROR - illegal regular expression pattern \"(NULL)\"\n"); + printf("argtable: Bad argument table.\n"); + return NULL; + } + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? mincount : maxcount; + + nbytes = sizeof(struct arg_rex) /* storage for struct arg_rex */ + + sizeof(struct privhdr) /* storage for private arg_rex data */ + + (size_t)maxcount * sizeof(char*); /* storage for sval[maxcount] array */ + + /* init the arg_hdr struct */ + result = (struct arg_rex*)xmalloc(nbytes); + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = datatype ? datatype : pattern; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn*)arg_rex_resetfn; + result->hdr.scanfn = (arg_scanfn*)arg_rex_scanfn; + result->hdr.checkfn = (arg_checkfn*)arg_rex_checkfn; + result->hdr.errorfn = (arg_errorfn*)arg_rex_errorfn; + + /* store the arg_rex_priv struct immediately after the arg_rex struct */ + result->hdr.priv = result + 1; + priv = (struct privhdr*)(result->hdr.priv); + priv->pattern = pattern; + priv->flags = flags; + + /* store the sval[maxcount] array immediately after the arg_rex_priv struct */ + result->sval = (const char**)(priv + 1); + result->count = 0; + + /* foolproof the string pointers by initializing them to reference empty strings */ + for (i = 0; i < maxcount; i++) + result->sval[i] = ""; + + /* here we construct and destroy a regex representation of the regular + * expression for no other reason than to force any regex errors to be + * trapped now rather than later. If we don't, then errors may go undetected + * until an argument is actually parsed. + */ + + rex = trex_compile(priv->pattern, &error, priv->flags); + if (rex == NULL) { + ARG_LOG(("argtable: %s \"%s\"\n", error ? error : _TREXC("undefined"), priv->pattern)); + ARG_LOG(("argtable: Bad argument table.\n")); + } + + trex_free(rex); + + ARG_TRACE(("arg_rexn() returns %p\n", result)); + return result; +} + +/* see copyright notice in trex.h */ +#include <ctype.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _UINCODE +#define scisprint iswprint +#define scstrlen wcslen +#define scprintf wprintf +#define _SC(x) L(x) +#else +#define scisprint isprint +#define scstrlen strlen +#define scprintf printf +#define _SC(x) (x) +#endif + +#ifdef ARG_REX_DEBUG +#include <stdio.h> + +static const TRexChar* g_nnames[] = {_SC("NONE"), _SC("OP_GREEDY"), _SC("OP_OR"), _SC("OP_EXPR"), _SC("OP_NOCAPEXPR"), + _SC("OP_DOT"), _SC("OP_CLASS"), _SC("OP_CCLASS"), _SC("OP_NCLASS"), _SC("OP_RANGE"), + _SC("OP_CHAR"), _SC("OP_EOL"), _SC("OP_BOL"), _SC("OP_WB")}; + +#endif +#define OP_GREEDY (MAX_CHAR + 1) /* * + ? {n} */ +#define OP_OR (MAX_CHAR + 2) +#define OP_EXPR (MAX_CHAR + 3) /* parentesis () */ +#define OP_NOCAPEXPR (MAX_CHAR + 4) /* parentesis (?:) */ +#define OP_DOT (MAX_CHAR + 5) +#define OP_CLASS (MAX_CHAR + 6) +#define OP_CCLASS (MAX_CHAR + 7) +#define OP_NCLASS (MAX_CHAR + 8) /* negates class the [^ */ +#define OP_RANGE (MAX_CHAR + 9) +#define OP_CHAR (MAX_CHAR + 10) +#define OP_EOL (MAX_CHAR + 11) +#define OP_BOL (MAX_CHAR + 12) +#define OP_WB (MAX_CHAR + 13) + +#define TREX_SYMBOL_ANY_CHAR ('.') +#define TREX_SYMBOL_GREEDY_ONE_OR_MORE ('+') +#define TREX_SYMBOL_GREEDY_ZERO_OR_MORE ('*') +#define TREX_SYMBOL_GREEDY_ZERO_OR_ONE ('?') +#define TREX_SYMBOL_BRANCH ('|') +#define TREX_SYMBOL_END_OF_STRING ('$') +#define TREX_SYMBOL_BEGINNING_OF_STRING ('^') +#define TREX_SYMBOL_ESCAPE_CHAR ('\\') + +typedef int TRexNodeType; + +typedef struct tagTRexNode { + TRexNodeType type; + int left; + int right; + int next; +} TRexNode; + +struct TRex { + const TRexChar* _eol; + const TRexChar* _bol; + const TRexChar* _p; + int _first; + int _op; + TRexNode* _nodes; + int _nallocated; + int _nsize; + int _nsubexpr; + TRexMatch* _matches; + int _currsubexp; + void* _jmpbuf; + const TRexChar** _error; + int _flags; +}; + +static int trex_list(TRex* exp); + +static int trex_newnode(TRex* exp, TRexNodeType type) { + TRexNode n; + int newid; + n.type = type; + n.next = n.right = n.left = -1; + if (type == OP_EXPR) + n.right = exp->_nsubexpr++; + if (exp->_nallocated < (exp->_nsize + 1)) { + exp->_nallocated *= 2; + exp->_nodes = (TRexNode*)xrealloc(exp->_nodes, (size_t)exp->_nallocated * sizeof(TRexNode)); + } + exp->_nodes[exp->_nsize++] = n; + newid = exp->_nsize - 1; + return (int)newid; +} + +static void trex_error(TRex* exp, const TRexChar* error) { + if (exp->_error) + *exp->_error = error; + longjmp(*((jmp_buf*)exp->_jmpbuf), -1); +} + +static void trex_expect(TRex* exp, int n) { + if ((*exp->_p) != n) + trex_error(exp, _SC("expected paren")); + exp->_p++; +} + +static TRexChar trex_escapechar(TRex* exp) { + if (*exp->_p == TREX_SYMBOL_ESCAPE_CHAR) { + exp->_p++; + switch (*exp->_p) { + case 'v': + exp->_p++; + return '\v'; + case 'n': + exp->_p++; + return '\n'; + case 't': + exp->_p++; + return '\t'; + case 'r': + exp->_p++; + return '\r'; + case 'f': + exp->_p++; + return '\f'; + default: + return (*exp->_p++); + } + } else if (!scisprint((int)(*exp->_p))) + trex_error(exp, _SC("letter expected")); + return (*exp->_p++); +} + +static int trex_charclass(TRex* exp, int classid) { + int n = trex_newnode(exp, OP_CCLASS); + exp->_nodes[n].left = classid; + return n; +} + +static int trex_charnode(TRex* exp, TRexBool isclass) { + TRexChar t; + if (*exp->_p == TREX_SYMBOL_ESCAPE_CHAR) { + exp->_p++; + switch (*exp->_p) { + case 'n': + exp->_p++; + return trex_newnode(exp, '\n'); + case 't': + exp->_p++; + return trex_newnode(exp, '\t'); + case 'r': + exp->_p++; + return trex_newnode(exp, '\r'); + case 'f': + exp->_p++; + return trex_newnode(exp, '\f'); + case 'v': + exp->_p++; + return trex_newnode(exp, '\v'); + case 'a': + case 'A': + case 'w': + case 'W': + case 's': + case 'S': + case 'd': + case 'D': + case 'x': + case 'X': + case 'c': + case 'C': + case 'p': + case 'P': + case 'l': + case 'u': { + t = *exp->_p; + exp->_p++; + return trex_charclass(exp, t); + } + case 'b': + case 'B': + if (!isclass) { + int node = trex_newnode(exp, OP_WB); + exp->_nodes[node].left = *exp->_p; + exp->_p++; + return node; + } + /* fall through */ + default: + t = *exp->_p; + exp->_p++; + return trex_newnode(exp, t); + } + } else if (!scisprint((int)(*exp->_p))) { + trex_error(exp, _SC("letter expected")); + } + t = *exp->_p; + exp->_p++; + return trex_newnode(exp, t); +} +static int trex_class(TRex* exp) { + int ret = -1; + int first = -1, chain; + if (*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING) { + ret = trex_newnode(exp, OP_NCLASS); + exp->_p++; + } else + ret = trex_newnode(exp, OP_CLASS); + + if (*exp->_p == ']') + trex_error(exp, _SC("empty class")); + chain = ret; + while (*exp->_p != ']' && exp->_p != exp->_eol) { + if (*exp->_p == '-' && first != -1) { + int r, t; + if (*exp->_p++ == ']') + trex_error(exp, _SC("unfinished range")); + r = trex_newnode(exp, OP_RANGE); + if (first > *exp->_p) + trex_error(exp, _SC("invalid range")); + if (exp->_nodes[first].type == OP_CCLASS) + trex_error(exp, _SC("cannot use character classes in ranges")); + exp->_nodes[r].left = exp->_nodes[first].type; + t = trex_escapechar(exp); + exp->_nodes[r].right = t; + exp->_nodes[chain].next = r; + chain = r; + first = -1; + } else { + if (first != -1) { + int c = first; + exp->_nodes[chain].next = c; + chain = c; + first = trex_charnode(exp, TRex_True); + } else { + first = trex_charnode(exp, TRex_True); + } + } + } + if (first != -1) { + int c = first; + exp->_nodes[chain].next = c; + chain = c; + first = -1; + } + /* hack? */ + exp->_nodes[ret].left = exp->_nodes[ret].next; + exp->_nodes[ret].next = -1; + return ret; +} + +static int trex_parsenumber(TRex* exp) { + int ret = *exp->_p - '0'; + int positions = 10; + exp->_p++; + while (isdigit((int)(*exp->_p))) { + ret = ret * 10 + (*exp->_p++ - '0'); + if (positions == 1000000000) + trex_error(exp, _SC("overflow in numeric constant")); + positions *= 10; + }; + return ret; +} + +static int trex_element(TRex* exp) { + int ret = -1; + switch (*exp->_p) { + case '(': { + int expr, newn; + exp->_p++; + + if (*exp->_p == '?') { + exp->_p++; + trex_expect(exp, ':'); + expr = trex_newnode(exp, OP_NOCAPEXPR); + } else + expr = trex_newnode(exp, OP_EXPR); + newn = trex_list(exp); + exp->_nodes[expr].left = newn; + ret = expr; + trex_expect(exp, ')'); + } break; + case '[': + exp->_p++; + ret = trex_class(exp); + trex_expect(exp, ']'); + break; + case TREX_SYMBOL_END_OF_STRING: + exp->_p++; + ret = trex_newnode(exp, OP_EOL); + break; + case TREX_SYMBOL_ANY_CHAR: + exp->_p++; + ret = trex_newnode(exp, OP_DOT); + break; + default: + ret = trex_charnode(exp, TRex_False); + break; + } + + { + TRexBool isgreedy = TRex_False; + unsigned short p0 = 0, p1 = 0; + switch (*exp->_p) { + case TREX_SYMBOL_GREEDY_ZERO_OR_MORE: + p0 = 0; + p1 = 0xFFFF; + exp->_p++; + isgreedy = TRex_True; + break; + case TREX_SYMBOL_GREEDY_ONE_OR_MORE: + p0 = 1; + p1 = 0xFFFF; + exp->_p++; + isgreedy = TRex_True; + break; + case TREX_SYMBOL_GREEDY_ZERO_OR_ONE: + p0 = 0; + p1 = 1; + exp->_p++; + isgreedy = TRex_True; + break; + case '{': + exp->_p++; + if (!isdigit((int)(*exp->_p))) + trex_error(exp, _SC("number expected")); + p0 = (unsigned short)trex_parsenumber(exp); + /*******************************/ + switch (*exp->_p) { + case '}': + p1 = p0; + exp->_p++; + break; + case ',': + exp->_p++; + p1 = 0xFFFF; + if (isdigit((int)(*exp->_p))) { + p1 = (unsigned short)trex_parsenumber(exp); + } + trex_expect(exp, '}'); + break; + default: + trex_error(exp, _SC(", or } expected")); + } + /*******************************/ + isgreedy = TRex_True; + break; + } + if (isgreedy) { + int nnode = trex_newnode(exp, OP_GREEDY); + exp->_nodes[nnode].left = ret; + exp->_nodes[nnode].right = ((p0) << 16) | p1; + ret = nnode; + } + } + if ((*exp->_p != TREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != TREX_SYMBOL_GREEDY_ZERO_OR_MORE) && + (*exp->_p != TREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) { + int nnode = trex_element(exp); + exp->_nodes[ret].next = nnode; + } + + return ret; +} + +static int trex_list(TRex* exp) { + int ret = -1, e; + if (*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING) { + exp->_p++; + ret = trex_newnode(exp, OP_BOL); + } + e = trex_element(exp); + if (ret != -1) { + exp->_nodes[ret].next = e; + } else + ret = e; + + if (*exp->_p == TREX_SYMBOL_BRANCH) { + int temp, tright; + exp->_p++; + temp = trex_newnode(exp, OP_OR); + exp->_nodes[temp].left = ret; + tright = trex_list(exp); + exp->_nodes[temp].right = tright; + ret = temp; + } + return ret; +} + +static TRexBool trex_matchcclass(int cclass, TRexChar c) { + switch (cclass) { + case 'a': + return isalpha(c) ? TRex_True : TRex_False; + case 'A': + return !isalpha(c) ? TRex_True : TRex_False; + case 'w': + return (isalnum(c) || c == '_') ? TRex_True : TRex_False; + case 'W': + return (!isalnum(c) && c != '_') ? TRex_True : TRex_False; + case 's': + return isspace(c) ? TRex_True : TRex_False; + case 'S': + return !isspace(c) ? TRex_True : TRex_False; + case 'd': + return isdigit(c) ? TRex_True : TRex_False; + case 'D': + return !isdigit(c) ? TRex_True : TRex_False; + case 'x': + return isxdigit(c) ? TRex_True : TRex_False; + case 'X': + return !isxdigit(c) ? TRex_True : TRex_False; + case 'c': + return iscntrl(c) ? TRex_True : TRex_False; + case 'C': + return !iscntrl(c) ? TRex_True : TRex_False; + case 'p': + return ispunct(c) ? TRex_True : TRex_False; + case 'P': + return !ispunct(c) ? TRex_True : TRex_False; + case 'l': + return islower(c) ? TRex_True : TRex_False; + case 'u': + return isupper(c) ? TRex_True : TRex_False; + } + return TRex_False; /*cannot happen*/ +} + +static TRexBool trex_matchclass(TRex* exp, TRexNode* node, TRexChar c) { + do { + switch (node->type) { + case OP_RANGE: + if (exp->_flags & TREX_ICASE) { + if (c >= toupper(node->left) && c <= toupper(node->right)) + return TRex_True; + if (c >= tolower(node->left) && c <= tolower(node->right)) + return TRex_True; + } else { + if (c >= node->left && c <= node->right) + return TRex_True; + } + break; + case OP_CCLASS: + if (trex_matchcclass(node->left, c)) + return TRex_True; + break; + default: + if (exp->_flags & TREX_ICASE) { + if (c == tolower(node->type) || c == toupper(node->type)) + return TRex_True; + } else { + if (c == node->type) + return TRex_True; + } + } + } while ((node->next != -1) && ((node = &exp->_nodes[node->next]) != NULL)); + return TRex_False; +} + +static const TRexChar* trex_matchnode(TRex* exp, TRexNode* node, const TRexChar* str, TRexNode* next) { + TRexNodeType type = node->type; + switch (type) { + case OP_GREEDY: { + /* TRexNode *greedystop = (node->next != -1) ? &exp->_nodes[node->next] : NULL; */ + TRexNode* greedystop = NULL; + int p0 = (node->right >> 16) & 0x0000FFFF, p1 = node->right & 0x0000FFFF, nmaches = 0; + const TRexChar *s = str, *good = str; + + if (node->next != -1) { + greedystop = &exp->_nodes[node->next]; + } else { + greedystop = next; + } + + while ((nmaches == 0xFFFF || nmaches < p1)) { + const TRexChar* stop; + if ((s = trex_matchnode(exp, &exp->_nodes[node->left], s, greedystop)) == NULL) + break; + nmaches++; + good = s; + if (greedystop) { + /* checks that 0 matches satisfy the expression(if so skips) */ + /* if not would always stop(for instance if is a '?') */ + if (greedystop->type != OP_GREEDY || (greedystop->type == OP_GREEDY && ((greedystop->right >> 16) & 0x0000FFFF) != 0)) { + TRexNode* gnext = NULL; + if (greedystop->next != -1) { + gnext = &exp->_nodes[greedystop->next]; + } else if (next && next->next != -1) { + gnext = &exp->_nodes[next->next]; + } + stop = trex_matchnode(exp, greedystop, s, gnext); + if (stop) { + /* if satisfied stop it */ + if (p0 == p1 && p0 == nmaches) + break; + else if (nmaches >= p0 && p1 == 0xFFFF) + break; + else if (nmaches >= p0 && nmaches <= p1) + break; + } + } + } + + if (s >= exp->_eol) + break; + } + if (p0 == p1 && p0 == nmaches) + return good; + else if (nmaches >= p0 && p1 == 0xFFFF) + return good; + else if (nmaches >= p0 && nmaches <= p1) + return good; + return NULL; + } + case OP_OR: { + const TRexChar* asd = str; + TRexNode* temp = &exp->_nodes[node->left]; + while ((asd = trex_matchnode(exp, temp, asd, NULL)) != NULL) { + if (temp->next != -1) + temp = &exp->_nodes[temp->next]; + else + return asd; + } + asd = str; + temp = &exp->_nodes[node->right]; + while ((asd = trex_matchnode(exp, temp, asd, NULL)) != NULL) { + if (temp->next != -1) + temp = &exp->_nodes[temp->next]; + else + return asd; + } + return NULL; + break; + } + case OP_EXPR: + case OP_NOCAPEXPR: { + TRexNode* n = &exp->_nodes[node->left]; + const TRexChar* cur = str; + int capture = -1; + if (node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) { + capture = exp->_currsubexp; + exp->_matches[capture].begin = cur; + exp->_currsubexp++; + } + + do { + TRexNode* subnext = NULL; + if (n->next != -1) { + subnext = &exp->_nodes[n->next]; + } else { + subnext = next; + } + if ((cur = trex_matchnode(exp, n, cur, subnext)) == NULL) { + if (capture != -1) { + exp->_matches[capture].begin = 0; + exp->_matches[capture].len = 0; + } + return NULL; + } + } while ((n->next != -1) && ((n = &exp->_nodes[n->next]) != NULL)); + + if (capture != -1) + exp->_matches[capture].len = (int)(cur - exp->_matches[capture].begin); + return cur; + } + case OP_WB: + if ((str == exp->_bol && !isspace((int)(*str))) || (str == exp->_eol && !isspace((int)(*(str - 1)))) || (!isspace((int)(*str)) && isspace((int)(*(str + 1)))) || + (isspace((int)(*str)) && !isspace((int)(*(str + 1))))) { + return (node->left == 'b') ? str : NULL; + } + return (node->left == 'b') ? NULL : str; + case OP_BOL: + if (str == exp->_bol) + return str; + return NULL; + case OP_EOL: + if (str == exp->_eol) + return str; + return NULL; + case OP_DOT: { + str++; + } + return str; + case OP_NCLASS: + case OP_CLASS: + if (trex_matchclass(exp, &exp->_nodes[node->left], *str) ? (type == OP_CLASS ? TRex_True : TRex_False) + : (type == OP_NCLASS ? TRex_True : TRex_False)) { + str++; + return str; + } + return NULL; + case OP_CCLASS: + if (trex_matchcclass(node->left, *str)) { + str++; + return str; + } + return NULL; + default: /* char */ + if (exp->_flags & TREX_ICASE) { + if (*str != tolower(node->type) && *str != toupper(node->type)) + return NULL; + } else { + if (*str != node->type) + return NULL; + } + str++; + return str; + } +} + +/* public api */ +TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags) { + TRex* exp = (TRex*)xmalloc(sizeof(TRex)); + exp->_eol = exp->_bol = NULL; + exp->_p = pattern; + exp->_nallocated = (int)(scstrlen(pattern) * sizeof(TRexChar)); + exp->_nodes = (TRexNode*)xmalloc((size_t)exp->_nallocated * sizeof(TRexNode)); + exp->_nsize = 0; + exp->_matches = 0; + exp->_nsubexpr = 0; + exp->_first = trex_newnode(exp, OP_EXPR); + exp->_error = error; + exp->_jmpbuf = xmalloc(sizeof(jmp_buf)); + exp->_flags = flags; + if (setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) { + int res = trex_list(exp); + exp->_nodes[exp->_first].left = res; + if (*exp->_p != '\0') + trex_error(exp, _SC("unexpected character")); +#ifdef ARG_REX_DEBUG + { + int nsize, i; + nsize = exp->_nsize; + scprintf(_SC("\n")); + for (i = 0; i < nsize; i++) { + if (exp->_nodes[i].type > MAX_CHAR) + scprintf(_SC("[%02d] %10s "), i, g_nnames[exp->_nodes[i].type - MAX_CHAR]); + else + scprintf(_SC("[%02d] %10c "), i, exp->_nodes[i].type); + scprintf(_SC("left %02d right %02d next %02d\n"), exp->_nodes[i].left, exp->_nodes[i].right, exp->_nodes[i].next); + } + scprintf(_SC("\n")); + } +#endif + exp->_matches = (TRexMatch*)xmalloc((size_t)exp->_nsubexpr * sizeof(TRexMatch)); + memset(exp->_matches, 0, (size_t)exp->_nsubexpr * sizeof(TRexMatch)); + } else { + trex_free(exp); + return NULL; + } + return exp; +} + +void trex_free(TRex* exp) { + if (exp) { + xfree(exp->_nodes); + xfree(exp->_jmpbuf); + xfree(exp->_matches); + xfree(exp); + } +} + +TRexBool trex_match(TRex* exp, const TRexChar* text) { + const TRexChar* res = NULL; + exp->_bol = text; + exp->_eol = text + scstrlen(text); + exp->_currsubexp = 0; + res = trex_matchnode(exp, exp->_nodes, text, NULL); + if (res == NULL || res != exp->_eol) + return TRex_False; + return TRex_True; +} + +TRexBool trex_searchrange(TRex* exp, const TRexChar* text_begin, const TRexChar* text_end, const TRexChar** out_begin, const TRexChar** out_end) { + const TRexChar* cur = NULL; + int node = exp->_first; + if (text_begin >= text_end) + return TRex_False; + exp->_bol = text_begin; + exp->_eol = text_end; + do { + cur = text_begin; + while (node != -1) { + exp->_currsubexp = 0; + cur = trex_matchnode(exp, &exp->_nodes[node], cur, NULL); + if (!cur) + break; + node = exp->_nodes[node].next; + } + text_begin++; + } while (cur == NULL && text_begin != text_end); + + if (cur == NULL) + return TRex_False; + + --text_begin; + + if (out_begin) + *out_begin = text_begin; + if (out_end) + *out_end = cur; + return TRex_True; +} + +TRexBool trex_search(TRex* exp, const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end) { + return trex_searchrange(exp, text, text + scstrlen(text), out_begin, out_end); +} + +int trex_getsubexpcount(TRex* exp) { + return exp->_nsubexpr; +} + +TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch* subexp) { + if (n < 0 || n >= exp->_nsubexpr) + return TRex_False; + *subexp = exp->_matches[n]; + return TRex_True; +} |
