From 19467bd5d5c104755dbf81981bcb9c91b5b330a1 Mon Sep 17 00:00:00 2001 From: Dirk Zimoch Date: Tue, 10 Jul 2018 11:07:29 +0200 Subject: [PATCH] allow literal \0 - \9 in regsub if there is no matching sub-expression --- doc/formats.html | 19 +++++++++----- src/RegexpConverter.cc | 59 +++++++++++++++++++++++------------------- src/devStream.h | 2 +- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/doc/formats.html b/doc/formats.html index 5fdf49a..425603e 100644 --- a/doc/formats.html +++ b/doc/formats.html @@ -564,8 +564,8 @@ in architecture specific RELEASE.Common.arch files. If the regular expression is not anchored, i.e. does not start with ^, leading non-matching input is skipped. A maximum of width bytes is matched, if specified. -If precision is given, it specifies the sub-expression whose match -is retuned. +If precision is given, it specifies the sub-expression in () +whose match is returned. Otherwise the complete match is returned. In any case, the complete match is consumed from the input buffer. If the expression contains a / it must be escaped like \/. @@ -586,12 +586,19 @@ as a post-processor for output.

Matches of the regex are replaced by the string subst with all -& or \0 in subst replaced with the match itself and all -\1 through \9 replaced with the match of the corresponding sub-expression. +& in subst replaced with the match itself and all +\1 through \9 replaced with the match of the corresponding +sub-expression if such a sub-expression exists. + +Due to limitations of the parser, \1 and \x01 are the same +which makes it difficult to use literal bytes with values lower than 10 in subst. +Therefore \0 always means a literal byte (incompatible change from earlier versions!) +and \1 through \9 mean literal bytes if they are larger than +the number of sub-expressions. + + To get a literal & or \ or / in the substitution write \& or \\ or \/. -There is no way to specify literal bytes with values less or equal to 9 in the -substitution!

If width is specified, it limits the number of characters processed. diff --git a/src/RegexpConverter.cc b/src/RegexpConverter.cc index 542a978..a49df4f 100644 --- a/src/RegexpConverter.cc +++ b/src/RegexpConverter.cc @@ -32,7 +32,6 @@ run-time leak. - A maximum of 9 subexpressions is supported. Only one of them can be the result of the match. - - vxWorks and maybe other OS don't have a PCRE library. Provide one? */ class RegexpConverter : public StreamFormatConverter @@ -54,9 +53,9 @@ parse(const StreamFormat& fmt, StreamBuffer& info, } if (fmt.prec > 9) { - error("Subexpression index %d too big (>9)\n", fmt.prec); + error("Sub-expression index %d too big (>9)\n", fmt.prec); return false; - } + } StreamBuffer pattern; while (*source != '/') @@ -81,22 +80,30 @@ parse(const StreamFormat& fmt, StreamBuffer& info, } source++; debug("regexp = \"%s\"\n", pattern.expand()()); - + const char* errormsg; int eoffset; - pcre* code = pcre_compile(pattern(), 0, - &errormsg, &eoffset, NULL); + int nsubexpr; + + pcre* code = pcre_compile(pattern(), 0, &errormsg, &eoffset, NULL); if (!code) { error("%s after \"%s\"\n", errormsg, pattern.expand(0, eoffset)()); return false; } + pcre_fullinfo(code, NULL, PCRE_INFO_CAPTURECOUNT, &nsubexpr); + if (fmt.prec > nsubexpr) + { + error("Sub-expression index is %d but pattern has only %d sub-expression\n", fmt.prec, nsubexpr); + return false; + } info.append(&code, sizeof(code)); if (fmt.flags & alt_flag) { StreamBuffer subst; - debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()()); + + debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()()); while (*source != '/') { if (!*source) { @@ -122,15 +129,15 @@ scanString(const StreamFormat& fmt, const char* input, int ovector[30]; int rc; unsigned int l; - + const char* info = fmt.info; pcre* code = extract(info); int length = fmt.width > 0 ? fmt.width : strlen(input); int subexpr = fmt.prec > 0 ? 
fmt.prec : 0; - + debug("input = \"%s\"\n", input); debug("length=%d\n", length); - + rc = pcre_exec(code, NULL, input, length, 0, 0, ovector, 30); debug("pcre_exec match \"%.*s\" result = %d\n", length, input, rc); if ((subexpr && rc <= subexpr) || rc < 0) @@ -152,7 +159,7 @@ scanString(const StreamFormat& fmt, const char* input, } memcpy(value, input + ovector[subexpr*2], l); value[l] = '\0'; - return ovector[1]; // consume input until end of match + return ovector[1]; // consume input until end of match } static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start) @@ -167,19 +174,19 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start) length = buffer.length() - start; if (fmt.width && fmt.width < length) length = fmt.width; - if (fmt.flags & sign_flag) + if (fmt.flags & left_flag) start = buffer.length() - length; debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\"\n", - buffer.expand()(), start, length, subst); - + buffer.expand()(), start, length, StreamBuffer(subst).expand()()); + for (c = 0, n = 1; c < length; n++) { rc = pcre_exec(code, NULL, buffer(start+c), length-c, 0, 0, ovector, 30); - debug("pcre_exec match \"%.*s\" result = %d\n", (int)length-c, buffer(start+c), rc); - if (rc < 0) // no match + debug("pcre_exec match \"%s\" result = %d\n", buffer.expand(start+c, length-c)(), rc); + if (rc < 0) // no match return; - + if (!(fmt.flags & sign_flag) && n < fmt.prec) // without + flag { // do not yet replace this match @@ -188,24 +195,24 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start) } // replace & by match in subst l = ovector[1] - ovector[0]; - debug("start = \"%s\"\n", buffer(start+c)); - debug("match = \"%.*s\"\n", l, buffer(start+c+ovector[0])); + debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])()); + debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)()); for (r = 1; r < rc; r++) - debug("sub%d = 
\"%.*s\"\n", r, ovector[r*2+1]-ovector[r*2], buffer(start+c+ovector[r*2])); - debug("rest = \"%s\"\n", buffer(start+c+ovector[1])); + debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])()); + debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])()); s = subst; - debug("subs = \"%s\"\n", s.expand()()); + debug("subs = \"%s\"\n", s.expand()()); for (r = 0; r < s.length(); r++) { debug("check \"%s\"\n", s.expand(r)()); if (s[r] == esc) { unsigned char ch = s[r+1]; - if (ch < 9) // escaped 0 - 9 : replace with subexpr + if (c != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr { ch *= 2; rl = ovector[ch+1] - ovector[ch]; - debug("replace \\%d: \"%.*s\"\n", ch/2, rl, buffer(start+c+ovector[ch])); + debug("replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)()); s.replace(r, 2, buffer(start+c+ovector[ch]), rl); r += rl - 1; } @@ -214,12 +221,12 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start) } else if (s[r] == '&') // unescaped & : replace with match { - debug("replace &: \"%.*s\"\n", l, buffer(start+c+ovector[0])); + debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)()); s.replace(r, 1, buffer(start+c+ovector[0]), l); r += l - 1; } else continue; - debug("subs = \"%s\"\n", s()); + debug("subs = \"%s\"\n", s.expand()()); } buffer.replace(start+c+ovector[0], l, s); length += s.length() - l; diff --git a/src/devStream.h b/src/devStream.h index a69b17c..2a7b8cb 100644 --- a/src/devStream.h +++ b/src/devStream.h @@ -23,7 +23,7 @@ #define STREAM_MAJOR 2 #define STREAM_MINOR 7 -#define STREAM_PATCHLEVEL 12 +#define STREAM_PATCHLEVEL 13 #if defined(__vxworks) || defined(vxWorks) #include