allow literal \0 - \9 in regsub if there is no matching sub-expression

This commit is contained in:
2018-07-10 11:07:29 +02:00
parent d69c74bc8f
commit 19467bd5d5
3 changed files with 47 additions and 33 deletions

View File

@ -564,8 +564,8 @@ in architecture specific RELEASE.Common.<em>arch</em> files.
If the regular expression is not anchored, i.e. does not start with If the regular expression is not anchored, i.e. does not start with
<code>^</code>, leading non-matching input is skipped. <code>^</code>, leading non-matching input is skipped.
A maximum of <em>width</em> bytes is matched, if specified. A maximum of <em>width</em> bytes is matched, if specified.
If <em>precision</em> is given, it specifies the sub-expression whose match If <em>precision</em> is given, it specifies the sub-expression in <code>()</code>
is returned. whose match is returned.
Otherwise the complete match is returned. Otherwise the complete match is returned.
In any case, the complete match is consumed from the input buffer. In any case, the complete match is consumed from the input buffer.
If the expression contains a <code>/</code> it must be escaped like <code>\/</code>. If the expression contains a <code>/</code> it must be escaped like <code>\/</code>.
@ -586,12 +586,19 @@ as a post-processor for output.
</p> </p>
<p> <p>
Matches of the <em>regex</em> are replaced by the string <em>subst</em> with all Matches of the <em>regex</em> are replaced by the string <em>subst</em> with all
<code>&</code> or <code>\0</code> in <em>subst</em> replaced with the match itself and all <code>&</code> in <em>subst</em> replaced with the match itself and all
<code>\1</code> through <code>\9</code> replaced with the match of the corresponding sub-expression. <code>\1</code> through <code>\9</code> replaced with the match of the corresponding
sub-expression <span class="new"> if such a sub-expression exists.
Due to limitations of the parser, <code>\1</code> and <code>\x01</code> are the same
which makes it difficult to use literal bytes with values lower than 10 in <em>subst</em>.
Therefore <code>\0</code> always means a literal byte (incompatible change from earlier version!)
and <code>\1</code> through <code>\9</code> mean literal bytes if they are larger than
the number of sub-expressions.
</span>
To get a literal <code>&</code> or <code>\</code> or <code>/</code> in the substitution write To get a literal <code>&</code> or <code>\</code> or <code>/</code> in the substitution write
<code>\&</code> or <code>\\</code> or <code>\/</code>. <code>\&</code> or <code>\\</code> or <code>\/</code>.
There is no way to specify literal bytes with values less than or equal to 9 in the
substitution!
</p> </p>
<p> <p>
If <em>width</em> is specified, it limits the number of characters processed. If <em>width</em> is specified, it limits the number of characters processed.

View File

@ -32,7 +32,6 @@
run-time leak. run-time leak.
- A maximum of 9 subexpressions is supported. Only one of them can - A maximum of 9 subexpressions is supported. Only one of them can
be the result of the match. be the result of the match.
- vxWorks and maybe other OS don't have a PCRE library. Provide one?
*/ */
class RegexpConverter : public StreamFormatConverter class RegexpConverter : public StreamFormatConverter
@ -54,7 +53,7 @@ parse(const StreamFormat& fmt, StreamBuffer& info,
} }
if (fmt.prec > 9) if (fmt.prec > 9)
{ {
error("Subexpression index %d too big (>9)\n", fmt.prec); error("Sub-expression index %d too big (>9)\n", fmt.prec);
return false; return false;
} }
@ -84,18 +83,26 @@ parse(const StreamFormat& fmt, StreamBuffer& info,
const char* errormsg; const char* errormsg;
int eoffset; int eoffset;
pcre* code = pcre_compile(pattern(), 0, int nsubexpr;
&errormsg, &eoffset, NULL);
pcre* code = pcre_compile(pattern(), 0, &errormsg, &eoffset, NULL);
if (!code) if (!code)
{ {
error("%s after \"%s\"\n", errormsg, pattern.expand(0, eoffset)()); error("%s after \"%s\"\n", errormsg, pattern.expand(0, eoffset)());
return false; return false;
} }
pcre_fullinfo(code, NULL, PCRE_INFO_CAPTURECOUNT, &nsubexpr);
if (fmt.prec > nsubexpr)
{
error("Sub-expression index is %d but pattern has only %d sub-expression\n", fmt.prec, nsubexpr);
return false;
}
info.append(&code, sizeof(code)); info.append(&code, sizeof(code));
if (fmt.flags & alt_flag) if (fmt.flags & alt_flag)
{ {
StreamBuffer subst; StreamBuffer subst;
debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()()); debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()());
while (*source != '/') while (*source != '/')
{ {
@ -167,16 +174,16 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
length = buffer.length() - start; length = buffer.length() - start;
if (fmt.width && fmt.width < length) if (fmt.width && fmt.width < length)
length = fmt.width; length = fmt.width;
if (fmt.flags & sign_flag) if (fmt.flags & left_flag)
start = buffer.length() - length; start = buffer.length() - length;
debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\"\n", debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\"\n",
buffer.expand()(), start, length, subst); buffer.expand()(), start, length, StreamBuffer(subst).expand()());
for (c = 0, n = 1; c < length; n++) for (c = 0, n = 1; c < length; n++)
{ {
rc = pcre_exec(code, NULL, buffer(start+c), length-c, 0, 0, ovector, 30); rc = pcre_exec(code, NULL, buffer(start+c), length-c, 0, 0, ovector, 30);
debug("pcre_exec match \"%.*s\" result = %d\n", (int)length-c, buffer(start+c), rc); debug("pcre_exec match \"%s\" result = %d\n", buffer.expand(start+c, length-c)(), rc);
if (rc < 0) // no match if (rc < 0) // no match
return; return;
@ -188,11 +195,11 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
} }
// replace & by match in subst // replace & by match in subst
l = ovector[1] - ovector[0]; l = ovector[1] - ovector[0];
debug("start = \"%s\"\n", buffer(start+c)); debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
debug("match = \"%.*s\"\n", l, buffer(start+c+ovector[0])); debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
for (r = 1; r < rc; r++) for (r = 1; r < rc; r++)
debug("sub%d = \"%.*s\"\n", r, ovector[r*2+1]-ovector[r*2], buffer(start+c+ovector[r*2])); debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
debug("rest = \"%s\"\n", buffer(start+c+ovector[1])); debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
s = subst; s = subst;
debug("subs = \"%s\"\n", s.expand()()); debug("subs = \"%s\"\n", s.expand()());
for (r = 0; r < s.length(); r++) for (r = 0; r < s.length(); r++)
@ -201,11 +208,11 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
if (s[r] == esc) if (s[r] == esc)
{ {
unsigned char ch = s[r+1]; unsigned char ch = s[r+1];
if (ch < 9) // escaped 0 - 9 : replace with subexpr if (c != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
{ {
ch *= 2; ch *= 2;
rl = ovector[ch+1] - ovector[ch]; rl = ovector[ch+1] - ovector[ch];
debug("replace \\%d: \"%.*s\"\n", ch/2, rl, buffer(start+c+ovector[ch])); debug("replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
s.replace(r, 2, buffer(start+c+ovector[ch]), rl); s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
r += rl - 1; r += rl - 1;
} }
@ -214,12 +221,12 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
} }
else if (s[r] == '&') // unescaped & : replace with match else if (s[r] == '&') // unescaped & : replace with match
{ {
debug("replace &: \"%.*s\"\n", l, buffer(start+c+ovector[0])); debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
s.replace(r, 1, buffer(start+c+ovector[0]), l); s.replace(r, 1, buffer(start+c+ovector[0]), l);
r += l - 1; r += l - 1;
} }
else continue; else continue;
debug("subs = \"%s\"\n", s()); debug("subs = \"%s\"\n", s.expand()());
} }
buffer.replace(start+c+ovector[0], l, s); buffer.replace(start+c+ovector[0], l, s);
length += s.length() - l; length += s.length() - l;

View File

@ -23,7 +23,7 @@
#define STREAM_MAJOR 2 #define STREAM_MAJOR 2
#define STREAM_MINOR 7 #define STREAM_MINOR 7
#define STREAM_PATCHLEVEL 12 #define STREAM_PATCHLEVEL 13
#if defined(__vxworks) || defined(vxWorks) #if defined(__vxworks) || defined(vxWorks)
#include <vxWorks.h> #include <vxWorks.h>