allow literal \0 - \9 in regsub if there is no matching sub-expression
This commit is contained in:
@ -564,8 +564,8 @@ in architecture specific RELEASE.Common.<em>arch</em> files.
|
|||||||
If the regular expression is not anchored, i.e. does not start with
|
If the regular expression is not anchored, i.e. does not start with
|
||||||
<code>^</code>, leading non-matching input is skipped.
|
<code>^</code>, leading non-matching input is skipped.
|
||||||
A maximum of <em>width</em> bytes is matched, if specified.
|
A maximum of <em>width</em> bytes is matched, if specified.
|
||||||
If <em>precision</em> is given, it specifies the sub-expression whose match
|
If <em>precision</em> is given, it specifies the sub-expression in <code>()</code>
|
||||||
is returned.
|
whose match is returned.
|
||||||
Otherwise the complete match is returned.
|
Otherwise the complete match is returned.
|
||||||
In any case, the complete match is consumed from the input buffer.
|
In any case, the complete match is consumed from the input buffer.
|
||||||
If the expression contains a <code>/</code> it must be escaped like <code>\/</code>.
|
If the expression contains a <code>/</code> it must be escaped like <code>\/</code>.
|
||||||
@ -586,12 +586,19 @@ as a post-processor for output.
|
|||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
Matches of the <em>regex</em> are replaced by the string <em>subst</em> with all
|
Matches of the <em>regex</em> are replaced by the string <em>subst</em> with all
|
||||||
<code>&</code> or <code>\0</code> in <em>subst</em> replaced with the match itself and all
|
<code>&</code> in <em>subst</em> replaced with the match itself and all
|
||||||
<code>\1</code> through <code>\9</code> replaced with the match of the corresponding sub-expression.
|
<code>\1</code> through <code>\9</code> replaced with the match of the corresponding
|
||||||
|
sub-expression <span class="new"> if such a sub-expression exists.
|
||||||
|
|
||||||
|
Due to limitations of the parser, <code>\1</code> and <code>\x01</code> are the same
|
||||||
|
which makes it difficult to use literal bytes with values lower than 10 in <em>subst</em>.
|
||||||
|
Therefore <code>\0</code> always means a literal byte (incompatible change from earlier versions!)
|
||||||
|
and <code>\1</code> through <code>\9</code> mean literal bytes if they are larger than
|
||||||
|
the number of sub-expressions.
|
||||||
|
</span>
|
||||||
|
|
||||||
To get a literal <code>&</code> or <code>\</code> or <code>/</code> in the substitution write
|
To get a literal <code>&</code> or <code>\</code> or <code>/</code> in the substitution write
|
||||||
<code>\&</code> or <code>\\</code> or <code>\/</code>.
|
<code>\&</code> or <code>\\</code> or <code>\/</code>.
|
||||||
There is no way to specify literal bytes with values less than or equal to 9 in the
|
|
||||||
substitution!
|
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
If <em>width</em> is specified, it limits the number of characters processed.
|
If <em>width</em> is specified, it limits the number of characters processed.
|
||||||
|
@ -32,7 +32,6 @@
|
|||||||
run-time leak.
|
run-time leak.
|
||||||
- A maximum of 9 subexpressions is supported. Only one of them can
|
- A maximum of 9 subexpressions is supported. Only one of them can
|
||||||
be the result of the match.
|
be the result of the match.
|
||||||
- vxWorks and maybe other OS don't have a PCRE library. Provide one?
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class RegexpConverter : public StreamFormatConverter
|
class RegexpConverter : public StreamFormatConverter
|
||||||
@ -54,7 +53,7 @@ parse(const StreamFormat& fmt, StreamBuffer& info,
|
|||||||
}
|
}
|
||||||
if (fmt.prec > 9)
|
if (fmt.prec > 9)
|
||||||
{
|
{
|
||||||
error("Subexpression index %d too big (>9)\n", fmt.prec);
|
error("Sub-expression index %d too big (>9)\n", fmt.prec);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,18 +83,26 @@ parse(const StreamFormat& fmt, StreamBuffer& info,
|
|||||||
|
|
||||||
const char* errormsg;
|
const char* errormsg;
|
||||||
int eoffset;
|
int eoffset;
|
||||||
pcre* code = pcre_compile(pattern(), 0,
|
int nsubexpr;
|
||||||
&errormsg, &eoffset, NULL);
|
|
||||||
|
pcre* code = pcre_compile(pattern(), 0, &errormsg, &eoffset, NULL);
|
||||||
if (!code)
|
if (!code)
|
||||||
{
|
{
|
||||||
error("%s after \"%s\"\n", errormsg, pattern.expand(0, eoffset)());
|
error("%s after \"%s\"\n", errormsg, pattern.expand(0, eoffset)());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
pcre_fullinfo(code, NULL, PCRE_INFO_CAPTURECOUNT, &nsubexpr);
|
||||||
|
if (fmt.prec > nsubexpr)
|
||||||
|
{
|
||||||
|
error("Sub-expression index is %d but pattern has only %d sub-expression\n", fmt.prec, nsubexpr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
info.append(&code, sizeof(code));
|
info.append(&code, sizeof(code));
|
||||||
|
|
||||||
if (fmt.flags & alt_flag)
|
if (fmt.flags & alt_flag)
|
||||||
{
|
{
|
||||||
StreamBuffer subst;
|
StreamBuffer subst;
|
||||||
|
|
||||||
debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()());
|
debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()());
|
||||||
while (*source != '/')
|
while (*source != '/')
|
||||||
{
|
{
|
||||||
@ -167,16 +174,16 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
|
|||||||
length = buffer.length() - start;
|
length = buffer.length() - start;
|
||||||
if (fmt.width && fmt.width < length)
|
if (fmt.width && fmt.width < length)
|
||||||
length = fmt.width;
|
length = fmt.width;
|
||||||
if (fmt.flags & sign_flag)
|
if (fmt.flags & left_flag)
|
||||||
start = buffer.length() - length;
|
start = buffer.length() - length;
|
||||||
|
|
||||||
debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\"\n",
|
debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\"\n",
|
||||||
buffer.expand()(), start, length, subst);
|
buffer.expand()(), start, length, StreamBuffer(subst).expand()());
|
||||||
|
|
||||||
for (c = 0, n = 1; c < length; n++)
|
for (c = 0, n = 1; c < length; n++)
|
||||||
{
|
{
|
||||||
rc = pcre_exec(code, NULL, buffer(start+c), length-c, 0, 0, ovector, 30);
|
rc = pcre_exec(code, NULL, buffer(start+c), length-c, 0, 0, ovector, 30);
|
||||||
debug("pcre_exec match \"%.*s\" result = %d\n", (int)length-c, buffer(start+c), rc);
|
debug("pcre_exec match \"%s\" result = %d\n", buffer.expand(start+c, length-c)(), rc);
|
||||||
if (rc < 0) // no match
|
if (rc < 0) // no match
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -188,11 +195,11 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
|
|||||||
}
|
}
|
||||||
// replace & by match in subst
|
// replace & by match in subst
|
||||||
l = ovector[1] - ovector[0];
|
l = ovector[1] - ovector[0];
|
||||||
debug("start = \"%s\"\n", buffer(start+c));
|
debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
|
||||||
debug("match = \"%.*s\"\n", l, buffer(start+c+ovector[0]));
|
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
|
||||||
for (r = 1; r < rc; r++)
|
for (r = 1; r < rc; r++)
|
||||||
debug("sub%d = \"%.*s\"\n", r, ovector[r*2+1]-ovector[r*2], buffer(start+c+ovector[r*2]));
|
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
|
||||||
debug("rest = \"%s\"\n", buffer(start+c+ovector[1]));
|
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
|
||||||
s = subst;
|
s = subst;
|
||||||
debug("subs = \"%s\"\n", s.expand()());
|
debug("subs = \"%s\"\n", s.expand()());
|
||||||
for (r = 0; r < s.length(); r++)
|
for (r = 0; r < s.length(); r++)
|
||||||
@ -201,11 +208,11 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
|
|||||||
if (s[r] == esc)
|
if (s[r] == esc)
|
||||||
{
|
{
|
||||||
unsigned char ch = s[r+1];
|
unsigned char ch = s[r+1];
|
||||||
if (ch < 9) // escaped 0 - 9 : replace with subexpr
|
if (c != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
|
||||||
{
|
{
|
||||||
ch *= 2;
|
ch *= 2;
|
||||||
rl = ovector[ch+1] - ovector[ch];
|
rl = ovector[ch+1] - ovector[ch];
|
||||||
debug("replace \\%d: \"%.*s\"\n", ch/2, rl, buffer(start+c+ovector[ch]));
|
debug("replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
|
||||||
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
|
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
|
||||||
r += rl - 1;
|
r += rl - 1;
|
||||||
}
|
}
|
||||||
@ -214,12 +221,12 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
|
|||||||
}
|
}
|
||||||
else if (s[r] == '&') // unescaped & : replace with match
|
else if (s[r] == '&') // unescaped & : replace with match
|
||||||
{
|
{
|
||||||
debug("replace &: \"%.*s\"\n", l, buffer(start+c+ovector[0]));
|
debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
|
||||||
s.replace(r, 1, buffer(start+c+ovector[0]), l);
|
s.replace(r, 1, buffer(start+c+ovector[0]), l);
|
||||||
r += l - 1;
|
r += l - 1;
|
||||||
}
|
}
|
||||||
else continue;
|
else continue;
|
||||||
debug("subs = \"%s\"\n", s());
|
debug("subs = \"%s\"\n", s.expand()());
|
||||||
}
|
}
|
||||||
buffer.replace(start+c+ovector[0], l, s);
|
buffer.replace(start+c+ovector[0], l, s);
|
||||||
length += s.length() - l;
|
length += s.length() - l;
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#define STREAM_MAJOR 2
|
#define STREAM_MAJOR 2
|
||||||
#define STREAM_MINOR 7
|
#define STREAM_MINOR 7
|
||||||
#define STREAM_PATCHLEVEL 12
|
#define STREAM_PATCHLEVEL 13
|
||||||
|
|
||||||
#if defined(__vxworks) || defined(vxWorks)
|
#if defined(__vxworks) || defined(vxWorks)
|
||||||
#include <vxWorks.h>
|
#include <vxWorks.h>
|
||||||
|
Reference in New Issue
Block a user