regsub converter: empty match advances by 1 byte to avoid loops
This commit is contained in:
@ -666,6 +666,13 @@ Otherwise <em>precision</em> is the index (counting from 1) of the match to repl
|
|||||||
Without <em>precision</em> (or 0), all matches are replaced.
|
Without <em>precision</em> (or 0), all matches are replaced.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
|
When replacing multiple matches, the search for the next match starts directly after the currently
|
||||||
|
replaced string, so that the <em>subst</em> string itself will never be modified recursively.
|
||||||
|
<span class="new">
|
||||||
|
However, if an empty string is matched, searching advances by 1 character in order to
|
||||||
|
avoid matching the same empty string again.</span>
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
In input this converter pre-processes data received from the device before
|
In input this converter pre-processes data received from the device before
|
||||||
following converters read it.
|
following converters read it.
|
||||||
Converters preceding this one will read unmodified input.
|
Converters preceding this one will read unmodified input.
|
||||||
|
@ -197,54 +197,63 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, size_t start
|
|||||||
debug("pcre_exec: no match\n");
|
debug("pcre_exec: no match\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!(fmt.flags & sign_flag) && n < fmt.prec) // without + flag
|
|
||||||
{
|
|
||||||
// do not yet replace this match
|
|
||||||
c += ovector[1];
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// replace subexpressions
|
|
||||||
l = ovector[1] - ovector[0];
|
l = ovector[1] - ovector[0];
|
||||||
debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
|
|
||||||
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
|
// no prec: replace all matches
|
||||||
for (r = 1; r < rc; r++)
|
// prec with + flag: replace first prec matches
|
||||||
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
|
// prec without + flag: replace only match number prec
|
||||||
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
|
|
||||||
s = subst;
|
if ((fmt.flags & sign_flag) || n >= fmt.prec)
|
||||||
debug("subs = \"%s\"\n", s.expand()());
|
|
||||||
for (r = 0; r < (int)s.length(); r++)
|
|
||||||
{
|
{
|
||||||
debug("check \"%s\"\n", s.expand(r)());
|
// replace subexpressions
|
||||||
if (s[r] == esc)
|
debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
|
||||||
|
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
|
||||||
|
for (r = 1; r < rc; r++)
|
||||||
|
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
|
||||||
|
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
|
||||||
|
s = subst;
|
||||||
|
debug("subs = \"%s\"\n", s.expand()());
|
||||||
|
for (r = 0; r < (int)s.length(); r++)
|
||||||
{
|
{
|
||||||
unsigned char ch = s[r+1];
|
debug("check \"%s\"\n", s.expand(r)());
|
||||||
debug("found escaped \\%u, in range 1-%d?\n", ch, rc-1);
|
if (s[r] == esc)
|
||||||
if (ch != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
|
|
||||||
{
|
{
|
||||||
ch *= 2;
|
unsigned char ch = s[r+1];
|
||||||
rl = ovector[ch+1] - ovector[ch];
|
debug("found escaped \\%u, in range 1-%d?\n", ch, rc-1);
|
||||||
debug("yes, replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
|
if (ch != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
|
||||||
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
|
{
|
||||||
r += rl - 1;
|
ch *= 2;
|
||||||
|
rl = ovector[ch+1] - ovector[ch];
|
||||||
|
debug("yes, replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
|
||||||
|
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
|
||||||
|
r += rl - 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
debug("no, use literal \\%u\n", ch);
|
||||||
|
s.remove(r, 1); // just remove escape
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (s[r] == '&') // unescaped & : replace with match
|
||||||
{
|
{
|
||||||
debug("no, use literal \\%u\n", ch);
|
debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
|
||||||
s.remove(r, 1); // just remove escape
|
s.replace(r, 1, buffer(start+c+ovector[0]), l);
|
||||||
|
r += l - 1;
|
||||||
}
|
}
|
||||||
|
else continue;
|
||||||
|
debug("subs = \"%s\"\n", s.expand()());
|
||||||
}
|
}
|
||||||
else if (s[r] == '&') // unescaped & : replace with match
|
buffer.replace(start+c+ovector[0], l, s);
|
||||||
{
|
length -= l;
|
||||||
debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
|
length += s.length();
|
||||||
s.replace(r, 1, buffer(start+c+ovector[0]), l);
|
c += s.length();
|
||||||
r += l - 1;
|
}
|
||||||
}
|
c += ovector[0];
|
||||||
else continue;
|
if (l == 0)
|
||||||
debug("subs = \"%s\"\n", s.expand()());
|
{
|
||||||
|
debug("pcre_exec: empty match\n");
|
||||||
|
c++; // Empty strings may lead to an endless loop. Match them only once.
|
||||||
}
|
}
|
||||||
buffer.replace(start+c+ovector[0], l, s);
|
|
||||||
length += s.length() - l;
|
|
||||||
c += ovector[0] + s.length();
|
|
||||||
if (n == fmt.prec) // max match reached
|
if (n == fmt.prec) // max match reached
|
||||||
{
|
{
|
||||||
debug("pcre_exec: max match %d reached\n", n);
|
debug("pcre_exec: max match %d reached\n", n);
|
||||||
|
Reference in New Issue
Block a user