regsub converter: empty match advances by 1 byte to avoid loops

This commit is contained in:
2019-02-18 11:08:21 +01:00
parent acf7efcff2
commit 04906a5835
2 changed files with 55 additions and 39 deletions

View File

@ -666,6 +666,13 @@ Otherwise <em>precision</em> is the index (counting from 1) of the match to repl
Without <em>precision</em> (or 0), all matches are replaced.
</p>
<p>
When replacing multiple matches, the search for the next match starts directly after the
string that was just substituted, so that the <em>subst</em> string itself is never modified recursively.
<span class="new">
However, if an empty string is matched, searching advances by 1 character in order to
avoid matching the same empty string again.</span>
</p>
<p>
In input this converter pre-processes data received from the device before
following converters read it.
Converters preceding this one will read unmodified input.

View File

@ -197,54 +197,63 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, size_t start
debug("pcre_exec: no match\n"); debug("pcre_exec: no match\n");
break; break;
} }
if (!(fmt.flags & sign_flag) && n < fmt.prec) // without + flag
{
// do not yet replace this match
c += ovector[1];
continue;
}
// replace subexpressions
l = ovector[1] - ovector[0]; l = ovector[1] - ovector[0];
debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)()); // no prec: replace all matches
for (r = 1; r < rc; r++) // prec with + flag: replace first prec matches
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])()); // prec without + flag: replace only match number prec
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
s = subst; if ((fmt.flags & sign_flag) || n >= fmt.prec)
debug("subs = \"%s\"\n", s.expand()());
for (r = 0; r < (int)s.length(); r++)
{ {
debug("check \"%s\"\n", s.expand(r)()); // replace subexpressions
if (s[r] == esc) debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
for (r = 1; r < rc; r++)
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
s = subst;
debug("subs = \"%s\"\n", s.expand()());
for (r = 0; r < (int)s.length(); r++)
{ {
unsigned char ch = s[r+1]; debug("check \"%s\"\n", s.expand(r)());
debug("found escaped \\%u, in range 1-%d?\n", ch, rc-1); if (s[r] == esc)
if (ch != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
{ {
ch *= 2; unsigned char ch = s[r+1];
rl = ovector[ch+1] - ovector[ch]; debug("found escaped \\%u, in range 1-%d?\n", ch, rc-1);
debug("yes, replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)()); if (ch != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
s.replace(r, 2, buffer(start+c+ovector[ch]), rl); {
r += rl - 1; ch *= 2;
rl = ovector[ch+1] - ovector[ch];
debug("yes, replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
r += rl - 1;
}
else
{
debug("no, use literal \\%u\n", ch);
s.remove(r, 1); // just remove escape
}
} }
else else if (s[r] == '&') // unescaped & : replace with match
{ {
debug("no, use literal \\%u\n", ch); debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
s.remove(r, 1); // just remove escape s.replace(r, 1, buffer(start+c+ovector[0]), l);
r += l - 1;
} }
else continue;
debug("subs = \"%s\"\n", s.expand()());
} }
else if (s[r] == '&') // unescaped & : replace with match buffer.replace(start+c+ovector[0], l, s);
{ length -= l;
debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)()); length += s.length();
s.replace(r, 1, buffer(start+c+ovector[0]), l); c += s.length();
r += l - 1; }
} c += ovector[0];
else continue; if (l == 0)
debug("subs = \"%s\"\n", s.expand()()); {
debug("pcre_exec: empty match\n");
c++; // Empty strings may lead to an endless loop. Match them only once.
} }
buffer.replace(start+c+ovector[0], l, s);
length += s.length() - l;
c += ovector[0] + s.length();
if (n == fmt.prec) // max match reached if (n == fmt.prec) // max match reached
{ {
debug("pcre_exec: max match %d reached\n", n); debug("pcre_exec: max match %d reached\n", n);