change the meaning of pre for regsub slightly
This commit is contained in:
@ -367,7 +367,7 @@ endian</em>, i.e. least significant byte first.
|
||||
With the <code>0</code> flag, the value is unsigned, otherwise signed.
|
||||
</p>
|
||||
<p>
|
||||
In output, the <em>prec</em> (or sizeof(long) whatever is less) least
|
||||
In output, the <em>precision</em> (or sizeof(long), whichever is less) least
|
||||
significant bytes of the value are sign extended or zero extended
|
||||
(depending on the <code>0</code> flag) to <em>width</em> bytes.
|
||||
</p>
|
||||
@ -434,7 +434,7 @@ The <em>width</em> field is the byte number from which to start
|
||||
calculating the checksum.
|
||||
Default is 0, i.e. the first byte of the input or output of the current
|
||||
command.
|
||||
The last byte is <em>prec</em> bytes before the checksum (default 0).
|
||||
The last byte is <em>precision</em> bytes before the checksum (default 0).
|
||||
For example in <code>"abcdefg%<xor>"</code> the checksum is calculated
|
||||
from <code>abcdefg</code>,
|
||||
but in <code>"abcdefg%2.1<xor>"</code> only from <code>cdef</code>.
|
||||
@ -534,35 +534,38 @@ This input-only format matches <a target="ex"
|
||||
href="http://www.pcre.org/" >Perl compatible regular expressions (PCRE)</a>.
|
||||
It is only available if a PCRE library is installed.
|
||||
</p>
|
||||
<div class="box">
|
||||
<p>
|
||||
If PCRE is not available for your host or cross architecture, download
|
||||
the sourcecode from <a target="ex" href="http://www.pcre.org/">www.pcre.org</a>
|
||||
and try my EPICS compatible <a target="ex"
|
||||
href="http://epics.web.psi.ch/software/streamdevice/pcre/Makefile">Makefile</a>
|
||||
to compile it like a normal EPICS application.
|
||||
to compile it like a normal EPICS support module.
|
||||
The Makefile is known to work with EPICS 3.14.8 and PCRE 7.2.
|
||||
In your RELEASE file define the variable <code>PCRE</code> so that
|
||||
it points to the install location of PCRE.
|
||||
</p>
|
||||
<p>
|
||||
If PCRE is already installed on your system, use the variables
|
||||
<code>PCRE_INCLUDE</code> and <code>PCRE_LIB</code> instead to provide
|
||||
the install directories of <code>pcre.h</code> and the library.
|
||||
</p>
|
||||
<p>
|
||||
If you have PCRE installed in different locations for different (cross)
|
||||
architectures, define the variables in RELEASE.Common.<architecture>
|
||||
instead of the global RELEASE file.
|
||||
If PCRE is already installed on (some of) your systems, you may add
|
||||
architectures where PCRE can be found in standard include and library
|
||||
locations to the variable <code>WITH_SYSTEM_PCRE</code>.
|
||||
If either the header file or the library is in a non-standard place,
|
||||
set in your RELEASE file the variables <code>PCRE_INCLUDE_<em>arch</em></code>
|
||||
and/or <code>PCRE_LIB_<em>arch</em></code> for the respective architectures
|
||||
to the correct directories or set
|
||||
<code>PCRE_INCLUDE</code> and/or <code>PCRE_LIB</code>
|
||||
in architecture specific RELEASE.Common.<em>arch</em> files.
|
||||
</p>
|
||||
</div>
|
||||
<p>
|
||||
If the regular expression is not anchored, i.e. does not start with
|
||||
<code>^</code>, leading non-matching input is skipped.
|
||||
A maximum of <em>width</em> bytes is matched, if specified.
|
||||
If <em>prec</em> is given, it specifies the sub-expression whose match
|
||||
If <em>precision</em> is given, it specifies the sub-expression whose match
|
||||
is returned.
|
||||
Otherwise the complete match is returned.
|
||||
In any case, the complete match is consumed from the input buffer.
|
||||
If the expression contains a <code>/</code> it must be escaped.
|
||||
If the expression contains a <code>/</code> it must be escaped like <code>\/</code>.
|
||||
</p>
|
||||
<p>
|
||||
Example: <code>%.1/<title>(.*)<\/title>/</code> returns
|
||||
@ -579,48 +582,63 @@ it can be used as a pre-processor for input or
|
||||
as a post-processor for output.
|
||||
</p>
|
||||
<p>
|
||||
Any match of the <em>regex</em> is replaced by the string <em>subst</em> with any
|
||||
<code>&</code> or <code>\0</code> in <em>subst</em> replaced with the match itself and any
|
||||
<code>\1</code> through <code>\9</code> with the corresponding sub-expressions.
|
||||
To get a literal <code>&</code> or <code>\</code> in the substitution write
|
||||
<code>\&</code> or <code>\\</code>.
|
||||
Matches of the <em>regex</em> are replaced by the string <em>subst</em> with all
|
||||
<code>&</code> or <code>\0</code> in <em>subst</em> replaced with the match itself and all
|
||||
<code>\1</code> through <code>\9</code> replaced with the match of the corresponding sub-expression.
|
||||
To get a literal <code>&</code> or <code>\</code> or <code>/</code> in the substitution write
|
||||
<code>\&</code> or <code>\\</code> or <code>\/</code>.
|
||||
There is no way to specify literal bytes with values less than or equal to 9 in the
|
||||
substitution!
|
||||
</p>
|
||||
<p>
|
||||
If <em>width</em> is specified, it limits the number of characters processed.
|
||||
If the <code>-</code> flag is used (i.e. <em>width</em> looks like a negative number)
|
||||
only the last <em>width</em> caracters are processed, else the first.
|
||||
Without <em>width</em> all available characters are processed.
|
||||
only the last <em>width</em> characters are processed, else the first.
|
||||
Without <em>width</em> (or 0) all available characters are processed.
|
||||
</p>
|
||||
<p>
|
||||
If <em>prec</em> is specified, it limits the number of times the substitution is applied.
|
||||
Without <em>prec</em>, the substitution is applied as often as possible.
|
||||
If <em>precision</em> is specified, it indicates which matches to replace.
|
||||
With the <code>+</code> flag given, <em>precision</em> is the maximum
|
||||
number of matches to replace.
|
||||
Otherwise <em>precision</em> is the index (counting from 1) of the match to replace.
|
||||
Without <em>precision</em> (or 0), all matches are replaced.
|
||||
</p>
|
||||
<p>
|
||||
In input this converter pre-processes data received from the device before
|
||||
other converters after this one read it.
|
||||
Converters before this one will see unmodified input.
|
||||
following converters read it.
|
||||
Converters preceding this one will read unmodified input.
|
||||
Thus place this converter before those whose input should be pre-processed.
|
||||
</p>
|
||||
<p>
|
||||
In output it post-processes data already formatted by other converters before this one
|
||||
In output it post-processes data already formatted by preceding converters
|
||||
before sending it to the device.
|
||||
Converters after this one will send their output unmodified.
|
||||
Converters following this one will send their output unmodified.
|
||||
Thus place this converter after those whose output should be post-processed.
|
||||
</p>
|
||||
<p>
|
||||
Examples:<br>
|
||||
<code>%#-10.2/ab/X/</code> replaces the string <code>ab</code> with <code>X</code>
|
||||
Examples:
|
||||
<div class="indent">
|
||||
<code>%#+-10.2/ab/X/</code> replaces the string <code>ab</code> with <code>X</code>
|
||||
at most 2 times in the last 10 characters.
|
||||
(<code>abcabcabcabc</code> becomes <code>abcXcXcabc</code>)<br>
|
||||
<code>%#/..\B/&:/</code> writes <code>:</code> after every second character
|
||||
(<code>abcabcabcabc</code> becomes <code>abcXcXcabc</code>)
|
||||
</div>
|
||||
<div class="indent">
|
||||
<code>%#/\\/\//</code> replaces all <code>\</code> with <code>/</code>
|
||||
(<code>\dir\file</code> becomes <code>/dir/file</code>)
|
||||
</div>
|
||||
<div class="indent">
|
||||
<code>%#/..\B/&:/</code> inserts <code>:</code> after every second character
|
||||
which is not at the end of a word.
|
||||
(<code>0b19353134</code> becomes <code>0b:19:35:31:34</code>)<br>
|
||||
<code>%#/://</code> removes all <code>:</code>.
|
||||
(<code>0b:19:35:31:34</code> becomes <code>0b19353134</code>)<br>
|
||||
(<code>0b19353134</code> becomes <code>0b:19:35:31:34</code>)
|
||||
</div>
|
||||
<div class="indent">
|
||||
<code>%#/://</code> removes all <code>:</code> characters.
|
||||
(<code>0b:19:35:31:34</code> becomes <code>0b19353134</code>)
|
||||
</div>
|
||||
<div class="indent">
|
||||
<code>%#/([^+-])*([+-])/\2\1/</code> moves a postfix sign to the front.
|
||||
(<code>1.23-</code> becomes <code>-1.23</code>)<br>
|
||||
|
||||
</p>
|
||||
</div>
|
||||
<a name="mantexp"></a>
|
||||
<h2>15. MantissaExponent DOUBLE converter (<code>%m</code>)</h2>
|
||||
<p>
|
||||
@ -679,7 +697,7 @@ In output, the system function <em>strftime()</em> is used to format the time.
|
||||
There may be differences in the implementation between operating systems.
|
||||
</p>
|
||||
<p>
|
||||
In input, <em>StreamDevice</em> used its own implementation because many
|
||||
In input, <em>StreamDevice</em> uses its own implementation because many
|
||||
systems are missing the <em>strptime()</em> function and additional formats
|
||||
are supported.
|
||||
</p>
|
||||
|
@ -88,6 +88,16 @@ code {
|
||||
text-align:left;
|
||||
}
|
||||
|
||||
.box {
|
||||
margin-left:1ex;
|
||||
margin-right:1ex;
|
||||
margin-top:0.5ex;
|
||||
padding: 0 1ex;
|
||||
border: 1px solid black;
|
||||
text-align:left;
|
||||
background-color:#f0f0f0;
|
||||
}
|
||||
|
||||
#navleft {
|
||||
position:fixed;
|
||||
left:0;
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "string.h"
|
||||
#include "pcre.h"
|
||||
|
||||
// Perl regular expressions (PCRE) %/regexp/
|
||||
// Perl regular expressions (PCRE) %/regexp/ and %#/regexp/subst/
|
||||
|
||||
/* Notes:
|
||||
- Memory for compiled regexp is allocated in parse but never freed.
|
||||
@ -65,15 +65,22 @@ parse(const StreamFormat& fmt, StreamBuffer& info,
|
||||
error("Missing closing '/' after %%/%s format conversion\n", pattern());
|
||||
return false;
|
||||
}
|
||||
if (*source == esc) {
|
||||
source++;
|
||||
pattern.append('\\');
|
||||
continue;
|
||||
if (*source == esc) { // handle escaped chars
|
||||
if (*++source != '/') // just un-escape /
|
||||
{
|
||||
pattern.append('\\');
|
||||
if ((*source & 0x7f) < 0x30) // handle control chars
|
||||
{
|
||||
pattern.print("x%02x", *source++);
|
||||
continue;
|
||||
}
|
||||
// fall through for PCRE codes like \B
|
||||
}
|
||||
}
|
||||
pattern.append(*source++);
|
||||
}
|
||||
source++;
|
||||
debug("regexp = \"%s\"\n", pattern());
|
||||
debug("regexp = \"%s\"\n", pattern.expand()());
|
||||
|
||||
const char* errormsg;
|
||||
int eoffset;
|
||||
@ -89,22 +96,19 @@ parse(const StreamFormat& fmt, StreamBuffer& info,
|
||||
if (fmt.flags & alt_flag)
|
||||
{
|
||||
StreamBuffer subst;
|
||||
debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()());
|
||||
while (*source != '/')
|
||||
{
|
||||
if (!*source) {
|
||||
error("Missing closing '/' after %%#/%s/%s format conversion\n", pattern(), subst());
|
||||
return false;
|
||||
}
|
||||
if (*source == esc) {
|
||||
source++;
|
||||
subst.append('\\');
|
||||
if (*source <= 9) subst.append('0'+*source++);
|
||||
continue;
|
||||
}
|
||||
if (*source == esc)
|
||||
subst.append(*source++);
|
||||
subst.append(*source++);
|
||||
}
|
||||
source++;
|
||||
debug("subst = \"%s\"\n", subst());
|
||||
debug("subst = \"%s\"\n", subst.expand()());
|
||||
info.append(subst).append('\0');
|
||||
return pseudo_format;
|
||||
}
|
||||
@ -131,7 +135,7 @@ scanString(const StreamFormat& fmt, const char* input,
|
||||
debug("pcre_exec match \"%.*s\" result = %d\n", length, input, rc);
|
||||
if ((subexpr && rc <= subexpr) || rc < 0)
|
||||
{
|
||||
/* error or no match or not enough sub-expressions */
|
||||
// error or no match or not enough sub-expressions
|
||||
return -1;
|
||||
}
|
||||
if (fmt.flags & skip_flag) return ovector[subexpr*2+1];
|
||||
@ -148,40 +152,41 @@ scanString(const StreamFormat& fmt, const char* input,
|
||||
}
|
||||
memcpy(value, input + ovector[subexpr*2], l);
|
||||
value[l] = '\0';
|
||||
return ovector[1]; /* consume input until end of match */;
|
||||
return ovector[1]; // consume input until end of match
|
||||
}
|
||||
|
||||
static void regsubst(pcre* code, StreamBuffer& buffer, long start, long length, const char* subst, int max)
|
||||
static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start)
|
||||
{
|
||||
int rc, l, c, r, rl, n=0;
|
||||
const char* subst = fmt.info;
|
||||
pcre* code = extract<pcre*>(subst);
|
||||
long length;
|
||||
int rc, l, c, r, rl, n;
|
||||
int ovector[30];
|
||||
StreamBuffer s;
|
||||
if (length == 0)
|
||||
{
|
||||
length = buffer.length() - start;
|
||||
}
|
||||
else if (length < 0)
|
||||
{
|
||||
length = -length;
|
||||
if (length > buffer.length() - start)
|
||||
length = buffer.length() - start;
|
||||
|
||||
length = buffer.length() - start;
|
||||
if (fmt.width && fmt.width < length)
|
||||
length = fmt.width;
|
||||
if (fmt.flags & sign_flag)
|
||||
start = buffer.length() - length;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (length > buffer.length() - start)
|
||||
length = buffer.length() - start;
|
||||
}
|
||||
debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\", max = %d\n",
|
||||
buffer.expand()(), start, length, subst, max);
|
||||
for (c = 0; c < length; )
|
||||
|
||||
debug("regsubst buffer=\"%s\", start=%ld, length=%ld, subst = \"%s\"\n",
|
||||
buffer.expand()(), start, length, subst);
|
||||
|
||||
for (c = 0, n = 1; c < length; n++)
|
||||
{
|
||||
rc = pcre_exec(code, NULL, buffer(start+c), length-c, 0, 0, ovector, 30);
|
||||
debug("pcre_exec match \"%.*s\" result = %d\n", (int)length-c, buffer(start+c), rc);
|
||||
if (rc < 0) // no match
|
||||
return;
|
||||
|
||||
if (rc < 0 || (max && n++ == max))
|
||||
return; /* no match or maximum substitutions reached */
|
||||
/* replace & by match in subst */
|
||||
if (!(fmt.flags & sign_flag) && n < fmt.prec) // without + flag
|
||||
{
|
||||
// do not yet replace this match
|
||||
c += ovector[1];
|
||||
continue;
|
||||
}
|
||||
// replace & by match in subst
|
||||
l = ovector[1] - ovector[0];
|
||||
debug("start = \"%s\"\n", buffer(start+c));
|
||||
debug("match = \"%.*s\"\n", l, buffer(start+c+ovector[0]));
|
||||
@ -192,22 +197,22 @@ static void regsubst(pcre* code, StreamBuffer& buffer, long start, long length,
|
||||
debug("subs = \"%s\"\n", s.expand()());
|
||||
for (r = 0; r < s.length(); r++)
|
||||
{
|
||||
debug("check \"%s\"\n", s(r));
|
||||
if (s[r] == '\\')
|
||||
debug("check \"%s\"\n", s.expand(r)());
|
||||
if (s[r] == esc)
|
||||
{
|
||||
unsigned char ch = s[r+1];
|
||||
if (ch >= '0' && ch <= '9')
|
||||
if (ch < 9) // escaped 0 - 9 : replace with subexpr
|
||||
{
|
||||
ch = (ch - '0')*2;
|
||||
ch *= 2;
|
||||
rl = ovector[ch+1] - ovector[ch];
|
||||
debug("replace \\%d: \"%.*s\"\n", ch/2, rl, buffer(start+c+ovector[ch]));
|
||||
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
|
||||
r += rl - 1;
|
||||
}
|
||||
else if (ch == '\\' || ch == '&')
|
||||
s.remove(r, 1);
|
||||
else
|
||||
s.remove(r, 1); // just remove escape
|
||||
}
|
||||
else if (s[r] == '&')
|
||||
else if (s[r] == '&') // unescaped & : replace with match
|
||||
{
|
||||
debug("replace &: \"%.*s\"\n", l, buffer(start+c+ovector[0]));
|
||||
s.replace(r, 1, buffer(start+c+ovector[0]), l);
|
||||
@ -219,6 +224,8 @@ static void regsubst(pcre* code, StreamBuffer& buffer, long start, long length,
|
||||
buffer.replace(start+c+ovector[0], l, s);
|
||||
length += s.length() - l;
|
||||
c += s.length();
|
||||
if (n == fmt.prec) // max match reached
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,15 +233,7 @@ int RegexpConverter::
|
||||
scanPseudo(const StreamFormat& fmt, StreamBuffer& input, long& cursor)
|
||||
{
|
||||
/* re-write input buffer */
|
||||
const char* info = fmt.info;
|
||||
pcre* code;
|
||||
long length;
|
||||
StreamBuffer subst;
|
||||
|
||||
code = extract<pcre*>(info);
|
||||
if (fmt.flags & left_flag) length = -fmt.width;
|
||||
else length = fmt.width;
|
||||
regsubst(code, input, cursor, length, info, fmt.prec);
|
||||
regsubst(fmt, input, cursor);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -242,15 +241,7 @@ bool RegexpConverter::
|
||||
printPseudo(const StreamFormat& fmt, StreamBuffer& output)
|
||||
{
|
||||
/* re-write output buffer */
|
||||
const char* info = fmt.info;
|
||||
pcre* code;
|
||||
long length;
|
||||
StreamBuffer subst;
|
||||
|
||||
code = extract<pcre*>(info);
|
||||
if (fmt.flags & left_flag) length = -fmt.width;
|
||||
else length = fmt.width;
|
||||
regsubst(code, output, 0, length, info, fmt.prec);
|
||||
regsubst(fmt, output, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user